@nxtedition/rocksdb 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1088):
  1. package/CHANGELOG.md +294 -0
  2. package/LICENSE +21 -0
  3. package/README.md +102 -0
  4. package/UPGRADING.md +91 -0
  5. package/binding.cc +1276 -0
  6. package/binding.gyp +73 -0
  7. package/binding.js +1 -0
  8. package/chained-batch.js +44 -0
  9. package/deps/rocksdb/build_version.cc +4 -0
  10. package/deps/rocksdb/rocksdb/CMakeLists.txt +1356 -0
  11. package/deps/rocksdb/rocksdb/COPYING +339 -0
  12. package/deps/rocksdb/rocksdb/LICENSE.Apache +202 -0
  13. package/deps/rocksdb/rocksdb/LICENSE.leveldb +29 -0
  14. package/deps/rocksdb/rocksdb/Makefile +2521 -0
  15. package/deps/rocksdb/rocksdb/TARGETS +2100 -0
  16. package/deps/rocksdb/rocksdb/cache/cache.cc +63 -0
  17. package/deps/rocksdb/rocksdb/cache/cache_bench.cc +381 -0
  18. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +114 -0
  19. package/deps/rocksdb/rocksdb/cache/cache_test.cc +775 -0
  20. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +769 -0
  21. package/deps/rocksdb/rocksdb/cache/clock_cache.h +16 -0
  22. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +574 -0
  23. package/deps/rocksdb/rocksdb/cache/lru_cache.h +339 -0
  24. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +199 -0
  25. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +162 -0
  26. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +111 -0
  27. package/deps/rocksdb/rocksdb/cmake/RocksDBConfig.cmake.in +54 -0
  28. package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +7 -0
  29. package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +29 -0
  30. package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +29 -0
  31. package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +29 -0
  32. package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +33 -0
  33. package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +29 -0
  34. package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +29 -0
  35. package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +29 -0
  36. package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +10 -0
  37. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +108 -0
  38. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +115 -0
  39. package/deps/rocksdb/rocksdb/db/blob/blob_constants.h +16 -0
  40. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +154 -0
  41. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.h +67 -0
  42. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +206 -0
  43. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +316 -0
  44. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +91 -0
  45. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +660 -0
  46. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +99 -0
  47. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +49 -0
  48. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +268 -0
  49. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage.cc +134 -0
  50. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage.h +57 -0
  51. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage_test.cc +173 -0
  52. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +55 -0
  53. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +164 -0
  54. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +423 -0
  55. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +81 -0
  56. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +771 -0
  57. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +184 -0
  58. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.cc +145 -0
  59. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +148 -0
  60. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +132 -0
  61. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +76 -0
  62. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +168 -0
  63. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +83 -0
  64. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +307 -0
  65. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +464 -0
  66. package/deps/rocksdb/rocksdb/db/builder.cc +358 -0
  67. package/deps/rocksdb/rocksdb/db/builder.h +95 -0
  68. package/deps/rocksdb/rocksdb/db/c.cc +5281 -0
  69. package/deps/rocksdb/rocksdb/db/c_test.c +2883 -0
  70. package/deps/rocksdb/rocksdb/db/column_family.cc +1602 -0
  71. package/deps/rocksdb/rocksdb/db/column_family.h +787 -0
  72. package/deps/rocksdb/rocksdb/db/column_family_test.cc +3427 -0
  73. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +425 -0
  74. package/deps/rocksdb/rocksdb/db/compacted_db_impl.cc +169 -0
  75. package/deps/rocksdb/rocksdb/db/compacted_db_impl.h +118 -0
  76. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +591 -0
  77. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +389 -0
  78. package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +37 -0
  79. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +1023 -0
  80. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +353 -0
  81. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +1254 -0
  82. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1917 -0
  83. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +208 -0
  84. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +1037 -0
  85. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +1224 -0
  86. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +1135 -0
  87. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +318 -0
  88. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +255 -0
  89. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +57 -0
  90. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +510 -0
  91. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +33 -0
  92. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +2190 -0
  93. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +1103 -0
  94. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +32 -0
  95. package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +44 -0
  96. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +660 -0
  97. package/deps/rocksdb/rocksdb/db/convenience.cc +78 -0
  98. package/deps/rocksdb/rocksdb/db/corruption_test.cc +921 -0
  99. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +359 -0
  100. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3820 -0
  101. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1058 -0
  102. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2128 -0
  103. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +851 -0
  104. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +6292 -0
  105. package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +509 -0
  106. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +130 -0
  107. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +137 -0
  108. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1119 -0
  109. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +5057 -0
  110. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +2274 -0
  111. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +3421 -0
  112. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +298 -0
  113. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +151 -0
  114. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +967 -0
  115. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1806 -0
  116. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +270 -0
  117. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +146 -0
  118. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +683 -0
  119. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +333 -0
  120. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +2024 -0
  121. package/deps/rocksdb/rocksdb/db/db_impl/db_secondary_test.cc +932 -0
  122. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +137 -0
  123. package/deps/rocksdb/rocksdb/db/db_info_dumper.h +15 -0
  124. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +178 -0
  125. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +592 -0
  126. package/deps/rocksdb/rocksdb/db/db_iter.cc +1493 -0
  127. package/deps/rocksdb/rocksdb/db/db_iter.h +390 -0
  128. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +657 -0
  129. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +3268 -0
  130. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +3197 -0
  131. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +299 -0
  132. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +513 -0
  133. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +329 -0
  134. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +241 -0
  135. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +671 -0
  136. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1022 -0
  137. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +1723 -0
  138. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1694 -0
  139. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1261 -0
  140. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +164 -0
  141. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +488 -0
  142. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +567 -0
  143. package/deps/rocksdb/rocksdb/db/db_test.cc +6736 -0
  144. package/deps/rocksdb/rocksdb/db/db_test2.cc +5408 -0
  145. package/deps/rocksdb/rocksdb/db/db_test_util.cc +1633 -0
  146. package/deps/rocksdb/rocksdb/db/db_test_util.h +1194 -0
  147. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +2235 -0
  148. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +1780 -0
  149. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +2520 -0
  150. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +119 -0
  151. package/deps/rocksdb/rocksdb/db/db_write_test.cc +465 -0
  152. package/deps/rocksdb/rocksdb/db/dbformat.cc +222 -0
  153. package/deps/rocksdb/rocksdb/db/dbformat.h +786 -0
  154. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +206 -0
  155. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +580 -0
  156. package/deps/rocksdb/rocksdb/db/error_handler.cc +726 -0
  157. package/deps/rocksdb/rocksdb/db/error_handler.h +117 -0
  158. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +2598 -0
  159. package/deps/rocksdb/rocksdb/db/event_helpers.cc +233 -0
  160. package/deps/rocksdb/rocksdb/db/event_helpers.h +57 -0
  161. package/deps/rocksdb/rocksdb/db/experimental.cc +50 -0
  162. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1559 -0
  163. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +910 -0
  164. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +195 -0
  165. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +2936 -0
  166. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +556 -0
  167. package/deps/rocksdb/rocksdb/db/file_indexer.cc +216 -0
  168. package/deps/rocksdb/rocksdb/db/file_indexer.h +142 -0
  169. package/deps/rocksdb/rocksdb/db/file_indexer_test.cc +350 -0
  170. package/deps/rocksdb/rocksdb/db/filename_test.cc +179 -0
  171. package/deps/rocksdb/rocksdb/db/flush_job.cc +514 -0
  172. package/deps/rocksdb/rocksdb/db/flush_job.h +169 -0
  173. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +647 -0
  174. package/deps/rocksdb/rocksdb/db/flush_scheduler.cc +86 -0
  175. package/deps/rocksdb/rocksdb/db/flush_scheduler.h +54 -0
  176. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +1023 -0
  177. package/deps/rocksdb/rocksdb/db/forward_iterator.h +163 -0
  178. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +377 -0
  179. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +282 -0
  180. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +75 -0
  181. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +632 -0
  182. package/deps/rocksdb/rocksdb/db/internal_stats.cc +1461 -0
  183. package/deps/rocksdb/rocksdb/db/internal_stats.h +712 -0
  184. package/deps/rocksdb/rocksdb/db/job_context.h +226 -0
  185. package/deps/rocksdb/rocksdb/db/listener_test.cc +1118 -0
  186. package/deps/rocksdb/rocksdb/db/log_format.h +48 -0
  187. package/deps/rocksdb/rocksdb/db/log_reader.cc +654 -0
  188. package/deps/rocksdb/rocksdb/db/log_reader.h +192 -0
  189. package/deps/rocksdb/rocksdb/db/log_test.cc +901 -0
  190. package/deps/rocksdb/rocksdb/db/log_writer.cc +164 -0
  191. package/deps/rocksdb/rocksdb/db/log_writer.h +115 -0
  192. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.cc +67 -0
  193. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +63 -0
  194. package/deps/rocksdb/rocksdb/db/lookup_key.h +66 -0
  195. package/deps/rocksdb/rocksdb/db/malloc_stats.cc +54 -0
  196. package/deps/rocksdb/rocksdb/db/malloc_stats.h +24 -0
  197. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +296 -0
  198. package/deps/rocksdb/rocksdb/db/memtable.cc +1169 -0
  199. package/deps/rocksdb/rocksdb/db/memtable.h +554 -0
  200. package/deps/rocksdb/rocksdb/db/memtable_list.cc +888 -0
  201. package/deps/rocksdb/rocksdb/db/memtable_list.h +438 -0
  202. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +935 -0
  203. package/deps/rocksdb/rocksdb/db/merge_context.h +134 -0
  204. package/deps/rocksdb/rocksdb/db/merge_helper.cc +421 -0
  205. package/deps/rocksdb/rocksdb/db/merge_helper.h +197 -0
  206. package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +290 -0
  207. package/deps/rocksdb/rocksdb/db/merge_operator.cc +86 -0
  208. package/deps/rocksdb/rocksdb/db/merge_test.cc +608 -0
  209. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +338 -0
  210. package/deps/rocksdb/rocksdb/db/options_file_test.cc +119 -0
  211. package/deps/rocksdb/rocksdb/db/output_validator.cc +30 -0
  212. package/deps/rocksdb/rocksdb/db/output_validator.h +47 -0
  213. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +993 -0
  214. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +113 -0
  215. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +76 -0
  216. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +231 -0
  217. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +87 -0
  218. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1374 -0
  219. package/deps/rocksdb/rocksdb/db/pre_release_callback.h +38 -0
  220. package/deps/rocksdb/rocksdb/db/prefix_test.cc +910 -0
  221. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +489 -0
  222. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +446 -0
  223. package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +260 -0
  224. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +709 -0
  225. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +439 -0
  226. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +256 -0
  227. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +552 -0
  228. package/deps/rocksdb/rocksdb/db/read_callback.h +53 -0
  229. package/deps/rocksdb/rocksdb/db/repair.cc +722 -0
  230. package/deps/rocksdb/rocksdb/db/repair_test.cc +390 -0
  231. package/deps/rocksdb/rocksdb/db/snapshot_checker.h +61 -0
  232. package/deps/rocksdb/rocksdb/db/snapshot_impl.cc +26 -0
  233. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +167 -0
  234. package/deps/rocksdb/rocksdb/db/table_cache.cc +704 -0
  235. package/deps/rocksdb/rocksdb/db/table_cache.h +233 -0
  236. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +75 -0
  237. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +107 -0
  238. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +517 -0
  239. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +318 -0
  240. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +128 -0
  241. package/deps/rocksdb/rocksdb/db/trim_history_scheduler.cc +54 -0
  242. package/deps/rocksdb/rocksdb/db/trim_history_scheduler.h +44 -0
  243. package/deps/rocksdb/rocksdb/db/version_builder.cc +1078 -0
  244. package/deps/rocksdb/rocksdb/db/version_builder.h +69 -0
  245. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +1551 -0
  246. package/deps/rocksdb/rocksdb/db/version_edit.cc +955 -0
  247. package/deps/rocksdb/rocksdb/db/version_edit.h +609 -0
  248. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +699 -0
  249. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +252 -0
  250. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +597 -0
  251. package/deps/rocksdb/rocksdb/db/version_set.cc +6333 -0
  252. package/deps/rocksdb/rocksdb/db/version_set.h +1485 -0
  253. package/deps/rocksdb/rocksdb/db/version_set_test.cc +3035 -0
  254. package/deps/rocksdb/rocksdb/db/wal_edit.cc +204 -0
  255. package/deps/rocksdb/rocksdb/db/wal_edit.h +166 -0
  256. package/deps/rocksdb/rocksdb/db/wal_edit_test.cc +214 -0
  257. package/deps/rocksdb/rocksdb/db/wal_manager.cc +517 -0
  258. package/deps/rocksdb/rocksdb/db/wal_manager.h +119 -0
  259. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +340 -0
  260. package/deps/rocksdb/rocksdb/db/write_batch.cc +2174 -0
  261. package/deps/rocksdb/rocksdb/db/write_batch_base.cc +94 -0
  262. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +250 -0
  263. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +907 -0
  264. package/deps/rocksdb/rocksdb/db/write_callback.h +27 -0
  265. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +457 -0
  266. package/deps/rocksdb/rocksdb/db/write_controller.cc +128 -0
  267. package/deps/rocksdb/rocksdb/db/write_controller.h +144 -0
  268. package/deps/rocksdb/rocksdb/db/write_controller_test.cc +135 -0
  269. package/deps/rocksdb/rocksdb/db/write_thread.cc +796 -0
  270. package/deps/rocksdb/rocksdb/db/write_thread.h +433 -0
  271. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +14 -0
  272. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +341 -0
  273. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +520 -0
  274. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +23 -0
  275. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +337 -0
  276. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +554 -0
  277. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +79 -0
  278. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +173 -0
  279. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.h +17 -0
  280. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +38 -0
  281. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +763 -0
  282. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +222 -0
  283. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.cc +27 -0
  284. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +428 -0
  285. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +218 -0
  286. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +64 -0
  287. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +2430 -0
  288. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +237 -0
  289. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +343 -0
  290. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +800 -0
  291. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +920 -0
  292. package/deps/rocksdb/rocksdb/env/env.cc +733 -0
  293. package/deps/rocksdb/rocksdb/env/env_basic_test.cc +352 -0
  294. package/deps/rocksdb/rocksdb/env/env_chroot.cc +346 -0
  295. package/deps/rocksdb/rocksdb/env/env_chroot.h +22 -0
  296. package/deps/rocksdb/rocksdb/env/env_encryption.cc +1148 -0
  297. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +137 -0
  298. package/deps/rocksdb/rocksdb/env/env_hdfs.cc +648 -0
  299. package/deps/rocksdb/rocksdb/env/env_posix.cc +514 -0
  300. package/deps/rocksdb/rocksdb/env/env_test.cc +2230 -0
  301. package/deps/rocksdb/rocksdb/env/file_system.cc +132 -0
  302. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +448 -0
  303. package/deps/rocksdb/rocksdb/env/file_system_tracer.h +415 -0
  304. package/deps/rocksdb/rocksdb/env/fs_posix.cc +1086 -0
  305. package/deps/rocksdb/rocksdb/env/io_posix.cc +1499 -0
  306. package/deps/rocksdb/rocksdb/env/io_posix.h +402 -0
  307. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +140 -0
  308. package/deps/rocksdb/rocksdb/env/mock_env.cc +1066 -0
  309. package/deps/rocksdb/rocksdb/env/mock_env.h +41 -0
  310. package/deps/rocksdb/rocksdb/env/mock_env_test.cc +85 -0
  311. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +402 -0
  312. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +150 -0
  313. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +717 -0
  314. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +156 -0
  315. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +99 -0
  316. package/deps/rocksdb/rocksdb/file/file_util.cc +268 -0
  317. package/deps/rocksdb/rocksdb/file/file_util.h +96 -0
  318. package/deps/rocksdb/rocksdb/file/filename.cc +473 -0
  319. package/deps/rocksdb/rocksdb/file/filename.h +182 -0
  320. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +188 -0
  321. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +315 -0
  322. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +142 -0
  323. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +482 -0
  324. package/deps/rocksdb/rocksdb/file/read_write_util.cc +67 -0
  325. package/deps/rocksdb/rocksdb/file/read_write_util.h +34 -0
  326. package/deps/rocksdb/rocksdb/file/readahead_raf.cc +169 -0
  327. package/deps/rocksdb/rocksdb/file/readahead_raf.h +29 -0
  328. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +237 -0
  329. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +63 -0
  330. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +552 -0
  331. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +203 -0
  332. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +523 -0
  333. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +251 -0
  334. package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +386 -0
  335. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +839 -0
  336. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +2218 -0
  337. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +294 -0
  338. package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +71 -0
  339. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +214 -0
  340. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +98 -0
  341. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +137 -0
  342. package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +40 -0
  343. package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +46 -0
  344. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +359 -0
  345. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +499 -0
  346. package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +138 -0
  347. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +1697 -0
  348. package/deps/rocksdb/rocksdb/include/rocksdb/db_bench_tool.h +11 -0
  349. package/deps/rocksdb/rocksdb/include/rocksdb/db_dump_tool.h +45 -0
  350. package/deps/rocksdb/rocksdb/include/rocksdb/db_stress_tool.h +11 -0
  351. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1671 -0
  352. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +405 -0
  353. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +29 -0
  354. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +129 -0
  355. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1472 -0
  356. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +238 -0
  357. package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +61 -0
  358. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +269 -0
  359. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +56 -0
  360. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +128 -0
  361. package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +43 -0
  362. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +556 -0
  363. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +77 -0
  364. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +385 -0
  365. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +257 -0
  366. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +155 -0
  367. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +1702 -0
  368. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +237 -0
  369. package/deps/rocksdb/rocksdb/include/rocksdb/perf_level.h +35 -0
  370. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +73 -0
  371. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +139 -0
  372. package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +10 -0
  373. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +269 -0
  374. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +103 -0
  375. package/deps/rocksdb/rocksdb/include/rocksdb/snapshot.h +48 -0
  376. package/deps/rocksdb/rocksdb/include/rocksdb/sst_dump_tool.h +19 -0
  377. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +136 -0
  378. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +47 -0
  379. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +145 -0
  380. package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +135 -0
  381. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +592 -0
  382. package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +69 -0
  383. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +608 -0
  384. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +711 -0
  385. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +280 -0
  386. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +188 -0
  387. package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +58 -0
  388. package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +48 -0
  389. package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +121 -0
  390. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +74 -0
  391. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +86 -0
  392. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +535 -0
  393. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +61 -0
  394. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/convenience.h +10 -0
  395. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +72 -0
  396. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/debug.h +49 -0
  397. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +175 -0
  398. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_mirror.h +180 -0
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/info_log_finder.h +19 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +288 -0
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +71 -0
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/leveldb_options.h +145 -0
  403. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +43 -0
  404. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +55 -0
  405. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +50 -0
  406. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +205 -0
  407. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +100 -0
  408. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +19 -0
  409. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +876 -0
  410. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +128 -0
  411. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +94 -0
  412. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +504 -0
  413. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +95 -0
  414. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +626 -0
  415. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +432 -0
  416. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +92 -0
  417. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +34 -0
  418. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +279 -0
  419. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +16 -0
  420. package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +102 -0
  421. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +377 -0
  422. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +127 -0
  423. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +106 -0
  424. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +300 -0
  425. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +165 -0
  426. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +684 -0
  427. package/deps/rocksdb/rocksdb/logging/env_logger.h +165 -0
  428. package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +162 -0
  429. package/deps/rocksdb/rocksdb/logging/event_logger.cc +70 -0
  430. package/deps/rocksdb/rocksdb/logging/event_logger.h +203 -0
  431. package/deps/rocksdb/rocksdb/logging/event_logger_test.cc +43 -0
  432. package/deps/rocksdb/rocksdb/logging/log_buffer.cc +92 -0
  433. package/deps/rocksdb/rocksdb/logging/log_buffer.h +56 -0
  434. package/deps/rocksdb/rocksdb/logging/logging.h +68 -0
  435. package/deps/rocksdb/rocksdb/logging/posix_logger.h +185 -0
  436. package/deps/rocksdb/rocksdb/memory/allocator.h +57 -0
  437. package/deps/rocksdb/rocksdb/memory/arena.cc +233 -0
  438. package/deps/rocksdb/rocksdb/memory/arena.h +141 -0
  439. package/deps/rocksdb/rocksdb/memory/arena_test.cc +204 -0
  440. package/deps/rocksdb/rocksdb/memory/concurrent_arena.cc +47 -0
  441. package/deps/rocksdb/rocksdb/memory/concurrent_arena.h +218 -0
  442. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +206 -0
  443. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +78 -0
  444. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +33 -0
  445. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +27 -0
  446. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +102 -0
  447. package/deps/rocksdb/rocksdb/memory/memory_allocator.h +38 -0
  448. package/deps/rocksdb/rocksdb/memory/memory_usage.h +25 -0
  449. package/deps/rocksdb/rocksdb/memtable/alloc_tracker.cc +62 -0
  450. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +844 -0
  451. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +49 -0
  452. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +349 -0
  453. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +44 -0
  454. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +997 -0
  455. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +663 -0
  456. package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +677 -0
  457. package/deps/rocksdb/rocksdb/memtable/skiplist.h +496 -0
  458. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +388 -0
  459. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +280 -0
  460. package/deps/rocksdb/rocksdb/memtable/stl_wrappers.h +33 -0
  461. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +301 -0
  462. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -0
  463. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +203 -0
  464. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +23 -0
  465. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +287 -0
  466. package/deps/rocksdb/rocksdb/monitoring/histogram.h +149 -0
  467. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +231 -0
  468. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +200 -0
  469. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +84 -0
  470. package/deps/rocksdb/rocksdb/monitoring/in_memory_stats_history.cc +49 -0
  471. package/deps/rocksdb/rocksdb/monitoring/in_memory_stats_history.h +74 -0
  472. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +71 -0
  473. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +98 -0
  474. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +62 -0
  475. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +60 -0
  476. package/deps/rocksdb/rocksdb/monitoring/iostats_context_test.cc +29 -0
  477. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +566 -0
  478. package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +97 -0
  479. package/deps/rocksdb/rocksdb/monitoring/perf_level.cc +28 -0
  480. package/deps/rocksdb/rocksdb/monitoring/perf_level_imp.h +18 -0
  481. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +79 -0
  482. package/deps/rocksdb/rocksdb/monitoring/persistent_stats_history.cc +169 -0
  483. package/deps/rocksdb/rocksdb/monitoring/persistent_stats_history.h +83 -0
  484. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +431 -0
  485. package/deps/rocksdb/rocksdb/monitoring/statistics.h +138 -0
  486. package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -0
  487. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +652 -0
  488. package/deps/rocksdb/rocksdb/monitoring/thread_status_impl.cc +163 -0
  489. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +314 -0
  490. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +233 -0
  491. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater_debug.cc +43 -0
  492. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +206 -0
  493. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +134 -0
  494. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +32 -0
  495. package/deps/rocksdb/rocksdb/options/cf_options.cc +1026 -0
  496. package/deps/rocksdb/rocksdb/options/cf_options.h +308 -0
  497. package/deps/rocksdb/rocksdb/options/configurable.cc +681 -0
  498. package/deps/rocksdb/rocksdb/options/configurable_helper.h +251 -0
  499. package/deps/rocksdb/rocksdb/options/configurable_test.cc +757 -0
  500. package/deps/rocksdb/rocksdb/options/configurable_test.h +127 -0
  501. package/deps/rocksdb/rocksdb/options/customizable.cc +77 -0
  502. package/deps/rocksdb/rocksdb/options/customizable_helper.h +216 -0
  503. package/deps/rocksdb/rocksdb/options/customizable_test.cc +625 -0
  504. package/deps/rocksdb/rocksdb/options/db_options.cc +835 -0
  505. package/deps/rocksdb/rocksdb/options/db_options.h +126 -0
  506. package/deps/rocksdb/rocksdb/options/options.cc +664 -0
  507. package/deps/rocksdb/rocksdb/options/options_helper.cc +1391 -0
  508. package/deps/rocksdb/rocksdb/options/options_helper.h +118 -0
  509. package/deps/rocksdb/rocksdb/options/options_parser.cc +721 -0
  510. package/deps/rocksdb/rocksdb/options/options_parser.h +151 -0
  511. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +583 -0
  512. package/deps/rocksdb/rocksdb/options/options_test.cc +3794 -0
  513. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +106 -0
  514. package/deps/rocksdb/rocksdb/port/lang.h +16 -0
  515. package/deps/rocksdb/rocksdb/port/likely.h +18 -0
  516. package/deps/rocksdb/rocksdb/port/malloc.h +17 -0
  517. package/deps/rocksdb/rocksdb/port/port.h +21 -0
  518. package/deps/rocksdb/rocksdb/port/port_dirent.h +44 -0
  519. package/deps/rocksdb/rocksdb/port/port_example.h +101 -0
  520. package/deps/rocksdb/rocksdb/port/port_posix.cc +266 -0
  521. package/deps/rocksdb/rocksdb/port/port_posix.h +223 -0
  522. package/deps/rocksdb/rocksdb/port/stack_trace.cc +179 -0
  523. package/deps/rocksdb/rocksdb/port/stack_trace.h +28 -0
  524. package/deps/rocksdb/rocksdb/port/sys_time.h +47 -0
  525. package/deps/rocksdb/rocksdb/port/util_logger.h +20 -0
  526. package/deps/rocksdb/rocksdb/port/win/env_default.cc +45 -0
  527. package/deps/rocksdb/rocksdb/port/win/env_win.cc +1449 -0
  528. package/deps/rocksdb/rocksdb/port/win/env_win.h +294 -0
  529. package/deps/rocksdb/rocksdb/port/win/io_win.cc +1084 -0
  530. package/deps/rocksdb/rocksdb/port/win/io_win.h +494 -0
  531. package/deps/rocksdb/rocksdb/port/win/port_win.cc +283 -0
  532. package/deps/rocksdb/rocksdb/port/win/port_win.h +411 -0
  533. package/deps/rocksdb/rocksdb/port/win/win_jemalloc.cc +79 -0
  534. package/deps/rocksdb/rocksdb/port/win/win_logger.cc +194 -0
  535. package/deps/rocksdb/rocksdb/port/win/win_logger.h +67 -0
  536. package/deps/rocksdb/rocksdb/port/win/win_thread.cc +183 -0
  537. package/deps/rocksdb/rocksdb/port/win/win_thread.h +122 -0
  538. package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +221 -0
  539. package/deps/rocksdb/rocksdb/port/win/xpress_win.h +26 -0
  540. package/deps/rocksdb/rocksdb/port/xpress.h +17 -0
  541. package/deps/rocksdb/rocksdb/src.mk +631 -0
  542. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +126 -0
  543. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +57 -0
  544. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +73 -0
  545. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.h +48 -0
  546. package/deps/rocksdb/rocksdb/table/block_based/block.cc +1049 -0
  547. package/deps/rocksdb/rocksdb/table/block_based/block.h +720 -0
  548. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +348 -0
  549. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +119 -0
  550. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +434 -0
  551. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1835 -0
  552. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +193 -0
  553. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +839 -0
  554. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +95 -0
  555. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +383 -0
  556. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +251 -0
  557. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +3563 -0
  558. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +681 -0
  559. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +190 -0
  560. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +347 -0
  561. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +201 -0
  562. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +78 -0
  563. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +66 -0
  564. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +32 -0
  565. package/deps/rocksdb/rocksdb/table/block_based/block_prefix_index.cc +232 -0
  566. package/deps/rocksdb/rocksdb/table/block_based/block_prefix_index.h +66 -0
  567. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +623 -0
  568. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  569. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +220 -0
  570. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +59 -0
  571. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +25 -0
  572. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.cc +93 -0
  573. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +136 -0
  574. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +717 -0
  575. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +180 -0
  576. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +102 -0
  577. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +55 -0
  578. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1407 -0
  579. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +168 -0
  580. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +88 -0
  581. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +41 -0
  582. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +344 -0
  583. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +139 -0
  584. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +333 -0
  585. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +147 -0
  586. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.h +49 -0
  587. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +248 -0
  588. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +444 -0
  589. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +54 -0
  590. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +85 -0
  591. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +56 -0
  592. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +22 -0
  593. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +40 -0
  594. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +521 -0
  595. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +144 -0
  596. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +424 -0
  597. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +163 -0
  598. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +142 -0
  599. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +186 -0
  600. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +51 -0
  601. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +64 -0
  602. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +38 -0
  603. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +120 -0
  604. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +59 -0
  605. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +324 -0
  606. package/deps/rocksdb/rocksdb/table/block_fetcher.h +129 -0
  607. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +534 -0
  608. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +277 -0
  609. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +543 -0
  610. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +136 -0
  611. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +663 -0
  612. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +107 -0
  613. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +81 -0
  614. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +404 -0
  615. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +101 -0
  616. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +585 -0
  617. package/deps/rocksdb/rocksdb/table/format.cc +422 -0
  618. package/deps/rocksdb/rocksdb/table/format.h +348 -0
  619. package/deps/rocksdb/rocksdb/table/get_context.cc +408 -0
  620. package/deps/rocksdb/rocksdb/table/get_context.h +212 -0
  621. package/deps/rocksdb/rocksdb/table/internal_iterator.h +205 -0
  622. package/deps/rocksdb/rocksdb/table/iter_heap.h +42 -0
  623. package/deps/rocksdb/rocksdb/table/iterator.cc +210 -0
  624. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +180 -0
  625. package/deps/rocksdb/rocksdb/table/merger_test.cc +180 -0
  626. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +481 -0
  627. package/deps/rocksdb/rocksdb/table/merging_iterator.h +64 -0
  628. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +541 -0
  629. package/deps/rocksdb/rocksdb/table/meta_blocks.h +154 -0
  630. package/deps/rocksdb/rocksdb/table/mock_table.cc +328 -0
  631. package/deps/rocksdb/rocksdb/table/mock_table.h +89 -0
  632. package/deps/rocksdb/rocksdb/table/multiget_context.h +282 -0
  633. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +116 -0
  634. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +44 -0
  635. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +34 -0
  636. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.cc +78 -0
  637. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +135 -0
  638. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +332 -0
  639. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +153 -0
  640. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +263 -0
  641. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +182 -0
  642. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +211 -0
  643. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +249 -0
  644. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +506 -0
  645. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +201 -0
  646. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +781 -0
  647. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +247 -0
  648. package/deps/rocksdb/rocksdb/table/scoped_arena_iterator.h +61 -0
  649. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +502 -0
  650. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +96 -0
  651. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +98 -0
  652. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +228 -0
  653. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +340 -0
  654. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +94 -0
  655. package/deps/rocksdb/rocksdb/table/table_builder.h +203 -0
  656. package/deps/rocksdb/rocksdb/table/table_factory.cc +38 -0
  657. package/deps/rocksdb/rocksdb/table/table_properties.cc +300 -0
  658. package/deps/rocksdb/rocksdb/table/table_properties_internal.h +30 -0
  659. package/deps/rocksdb/rocksdb/table/table_reader.h +147 -0
  660. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +347 -0
  661. package/deps/rocksdb/rocksdb/table/table_reader_caller.h +39 -0
  662. package/deps/rocksdb/rocksdb/table/table_test.cc +4769 -0
  663. package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +215 -0
  664. package/deps/rocksdb/rocksdb/table/two_level_iterator.h +43 -0
  665. package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +38 -0
  666. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +74 -0
  667. package/deps/rocksdb/rocksdb/test_util/sync_point.cc +93 -0
  668. package/deps/rocksdb/rocksdb/test_util/sync_point.h +161 -0
  669. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +129 -0
  670. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +74 -0
  671. package/deps/rocksdb/rocksdb/test_util/testharness.cc +56 -0
  672. package/deps/rocksdb/rocksdb/test_util/testharness.h +53 -0
  673. package/deps/rocksdb/rocksdb/test_util/testutil.cc +566 -0
  674. package/deps/rocksdb/rocksdb/test_util/testutil.h +887 -0
  675. package/deps/rocksdb/rocksdb/test_util/testutil_test.cc +43 -0
  676. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +388 -0
  677. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.h +132 -0
  678. package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +27 -0
  679. package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +45 -0
  680. package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +166 -0
  681. package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +570 -0
  682. package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +92 -0
  683. package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +54 -0
  684. package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +152 -0
  685. package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +59 -0
  686. package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +141 -0
  687. package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +33 -0
  688. package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +74 -0
  689. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +117 -0
  690. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +263 -0
  691. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +96 -0
  692. package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +40 -0
  693. package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +29 -0
  694. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +144 -0
  695. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +30 -0
  696. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +51 -0
  697. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +28 -0
  698. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +10 -0
  699. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +26 -0
  700. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +138 -0
  701. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +23 -0
  702. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +57 -0
  703. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +260 -0
  704. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +52 -0
  705. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +328 -0
  706. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +1703 -0
  707. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +16 -0
  708. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +304 -0
  709. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +39 -0
  710. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +26 -0
  711. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +318 -0
  712. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.cpp +12 -0
  713. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +57 -0
  714. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +219 -0
  715. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +207 -0
  716. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +164 -0
  717. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +57 -0
  718. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +77 -0
  719. package/deps/rocksdb/rocksdb/third-party/gcc/ppc-asm.h +390 -0
  720. package/deps/rocksdb/rocksdb/thirdparty.inc +268 -0
  721. package/deps/rocksdb/rocksdb/tools/CMakeLists.txt +30 -0
  722. package/deps/rocksdb/rocksdb/tools/blob_dump.cc +110 -0
  723. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/__init__.py +2 -0
  724. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +2000 -0
  725. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.sh +156 -0
  726. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +734 -0
  727. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +2307 -0
  728. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +395 -0
  729. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +721 -0
  730. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +719 -0
  731. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_tool.cc +25 -0
  732. package/deps/rocksdb/rocksdb/tools/db_bench.cc +21 -0
  733. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +7416 -0
  734. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +328 -0
  735. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +130 -0
  736. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +297 -0
  737. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +259 -0
  738. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_dump.cc +63 -0
  739. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +62 -0
  740. package/deps/rocksdb/rocksdb/tools/io_tracer_parser.cc +25 -0
  741. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +187 -0
  742. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +120 -0
  743. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.h +40 -0
  744. package/deps/rocksdb/rocksdb/tools/ldb.cc +21 -0
  745. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3609 -0
  746. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +665 -0
  747. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +746 -0
  748. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +159 -0
  749. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +221 -0
  750. package/deps/rocksdb/rocksdb/tools/sst_dump.cc +20 -0
  751. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +427 -0
  752. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +541 -0
  753. package/deps/rocksdb/rocksdb/tools/trace_analyzer.cc +25 -0
  754. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +752 -0
  755. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +2001 -0
  756. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +292 -0
  757. package/deps/rocksdb/rocksdb/tools/write_stress.cc +305 -0
  758. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +496 -0
  759. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +294 -0
  760. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +379 -0
  761. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +229 -0
  762. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +174 -0
  763. package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +215 -0
  764. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +491 -0
  765. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +195 -0
  766. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +255 -0
  767. package/deps/rocksdb/rocksdb/util/autovector.h +367 -0
  768. package/deps/rocksdb/rocksdb/util/autovector_test.cc +330 -0
  769. package/deps/rocksdb/rocksdb/util/bloom_impl.h +485 -0
  770. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1191 -0
  771. package/deps/rocksdb/rocksdb/util/build_version.cc.in +5 -0
  772. package/deps/rocksdb/rocksdb/util/build_version.h +15 -0
  773. package/deps/rocksdb/rocksdb/util/cast_util.h +20 -0
  774. package/deps/rocksdb/rocksdb/util/channel.h +67 -0
  775. package/deps/rocksdb/rocksdb/util/coding.cc +89 -0
  776. package/deps/rocksdb/rocksdb/util/coding.h +419 -0
  777. package/deps/rocksdb/rocksdb/util/coding_lean.h +101 -0
  778. package/deps/rocksdb/rocksdb/util/coding_test.cc +217 -0
  779. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +92 -0
  780. package/deps/rocksdb/rocksdb/util/comparator.cc +219 -0
  781. package/deps/rocksdb/rocksdb/util/compression.h +1529 -0
  782. package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +108 -0
  783. package/deps/rocksdb/rocksdb/util/compression_context_cache.h +47 -0
  784. package/deps/rocksdb/rocksdb/util/concurrent_task_limiter_impl.cc +67 -0
  785. package/deps/rocksdb/rocksdb/util/concurrent_task_limiter_impl.h +67 -0
  786. package/deps/rocksdb/rocksdb/util/core_local.h +83 -0
  787. package/deps/rocksdb/rocksdb/util/crc32c.cc +1283 -0
  788. package/deps/rocksdb/rocksdb/util/crc32c.h +51 -0
  789. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +169 -0
  790. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +50 -0
  791. package/deps/rocksdb/rocksdb/util/crc32c_ppc.c +94 -0
  792. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +19 -0
  793. package/deps/rocksdb/rocksdb/util/crc32c_ppc_asm.S +756 -0
  794. package/deps/rocksdb/rocksdb/util/crc32c_ppc_constants.h +900 -0
  795. package/deps/rocksdb/rocksdb/util/crc32c_test.cc +180 -0
  796. package/deps/rocksdb/rocksdb/util/defer.h +52 -0
  797. package/deps/rocksdb/rocksdb/util/defer_test.cc +39 -0
  798. package/deps/rocksdb/rocksdb/util/duplicate_detector.h +68 -0
  799. package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +70 -0
  800. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +214 -0
  801. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +323 -0
  802. package/deps/rocksdb/rocksdb/util/fastrange.h +112 -0
  803. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +136 -0
  804. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +98 -0
  805. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +449 -0
  806. package/deps/rocksdb/rocksdb/util/filelock_test.cc +152 -0
  807. package/deps/rocksdb/rocksdb/util/filter_bench.cc +781 -0
  808. package/deps/rocksdb/rocksdb/util/gflags_compat.h +20 -0
  809. package/deps/rocksdb/rocksdb/util/hash.cc +83 -0
  810. package/deps/rocksdb/rocksdb/util/hash.h +107 -0
  811. package/deps/rocksdb/rocksdb/util/hash_map.h +67 -0
  812. package/deps/rocksdb/rocksdb/util/hash_test.cc +593 -0
  813. package/deps/rocksdb/rocksdb/util/heap.h +166 -0
  814. package/deps/rocksdb/rocksdb/util/heap_test.cc +139 -0
  815. package/deps/rocksdb/rocksdb/util/kv_map.h +33 -0
  816. package/deps/rocksdb/rocksdb/util/log_write_bench.cc +86 -0
  817. package/deps/rocksdb/rocksdb/util/math.h +186 -0
  818. package/deps/rocksdb/rocksdb/util/math128.h +298 -0
  819. package/deps/rocksdb/rocksdb/util/murmurhash.cc +191 -0
  820. package/deps/rocksdb/rocksdb/util/murmurhash.h +42 -0
  821. package/deps/rocksdb/rocksdb/util/mutexlock.h +186 -0
  822. package/deps/rocksdb/rocksdb/util/ppc-opcode.h +27 -0
  823. package/deps/rocksdb/rocksdb/util/random.cc +56 -0
  824. package/deps/rocksdb/rocksdb/util/random.h +186 -0
  825. package/deps/rocksdb/rocksdb/util/random_test.cc +105 -0
  826. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +340 -0
  827. package/deps/rocksdb/rocksdb/util/rate_limiter.h +113 -0
  828. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +251 -0
  829. package/deps/rocksdb/rocksdb/util/repeatable_thread.h +151 -0
  830. package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +107 -0
  831. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +1201 -0
  832. package/deps/rocksdb/rocksdb/util/ribbon_impl.h +1062 -0
  833. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +931 -0
  834. package/deps/rocksdb/rocksdb/util/set_comparator.h +22 -0
  835. package/deps/rocksdb/rocksdb/util/slice.cc +243 -0
  836. package/deps/rocksdb/rocksdb/util/slice_test.cc +163 -0
  837. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +153 -0
  838. package/deps/rocksdb/rocksdb/util/status.cc +149 -0
  839. package/deps/rocksdb/rocksdb/util/stderr_logger.h +31 -0
  840. package/deps/rocksdb/rocksdb/util/stop_watch.h +118 -0
  841. package/deps/rocksdb/rocksdb/util/string_util.cc +422 -0
  842. package/deps/rocksdb/rocksdb/util/string_util.h +144 -0
  843. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +354 -0
  844. package/deps/rocksdb/rocksdb/util/thread_local.cc +554 -0
  845. package/deps/rocksdb/rocksdb/util/thread_local.h +101 -0
  846. package/deps/rocksdb/rocksdb/util/thread_local_test.cc +583 -0
  847. package/deps/rocksdb/rocksdb/util/thread_operation.h +121 -0
  848. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +506 -0
  849. package/deps/rocksdb/rocksdb/util/threadpool_imp.h +112 -0
  850. package/deps/rocksdb/rocksdb/util/timer.h +331 -0
  851. package/deps/rocksdb/rocksdb/util/timer_queue.h +230 -0
  852. package/deps/rocksdb/rocksdb/util/timer_queue_test.cc +72 -0
  853. package/deps/rocksdb/rocksdb/util/timer_test.cc +399 -0
  854. package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +80 -0
  855. package/deps/rocksdb/rocksdb/util/vector_iterator.h +101 -0
  856. package/deps/rocksdb/rocksdb/util/work_queue.h +148 -0
  857. package/deps/rocksdb/rocksdb/util/work_queue_test.cc +268 -0
  858. package/deps/rocksdb/rocksdb/util/xxh3p.h +1392 -0
  859. package/deps/rocksdb/rocksdb/util/xxhash.cc +1158 -0
  860. package/deps/rocksdb/rocksdb/util/xxhash.h +598 -0
  861. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +2354 -0
  862. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db_test.cc +2955 -0
  863. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +488 -0
  864. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +199 -0
  865. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +112 -0
  866. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +266 -0
  867. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +52 -0
  868. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +2167 -0
  869. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +500 -0
  870. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +113 -0
  871. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +147 -0
  872. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +66 -0
  873. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +2386 -0
  874. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +281 -0
  875. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.h +58 -0
  876. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +314 -0
  877. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +244 -0
  878. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +47 -0
  879. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +42 -0
  880. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_format_test.cc +375 -0
  881. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +327 -0
  882. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_row_merge_test.cc +114 -0
  883. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_serialize_test.cc +187 -0
  884. package/deps/rocksdb/rocksdb/utilities/cassandra/format.cc +390 -0
  885. package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +184 -0
  886. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +67 -0
  887. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +44 -0
  888. package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +75 -0
  889. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +72 -0
  890. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +43 -0
  891. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +588 -0
  892. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +82 -0
  893. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +821 -0
  894. package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +37 -0
  895. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +29 -0
  896. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +27 -0
  897. package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +25 -0
  898. package/deps/rocksdb/rocksdb/utilities/debug.cc +82 -0
  899. package/deps/rocksdb/rocksdb/utilities/env_librados.cc +1497 -0
  900. package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +1146 -0
  901. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +262 -0
  902. package/deps/rocksdb/rocksdb/utilities/env_mirror_test.cc +223 -0
  903. package/deps/rocksdb/rocksdb/utilities/env_timed.cc +145 -0
  904. package/deps/rocksdb/rocksdb/utilities/env_timed_test.cc +44 -0
  905. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +490 -0
  906. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +242 -0
  907. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +581 -0
  908. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +437 -0
  909. package/deps/rocksdb/rocksdb/utilities/leveldb_options/leveldb_options.cc +56 -0
  910. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +275 -0
  911. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +52 -0
  912. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.cc +59 -0
  913. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +39 -0
  914. package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +77 -0
  915. package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +83 -0
  916. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +97 -0
  917. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +38 -0
  918. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +59 -0
  919. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +31 -0
  920. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +117 -0
  921. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +49 -0
  922. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +598 -0
  923. package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +69 -0
  924. package/deps/rocksdb/rocksdb/utilities/merge_operators.h +55 -0
  925. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +87 -0
  926. package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +174 -0
  927. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +168 -0
  928. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +431 -0
  929. package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +159 -0
  930. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +655 -0
  931. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +425 -0
  932. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +156 -0
  933. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +609 -0
  934. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +296 -0
  935. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file_buffer.h +127 -0
  936. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.cc +86 -0
  937. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +125 -0
  938. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table.h +238 -0
  939. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_bench.cc +308 -0
  940. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +168 -0
  941. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +160 -0
  942. package/deps/rocksdb/rocksdb/utilities/persistent_cache/lrulist.h +174 -0
  943. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +360 -0
  944. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +456 -0
  945. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.h +286 -0
  946. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.cc +167 -0
  947. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +339 -0
  948. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_util.h +67 -0
  949. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +140 -0
  950. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +142 -0
  951. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +285 -0
  952. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.h +231 -0
  953. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +494 -0
  954. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +356 -0
  955. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +224 -0
  956. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +122 -0
  957. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.h +72 -0
  958. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +244 -0
  959. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +125 -0
  960. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +48 -0
  961. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +29 -0
  962. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.h +82 -0
  963. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_tracker.h +209 -0
  964. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +720 -0
  965. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +223 -0
  966. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +181 -0
  967. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +319 -0
  968. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_tracker.cc +270 -0
  969. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_tracker.h +99 -0
  970. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_lock_manager.h +30 -0
  971. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +306 -0
  972. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.AGPLv3 +661 -0
  973. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.APACHEv2 +174 -0
  974. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.GPLv2 +339 -0
  975. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/db.h +76 -0
  976. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/ft/comparator.h +138 -0
  977. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/ft/ft-status.h +102 -0
  978. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc +139 -0
  979. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.h +174 -0
  980. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc +222 -0
  981. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.h +141 -0
  982. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc +525 -0
  983. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h +253 -0
  984. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +1007 -0
  985. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +560 -0
  986. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc +527 -0
  987. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc +265 -0
  988. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.h +178 -0
  989. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc +520 -0
  990. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.h +302 -0
  991. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc +120 -0
  992. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.h +92 -0
  993. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc +213 -0
  994. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.h +124 -0
  995. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/memory.h +215 -0
  996. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h +39 -0
  997. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_atomic.h +130 -0
  998. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +82 -0
  999. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_instrumentation.h +286 -0
  1000. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_portability.h +87 -0
  1001. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_pthread.h +520 -0
  1002. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_race_tools.h +179 -0
  1003. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +172 -0
  1004. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h +27 -0
  1005. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc +132 -0
  1006. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc +153 -0
  1007. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.h +98 -0
  1008. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +144 -0
  1009. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc +201 -0
  1010. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.h +141 -0
  1011. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt.h +794 -0
  1012. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt_impl.h +1295 -0
  1013. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h +165 -0
  1014. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/status.h +76 -0
  1015. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +479 -0
  1016. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +130 -0
  1017. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.cc +156 -0
  1018. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h +146 -0
  1019. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +196 -0
  1020. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.h +101 -0
  1021. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +111 -0
  1022. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +87 -0
  1023. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1418 -0
  1024. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +752 -0
  1025. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +232 -0
  1026. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +628 -0
  1027. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +228 -0
  1028. package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +49 -0
  1029. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +678 -0
  1030. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +373 -0
  1031. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +135 -0
  1032. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.h +26 -0
  1033. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +6350 -0
  1034. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +522 -0
  1035. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +188 -0
  1036. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +80 -0
  1037. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +3531 -0
  1038. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +483 -0
  1039. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +119 -0
  1040. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +999 -0
  1041. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +1109 -0
  1042. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +786 -0
  1043. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +1039 -0
  1044. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +341 -0
  1045. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +470 -0
  1046. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +108 -0
  1047. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +332 -0
  1048. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +353 -0
  1049. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +703 -0
  1050. package/deps/rocksdb/rocksdb/utilities/util_merge_operators_test.cc +99 -0
  1051. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +617 -0
  1052. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +345 -0
  1053. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +569 -0
  1054. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1867 -0
  1055. package/deps/rocksdb/rocksdb.gyp +475 -0
  1056. package/deps/snappy/freebsd/config.h +135 -0
  1057. package/deps/snappy/freebsd/snappy-stubs-public.h +100 -0
  1058. package/deps/snappy/linux/config.h +135 -0
  1059. package/deps/snappy/linux/snappy-stubs-public.h +100 -0
  1060. package/deps/snappy/mac/config.h +137 -0
  1061. package/deps/snappy/mac/snappy-stubs-public.h +100 -0
  1062. package/deps/snappy/openbsd/config.h +135 -0
  1063. package/deps/snappy/openbsd/snappy-stubs-public.h +100 -0
  1064. package/deps/snappy/snappy-1.1.7/COPYING +54 -0
  1065. package/deps/snappy/snappy-1.1.7/cmake/SnappyConfig.cmake +1 -0
  1066. package/deps/snappy/snappy-1.1.7/cmake/config.h.in +62 -0
  1067. package/deps/snappy/snappy-1.1.7/snappy-c.cc +90 -0
  1068. package/deps/snappy/snappy-1.1.7/snappy-c.h +138 -0
  1069. package/deps/snappy/snappy-1.1.7/snappy-internal.h +224 -0
  1070. package/deps/snappy/snappy-1.1.7/snappy-sinksource.cc +104 -0
  1071. package/deps/snappy/snappy-1.1.7/snappy-sinksource.h +182 -0
  1072. package/deps/snappy/snappy-1.1.7/snappy-stubs-internal.cc +42 -0
  1073. package/deps/snappy/snappy-1.1.7/snappy-stubs-internal.h +561 -0
  1074. package/deps/snappy/snappy-1.1.7/snappy-stubs-public.h.in +94 -0
  1075. package/deps/snappy/snappy-1.1.7/snappy-test.cc +612 -0
  1076. package/deps/snappy/snappy-1.1.7/snappy-test.h +573 -0
  1077. package/deps/snappy/snappy-1.1.7/snappy.cc +1515 -0
  1078. package/deps/snappy/snappy-1.1.7/snappy.h +203 -0
  1079. package/deps/snappy/snappy-1.1.7/snappy_unittest.cc +1410 -0
  1080. package/deps/snappy/snappy.gyp +90 -0
  1081. package/deps/snappy/solaris/config.h +135 -0
  1082. package/deps/snappy/solaris/snappy-stubs-public.h +100 -0
  1083. package/deps/snappy/win32/config.h +29 -0
  1084. package/deps/snappy/win32/snappy-stubs-public.h +100 -0
  1085. package/iterator.js +55 -0
  1086. package/leveldown.js +113 -0
  1087. package/package-lock.json +23687 -0
  1088. package/package.json +70 -0
@@ -0,0 +1,3563 @@
1
+ // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+ //
6
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7
+ // Use of this source code is governed by a BSD-style license that can be
8
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
+ #include "table/block_based/block_based_table_reader.h"
10
+
11
+ #include <algorithm>
12
+ #include <array>
13
+ #include <limits>
14
+ #include <string>
15
+ #include <utility>
16
+ #include <vector>
17
+
18
+ #include "cache/sharded_cache.h"
19
+
20
+ #include "db/dbformat.h"
21
+ #include "db/pinned_iterators_manager.h"
22
+ #include "file/file_prefetch_buffer.h"
23
+ #include "file/file_util.h"
24
+ #include "file/random_access_file_reader.h"
25
+ #include "monitoring/perf_context_imp.h"
26
+ #include "options/options_helper.h"
27
+ #include "rocksdb/cache.h"
28
+ #include "rocksdb/comparator.h"
29
+ #include "rocksdb/env.h"
30
+ #include "rocksdb/file_system.h"
31
+ #include "rocksdb/filter_policy.h"
32
+ #include "rocksdb/iterator.h"
33
+ #include "rocksdb/options.h"
34
+ #include "rocksdb/statistics.h"
35
+ #include "rocksdb/table.h"
36
+ #include "rocksdb/table_properties.h"
37
+ #include "table/block_based/binary_search_index_reader.h"
38
+ #include "table/block_based/block.h"
39
+ #include "table/block_based/block_based_filter_block.h"
40
+ #include "table/block_based/block_based_table_factory.h"
41
+ #include "table/block_based/block_based_table_iterator.h"
42
+ #include "table/block_based/block_prefix_index.h"
43
+ #include "table/block_based/filter_block.h"
44
+ #include "table/block_based/full_filter_block.h"
45
+ #include "table/block_based/hash_index_reader.h"
46
+ #include "table/block_based/partitioned_filter_block.h"
47
+ #include "table/block_based/partitioned_index_reader.h"
48
+ #include "table/block_fetcher.h"
49
+ #include "table/format.h"
50
+ #include "table/get_context.h"
51
+ #include "table/internal_iterator.h"
52
+ #include "table/meta_blocks.h"
53
+ #include "table/multiget_context.h"
54
+ #include "table/persistent_cache_helper.h"
55
+ #include "table/sst_file_writer_collectors.h"
56
+ #include "table/two_level_iterator.h"
57
+
58
+ #include "monitoring/perf_context_imp.h"
59
+ #include "port/lang.h"
60
+ #include "test_util/sync_point.h"
61
+ #include "util/coding.h"
62
+ #include "util/crc32c.h"
63
+ #include "util/stop_watch.h"
64
+ #include "util/string_util.h"
65
+
66
+ namespace ROCKSDB_NAMESPACE {
67
+
68
+ extern const uint64_t kBlockBasedTableMagicNumber;
69
+ extern const std::string kHashIndexPrefixesBlock;
70
+ extern const std::string kHashIndexPrefixesMetadataBlock;
71
+
72
+
73
+ // Found that 256 KB readahead size provides the best performance, based on
74
+ // experiments, for auto readahead. Experiment data is in PR #3282.
75
+ const size_t BlockBasedTable::kMaxAutoReadaheadSize = 256 * 1024;
76
+
77
+ BlockBasedTable::~BlockBasedTable() {
78
+ delete rep_;
79
+ }
80
+
81
+ std::atomic<uint64_t> BlockBasedTable::next_cache_key_id_(0);
82
+
83
// Primary template for per-type block traits. It is only declared here; each
// supported block-like type provides an explicit specialization.
template <class TBlocklike>
class BlocklikeTraits;
85
+
86
+ template <>
87
+ class BlocklikeTraits<BlockContents> {
88
+ public:
89
+ static BlockContents* Create(BlockContents&& contents,
90
+ size_t /* read_amp_bytes_per_bit */,
91
+ Statistics* /* statistics */,
92
+ bool /* using_zstd */,
93
+ const FilterPolicy* /* filter_policy */) {
94
+ return new BlockContents(std::move(contents));
95
+ }
96
+
97
+ static uint32_t GetNumRestarts(const BlockContents& /* contents */) {
98
+ return 0;
99
+ }
100
+ };
101
+
102
+ template <>
103
+ class BlocklikeTraits<ParsedFullFilterBlock> {
104
+ public:
105
+ static ParsedFullFilterBlock* Create(BlockContents&& contents,
106
+ size_t /* read_amp_bytes_per_bit */,
107
+ Statistics* /* statistics */,
108
+ bool /* using_zstd */,
109
+ const FilterPolicy* filter_policy) {
110
+ return new ParsedFullFilterBlock(filter_policy, std::move(contents));
111
+ }
112
+
113
+ static uint32_t GetNumRestarts(const ParsedFullFilterBlock& /* block */) {
114
+ return 0;
115
+ }
116
+ };
117
+
118
+ template <>
119
+ class BlocklikeTraits<Block> {
120
+ public:
121
+ static Block* Create(BlockContents&& contents, size_t read_amp_bytes_per_bit,
122
+ Statistics* statistics, bool /* using_zstd */,
123
+ const FilterPolicy* /* filter_policy */) {
124
+ return new Block(std::move(contents), read_amp_bytes_per_bit, statistics);
125
+ }
126
+
127
+ static uint32_t GetNumRestarts(const Block& block) {
128
+ return block.NumRestarts();
129
+ }
130
+ };
131
+
132
+ template <>
133
+ class BlocklikeTraits<UncompressionDict> {
134
+ public:
135
+ static UncompressionDict* Create(BlockContents&& contents,
136
+ size_t /* read_amp_bytes_per_bit */,
137
+ Statistics* /* statistics */,
138
+ bool using_zstd,
139
+ const FilterPolicy* /* filter_policy */) {
140
+ return new UncompressionDict(contents.data, std::move(contents.allocation),
141
+ using_zstd);
142
+ }
143
+
144
+ static uint32_t GetNumRestarts(const UncompressionDict& /* dict */) {
145
+ return 0;
146
+ }
147
+ };
148
+
149
+ namespace {
150
+ // Read the block identified by "handle" from "file".
151
+ // The only relevant option is options.verify_checksums for now.
152
+ // On failure return non-OK.
153
+ // On success fill *result and return OK - caller owns *result
154
+ // @param uncompression_dict Data for presetting the compression library's
155
+ // dictionary.
156
+ template <typename TBlocklike>
157
+ Status ReadBlockFromFile(
158
+ RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer,
159
+ const Footer& footer, const ReadOptions& options, const BlockHandle& handle,
160
+ std::unique_ptr<TBlocklike>* result, const ImmutableCFOptions& ioptions,
161
+ bool do_uncompress, bool maybe_compressed, BlockType block_type,
162
+ const UncompressionDict& uncompression_dict,
163
+ const PersistentCacheOptions& cache_options, size_t read_amp_bytes_per_bit,
164
+ MemoryAllocator* memory_allocator, bool for_compaction, bool using_zstd,
165
+ const FilterPolicy* filter_policy) {
166
+ assert(result);
167
+
168
+ BlockContents contents;
169
+ BlockFetcher block_fetcher(
170
+ file, prefetch_buffer, footer, options, handle, &contents, ioptions,
171
+ do_uncompress, maybe_compressed, block_type, uncompression_dict,
172
+ cache_options, memory_allocator, nullptr, for_compaction);
173
+ Status s = block_fetcher.ReadBlockContents();
174
+ if (s.ok()) {
175
+ result->reset(BlocklikeTraits<TBlocklike>::Create(
176
+ std::move(contents), read_amp_bytes_per_bit, ioptions.statistics,
177
+ using_zstd, filter_policy));
178
+ }
179
+
180
+ return s;
181
+ }
182
+
183
+ // Delete the entry resided in the cache.
184
+ template <class Entry>
185
+ void DeleteCachedEntry(const Slice& /*key*/, void* value) {
186
+ auto entry = reinterpret_cast<Entry*>(value);
187
+ delete entry;
188
+ }
189
+
190
+ // Release the cached entry and decrement its ref count.
191
+ // Do not force erase
192
+ void ReleaseCachedEntry(void* arg, void* h) {
193
+ Cache* cache = reinterpret_cast<Cache*>(arg);
194
+ Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
195
+ cache->Release(handle, false /* force_erase */);
196
+ }
197
+
198
+ // For hash based index, return true if prefix_extractor and
199
+ // prefix_extractor_block mismatch, false otherwise. This flag will be used
200
+ // as total_order_seek via NewIndexIterator
201
+ bool PrefixExtractorChanged(const TableProperties* table_properties,
202
+ const SliceTransform* prefix_extractor) {
203
+ // BlockBasedTableOptions::kHashSearch requires prefix_extractor to be set.
204
+ // Turn off hash index in prefix_extractor is not set; if prefix_extractor
205
+ // is set but prefix_extractor_block is not set, also disable hash index
206
+ if (prefix_extractor == nullptr || table_properties == nullptr ||
207
+ table_properties->prefix_extractor_name.empty()) {
208
+ return true;
209
+ }
210
+
211
+ // prefix_extractor and prefix_extractor_block are both non-empty
212
+ if (table_properties->prefix_extractor_name.compare(
213
+ prefix_extractor->Name()) != 0) {
214
+ return true;
215
+ } else {
216
+ return false;
217
+ }
218
+ }
219
+
220
+ CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) {
221
+ CacheAllocationPtr heap_buf;
222
+ heap_buf = AllocateBlock(buf.size(), allocator);
223
+ memcpy(heap_buf.get(), buf.data(), buf.size());
224
+ return heap_buf;
225
+ }
226
+ } // namespace
227
+
228
+ void BlockBasedTable::UpdateCacheHitMetrics(BlockType block_type,
229
+ GetContext* get_context,
230
+ size_t usage) const {
231
+ Statistics* const statistics = rep_->ioptions.statistics;
232
+
233
+ PERF_COUNTER_ADD(block_cache_hit_count, 1);
234
+ PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1,
235
+ static_cast<uint32_t>(rep_->level));
236
+
237
+ if (get_context) {
238
+ ++get_context->get_context_stats_.num_cache_hit;
239
+ get_context->get_context_stats_.num_cache_bytes_read += usage;
240
+ } else {
241
+ RecordTick(statistics, BLOCK_CACHE_HIT);
242
+ RecordTick(statistics, BLOCK_CACHE_BYTES_READ, usage);
243
+ }
244
+
245
+ switch (block_type) {
246
+ case BlockType::kFilter:
247
+ PERF_COUNTER_ADD(block_cache_filter_hit_count, 1);
248
+
249
+ if (get_context) {
250
+ ++get_context->get_context_stats_.num_cache_filter_hit;
251
+ } else {
252
+ RecordTick(statistics, BLOCK_CACHE_FILTER_HIT);
253
+ }
254
+ break;
255
+
256
+ case BlockType::kCompressionDictionary:
257
+ // TODO: introduce perf counter for compression dictionary hit count
258
+ if (get_context) {
259
+ ++get_context->get_context_stats_.num_cache_compression_dict_hit;
260
+ } else {
261
+ RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_HIT);
262
+ }
263
+ break;
264
+
265
+ case BlockType::kIndex:
266
+ PERF_COUNTER_ADD(block_cache_index_hit_count, 1);
267
+
268
+ if (get_context) {
269
+ ++get_context->get_context_stats_.num_cache_index_hit;
270
+ } else {
271
+ RecordTick(statistics, BLOCK_CACHE_INDEX_HIT);
272
+ }
273
+ break;
274
+
275
+ default:
276
+ // TODO: introduce dedicated tickers/statistics/counters
277
+ // for range tombstones
278
+ if (get_context) {
279
+ ++get_context->get_context_stats_.num_cache_data_hit;
280
+ } else {
281
+ RecordTick(statistics, BLOCK_CACHE_DATA_HIT);
282
+ }
283
+ break;
284
+ }
285
+ }
286
+
287
+ void BlockBasedTable::UpdateCacheMissMetrics(BlockType block_type,
288
+ GetContext* get_context) const {
289
+ Statistics* const statistics = rep_->ioptions.statistics;
290
+
291
+ // TODO: introduce aggregate (not per-level) block cache miss count
292
+ PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 1,
293
+ static_cast<uint32_t>(rep_->level));
294
+
295
+ if (get_context) {
296
+ ++get_context->get_context_stats_.num_cache_miss;
297
+ } else {
298
+ RecordTick(statistics, BLOCK_CACHE_MISS);
299
+ }
300
+
301
+ // TODO: introduce perf counters for misses per block type
302
+ switch (block_type) {
303
+ case BlockType::kFilter:
304
+ if (get_context) {
305
+ ++get_context->get_context_stats_.num_cache_filter_miss;
306
+ } else {
307
+ RecordTick(statistics, BLOCK_CACHE_FILTER_MISS);
308
+ }
309
+ break;
310
+
311
+ case BlockType::kCompressionDictionary:
312
+ if (get_context) {
313
+ ++get_context->get_context_stats_.num_cache_compression_dict_miss;
314
+ } else {
315
+ RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_MISS);
316
+ }
317
+ break;
318
+
319
+ case BlockType::kIndex:
320
+ if (get_context) {
321
+ ++get_context->get_context_stats_.num_cache_index_miss;
322
+ } else {
323
+ RecordTick(statistics, BLOCK_CACHE_INDEX_MISS);
324
+ }
325
+ break;
326
+
327
+ default:
328
+ // TODO: introduce dedicated tickers/statistics/counters
329
+ // for range tombstones
330
+ if (get_context) {
331
+ ++get_context->get_context_stats_.num_cache_data_miss;
332
+ } else {
333
+ RecordTick(statistics, BLOCK_CACHE_DATA_MISS);
334
+ }
335
+ break;
336
+ }
337
+ }
338
+
339
+ void BlockBasedTable::UpdateCacheInsertionMetrics(BlockType block_type,
340
+ GetContext* get_context,
341
+ size_t usage,
342
+ bool redundant) const {
343
+ Statistics* const statistics = rep_->ioptions.statistics;
344
+
345
+ // TODO: introduce perf counters for block cache insertions
346
+ if (get_context) {
347
+ ++get_context->get_context_stats_.num_cache_add;
348
+ if (redundant) {
349
+ ++get_context->get_context_stats_.num_cache_add_redundant;
350
+ }
351
+ get_context->get_context_stats_.num_cache_bytes_write += usage;
352
+ } else {
353
+ RecordTick(statistics, BLOCK_CACHE_ADD);
354
+ if (redundant) {
355
+ RecordTick(statistics, BLOCK_CACHE_ADD_REDUNDANT);
356
+ }
357
+ RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, usage);
358
+ }
359
+
360
+ switch (block_type) {
361
+ case BlockType::kFilter:
362
+ if (get_context) {
363
+ ++get_context->get_context_stats_.num_cache_filter_add;
364
+ if (redundant) {
365
+ ++get_context->get_context_stats_.num_cache_filter_add_redundant;
366
+ }
367
+ get_context->get_context_stats_.num_cache_filter_bytes_insert += usage;
368
+ } else {
369
+ RecordTick(statistics, BLOCK_CACHE_FILTER_ADD);
370
+ if (redundant) {
371
+ RecordTick(statistics, BLOCK_CACHE_FILTER_ADD_REDUNDANT);
372
+ }
373
+ RecordTick(statistics, BLOCK_CACHE_FILTER_BYTES_INSERT, usage);
374
+ }
375
+ break;
376
+
377
+ case BlockType::kCompressionDictionary:
378
+ if (get_context) {
379
+ ++get_context->get_context_stats_.num_cache_compression_dict_add;
380
+ if (redundant) {
381
+ ++get_context->get_context_stats_
382
+ .num_cache_compression_dict_add_redundant;
383
+ }
384
+ get_context->get_context_stats_
385
+ .num_cache_compression_dict_bytes_insert += usage;
386
+ } else {
387
+ RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_ADD);
388
+ if (redundant) {
389
+ RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT);
390
+ }
391
+ RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_BYTES_INSERT,
392
+ usage);
393
+ }
394
+ break;
395
+
396
+ case BlockType::kIndex:
397
+ if (get_context) {
398
+ ++get_context->get_context_stats_.num_cache_index_add;
399
+ if (redundant) {
400
+ ++get_context->get_context_stats_.num_cache_index_add_redundant;
401
+ }
402
+ get_context->get_context_stats_.num_cache_index_bytes_insert += usage;
403
+ } else {
404
+ RecordTick(statistics, BLOCK_CACHE_INDEX_ADD);
405
+ if (redundant) {
406
+ RecordTick(statistics, BLOCK_CACHE_INDEX_ADD_REDUNDANT);
407
+ }
408
+ RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT, usage);
409
+ }
410
+ break;
411
+
412
+ default:
413
+ // TODO: introduce dedicated tickers/statistics/counters
414
+ // for range tombstones
415
+ if (get_context) {
416
+ ++get_context->get_context_stats_.num_cache_data_add;
417
+ if (redundant) {
418
+ ++get_context->get_context_stats_.num_cache_data_add_redundant;
419
+ }
420
+ get_context->get_context_stats_.num_cache_data_bytes_insert += usage;
421
+ } else {
422
+ RecordTick(statistics, BLOCK_CACHE_DATA_ADD);
423
+ if (redundant) {
424
+ RecordTick(statistics, BLOCK_CACHE_DATA_ADD_REDUNDANT);
425
+ }
426
+ RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT, usage);
427
+ }
428
+ break;
429
+ }
430
+ }
431
+
432
+ Cache::Handle* BlockBasedTable::GetEntryFromCache(
433
+ Cache* block_cache, const Slice& key, BlockType block_type,
434
+ GetContext* get_context) const {
435
+ auto cache_handle = block_cache->Lookup(key, rep_->ioptions.statistics);
436
+
437
+ if (cache_handle != nullptr) {
438
+ UpdateCacheHitMetrics(block_type, get_context,
439
+ block_cache->GetUsage(cache_handle));
440
+ } else {
441
+ UpdateCacheMissMetrics(block_type, get_context);
442
+ }
443
+
444
+ return cache_handle;
445
+ }
446
+
447
+ // Helper function to setup the cache key's prefix for the Table.
448
+ void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep) {
449
+ assert(kMaxCacheKeyPrefixSize >= 10);
450
+ rep->cache_key_prefix_size = 0;
451
+ rep->compressed_cache_key_prefix_size = 0;
452
+ if (rep->table_options.block_cache != nullptr) {
453
+ GenerateCachePrefix<Cache, FSRandomAccessFile>(
454
+ rep->table_options.block_cache.get(), rep->file->file(),
455
+ &rep->cache_key_prefix[0], &rep->cache_key_prefix_size);
456
+ }
457
+ if (rep->table_options.persistent_cache != nullptr) {
458
+ GenerateCachePrefix<PersistentCache, FSRandomAccessFile>(
459
+ rep->table_options.persistent_cache.get(), rep->file->file(),
460
+ &rep->persistent_cache_key_prefix[0],
461
+ &rep->persistent_cache_key_prefix_size);
462
+ }
463
+ if (rep->table_options.block_cache_compressed != nullptr) {
464
+ GenerateCachePrefix<Cache, FSRandomAccessFile>(
465
+ rep->table_options.block_cache_compressed.get(), rep->file->file(),
466
+ &rep->compressed_cache_key_prefix[0],
467
+ &rep->compressed_cache_key_prefix_size);
468
+ }
469
+ }
470
+
471
+ namespace {
472
+ // Return True if table_properties has `user_prop_name` has a `true` value
473
+ // or it doesn't contain this property (for backward compatible).
474
+ bool IsFeatureSupported(const TableProperties& table_properties,
475
+ const std::string& user_prop_name, Logger* info_log) {
476
+ auto& props = table_properties.user_collected_properties;
477
+ auto pos = props.find(user_prop_name);
478
+ // Older version doesn't have this value set. Skip this check.
479
+ if (pos != props.end()) {
480
+ if (pos->second == kPropFalse) {
481
+ return false;
482
+ } else if (pos->second != kPropTrue) {
483
+ ROCKS_LOG_WARN(info_log, "Property %s has invalidate value %s",
484
+ user_prop_name.c_str(), pos->second.c_str());
485
+ }
486
+ }
487
+ return true;
488
+ }
489
+
490
+ // Caller has to ensure seqno is not nullptr.
491
+ Status GetGlobalSequenceNumber(const TableProperties& table_properties,
492
+ SequenceNumber largest_seqno,
493
+ SequenceNumber* seqno) {
494
+ const auto& props = table_properties.user_collected_properties;
495
+ const auto version_pos = props.find(ExternalSstFilePropertyNames::kVersion);
496
+ const auto seqno_pos = props.find(ExternalSstFilePropertyNames::kGlobalSeqno);
497
+
498
+ *seqno = kDisableGlobalSequenceNumber;
499
+ if (version_pos == props.end()) {
500
+ if (seqno_pos != props.end()) {
501
+ std::array<char, 200> msg_buf;
502
+ // This is not an external sst file, global_seqno is not supported.
503
+ snprintf(
504
+ msg_buf.data(), msg_buf.max_size(),
505
+ "A non-external sst file have global seqno property with value %s",
506
+ seqno_pos->second.c_str());
507
+ return Status::Corruption(msg_buf.data());
508
+ }
509
+ return Status::OK();
510
+ }
511
+
512
+ uint32_t version = DecodeFixed32(version_pos->second.c_str());
513
+ if (version < 2) {
514
+ if (seqno_pos != props.end() || version != 1) {
515
+ std::array<char, 200> msg_buf;
516
+ // This is a v1 external sst file, global_seqno is not supported.
517
+ snprintf(msg_buf.data(), msg_buf.max_size(),
518
+ "An external sst file with version %u have global seqno "
519
+ "property with value %s",
520
+ version, seqno_pos->second.c_str());
521
+ return Status::Corruption(msg_buf.data());
522
+ }
523
+ return Status::OK();
524
+ }
525
+
526
+ // Since we have a plan to deprecate global_seqno, we do not return failure
527
+ // if seqno_pos == props.end(). We rely on version_pos to detect whether the
528
+ // SST is external.
529
+ SequenceNumber global_seqno(0);
530
+ if (seqno_pos != props.end()) {
531
+ global_seqno = DecodeFixed64(seqno_pos->second.c_str());
532
+ }
533
+ // SstTableReader open table reader with kMaxSequenceNumber as largest_seqno
534
+ // to denote it is unknown.
535
+ if (largest_seqno < kMaxSequenceNumber) {
536
+ if (global_seqno == 0) {
537
+ global_seqno = largest_seqno;
538
+ }
539
+ if (global_seqno != largest_seqno) {
540
+ std::array<char, 200> msg_buf;
541
+ snprintf(
542
+ msg_buf.data(), msg_buf.max_size(),
543
+ "An external sst file with version %u have global seqno property "
544
+ "with value %s, while largest seqno in the file is %llu",
545
+ version, seqno_pos->second.c_str(),
546
+ static_cast<unsigned long long>(largest_seqno));
547
+ return Status::Corruption(msg_buf.data());
548
+ }
549
+ }
550
+ *seqno = global_seqno;
551
+
552
+ if (global_seqno > kMaxSequenceNumber) {
553
+ std::array<char, 200> msg_buf;
554
+ snprintf(msg_buf.data(), msg_buf.max_size(),
555
+ "An external sst file with version %u have global seqno property "
556
+ "with value %llu, which is greater than kMaxSequenceNumber",
557
+ version, static_cast<unsigned long long>(global_seqno));
558
+ return Status::Corruption(msg_buf.data());
559
+ }
560
+
561
+ return Status::OK();
562
+ }
563
+ } // namespace
564
+
565
+ Slice BlockBasedTable::GetCacheKey(const char* cache_key_prefix,
566
+ size_t cache_key_prefix_size,
567
+ const BlockHandle& handle, char* cache_key) {
568
+ assert(cache_key != nullptr);
569
+ assert(cache_key_prefix_size != 0);
570
+ assert(cache_key_prefix_size <= kMaxCacheKeyPrefixSize);
571
+ memcpy(cache_key, cache_key_prefix, cache_key_prefix_size);
572
+ char* end =
573
+ EncodeVarint64(cache_key + cache_key_prefix_size, handle.offset());
574
+ return Slice(cache_key, static_cast<size_t>(end - cache_key));
575
+ }
576
+
577
+ Status BlockBasedTable::Open(
578
+ const ReadOptions& read_options, const ImmutableCFOptions& ioptions,
579
+ const EnvOptions& env_options, const BlockBasedTableOptions& table_options,
580
+ const InternalKeyComparator& internal_comparator,
581
+ std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
582
+ std::unique_ptr<TableReader>* table_reader,
583
+ const SliceTransform* prefix_extractor,
584
+ const bool prefetch_index_and_filter_in_cache, const bool skip_filters,
585
+ const int level, const bool immortal_table,
586
+ const SequenceNumber largest_seqno, const bool force_direct_prefetch,
587
+ TailPrefetchStats* tail_prefetch_stats,
588
+ BlockCacheTracer* const block_cache_tracer,
589
+ size_t max_file_size_for_l0_meta_pin) {
590
+ table_reader->reset();
591
+
592
+ Status s;
593
+ Footer footer;
594
+ std::unique_ptr<FilePrefetchBuffer> prefetch_buffer;
595
+
596
+ // Only retain read_options.deadline and read_options.io_timeout.
597
+ // In future, we may retain more
598
+ // options. Specifically, w ignore verify_checksums and default to
599
+ // checksum verification anyway when creating the index and filter
600
+ // readers.
601
+ ReadOptions ro;
602
+ ro.deadline = read_options.deadline;
603
+ ro.io_timeout = read_options.io_timeout;
604
+
605
+ // prefetch both index and filters, down to all partitions
606
+ const bool prefetch_all = prefetch_index_and_filter_in_cache || level == 0;
607
+ const bool preload_all = !table_options.cache_index_and_filter_blocks;
608
+
609
+ if (!ioptions.allow_mmap_reads) {
610
+ s = PrefetchTail(ro, file.get(), file_size, force_direct_prefetch,
611
+ tail_prefetch_stats, prefetch_all, preload_all,
612
+ &prefetch_buffer);
613
+ // Return error in prefetch path to users.
614
+ if (!s.ok()) {
615
+ return s;
616
+ }
617
+ } else {
618
+ // Should not prefetch for mmap mode.
619
+ prefetch_buffer.reset(new FilePrefetchBuffer(
620
+ nullptr, 0, 0, false /* enable */, true /* track_min_offset */));
621
+ }
622
+
623
+ // Read in the following order:
624
+ // 1. Footer
625
+ // 2. [metaindex block]
626
+ // 3. [meta block: properties]
627
+ // 4. [meta block: range deletion tombstone]
628
+ // 5. [meta block: compression dictionary]
629
+ // 6. [meta block: index]
630
+ // 7. [meta block: filter]
631
+ IOOptions opts;
632
+ s = PrepareIOFromReadOptions(ro, file->env(), opts);
633
+ if (s.ok()) {
634
+ s = ReadFooterFromFile(opts, file.get(), prefetch_buffer.get(), file_size,
635
+ &footer, kBlockBasedTableMagicNumber);
636
+ }
637
+ if (!s.ok()) {
638
+ return s;
639
+ }
640
+ if (!BlockBasedTableSupportedVersion(footer.version())) {
641
+ return Status::Corruption(
642
+ "Unknown Footer version. Maybe this file was created with newer "
643
+ "version of RocksDB?");
644
+ }
645
+
646
+ // We've successfully read the footer. We are ready to serve requests.
647
+ // Better not mutate rep_ after the creation. eg. internal_prefix_transform
648
+ // raw pointer will be used to create HashIndexReader, whose reset may
649
+ // access a dangling pointer.
650
+ BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch};
651
+ Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options,
652
+ internal_comparator, skip_filters,
653
+ file_size, level, immortal_table);
654
+ rep->file = std::move(file);
655
+ rep->footer = footer;
656
+ rep->hash_index_allow_collision = table_options.hash_index_allow_collision;
657
+ // We need to wrap data with internal_prefix_transform to make sure it can
658
+ // handle prefix correctly.
659
+ if (prefix_extractor != nullptr) {
660
+ rep->internal_prefix_transform.reset(
661
+ new InternalKeySliceTransform(prefix_extractor));
662
+ }
663
+ SetupCacheKeyPrefix(rep);
664
+ std::unique_ptr<BlockBasedTable> new_table(
665
+ new BlockBasedTable(rep, block_cache_tracer));
666
+
667
+ // page cache options
668
+ rep->persistent_cache_options =
669
+ PersistentCacheOptions(rep->table_options.persistent_cache,
670
+ std::string(rep->persistent_cache_key_prefix,
671
+ rep->persistent_cache_key_prefix_size),
672
+ rep->ioptions.statistics);
673
+
674
+ // Meta-blocks are not dictionary compressed. Explicitly set the dictionary
675
+ // handle to null, otherwise it may be seen as uninitialized during the below
676
+ // meta-block reads.
677
+ rep->compression_dict_handle = BlockHandle::NullBlockHandle();
678
+
679
+ // Read metaindex
680
+ std::unique_ptr<Block> metaindex;
681
+ std::unique_ptr<InternalIterator> metaindex_iter;
682
+ s = new_table->ReadMetaIndexBlock(ro, prefetch_buffer.get(), &metaindex,
683
+ &metaindex_iter);
684
+ if (!s.ok()) {
685
+ return s;
686
+ }
687
+
688
+ // Populates table_properties and some fields that depend on it,
689
+ // such as index_type.
690
+ s = new_table->ReadPropertiesBlock(ro, prefetch_buffer.get(),
691
+ metaindex_iter.get(), largest_seqno);
692
+ if (!s.ok()) {
693
+ return s;
694
+ }
695
+ s = new_table->ReadRangeDelBlock(ro, prefetch_buffer.get(),
696
+ metaindex_iter.get(), internal_comparator,
697
+ &lookup_context);
698
+ if (!s.ok()) {
699
+ return s;
700
+ }
701
+ s = new_table->PrefetchIndexAndFilterBlocks(
702
+ ro, prefetch_buffer.get(), metaindex_iter.get(), new_table.get(),
703
+ prefetch_all, table_options, level, file_size,
704
+ max_file_size_for_l0_meta_pin, &lookup_context);
705
+
706
+ if (s.ok()) {
707
+ // Update tail prefetch stats
708
+ assert(prefetch_buffer.get() != nullptr);
709
+ if (tail_prefetch_stats != nullptr) {
710
+ assert(prefetch_buffer->min_offset_read() < file_size);
711
+ tail_prefetch_stats->RecordEffectiveSize(
712
+ static_cast<size_t>(file_size) - prefetch_buffer->min_offset_read());
713
+ }
714
+
715
+ *table_reader = std::move(new_table);
716
+ }
717
+
718
+ return s;
719
+ }
720
+
721
+ Status BlockBasedTable::PrefetchTail(
722
+ const ReadOptions& ro, RandomAccessFileReader* file, uint64_t file_size,
723
+ bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats,
724
+ const bool prefetch_all, const bool preload_all,
725
+ std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer) {
726
+ size_t tail_prefetch_size = 0;
727
+ if (tail_prefetch_stats != nullptr) {
728
+ // Multiple threads may get a 0 (no history) when running in parallel,
729
+ // but it will get cleared after the first of them finishes.
730
+ tail_prefetch_size = tail_prefetch_stats->GetSuggestedPrefetchSize();
731
+ }
732
+ if (tail_prefetch_size == 0) {
733
+ // Before read footer, readahead backwards to prefetch data. Do more
734
+ // readahead if we're going to read index/filter.
735
+ // TODO: This may incorrectly select small readahead in case partitioned
736
+ // index/filter is enabled and top-level partition pinning is enabled.
737
+ // That's because we need to issue readahead before we read the properties,
738
+ // at which point we don't yet know the index type.
739
+ tail_prefetch_size = prefetch_all || preload_all ? 512 * 1024 : 4 * 1024;
740
+ }
741
+ size_t prefetch_off;
742
+ size_t prefetch_len;
743
+ if (file_size < tail_prefetch_size) {
744
+ prefetch_off = 0;
745
+ prefetch_len = static_cast<size_t>(file_size);
746
+ } else {
747
+ prefetch_off = static_cast<size_t>(file_size - tail_prefetch_size);
748
+ prefetch_len = tail_prefetch_size;
749
+ }
750
+ TEST_SYNC_POINT_CALLBACK("BlockBasedTable::Open::TailPrefetchLen",
751
+ &tail_prefetch_size);
752
+
753
+ // Try file system prefetch
754
+ if (!file->use_direct_io() && !force_direct_prefetch) {
755
+ if (!file->Prefetch(prefetch_off, prefetch_len).IsNotSupported()) {
756
+ prefetch_buffer->reset(
757
+ new FilePrefetchBuffer(nullptr, 0, 0, false, true));
758
+ return Status::OK();
759
+ }
760
+ }
761
+
762
+ // Use `FilePrefetchBuffer`
763
+ prefetch_buffer->reset(new FilePrefetchBuffer(nullptr, 0, 0, true, true));
764
+ IOOptions opts;
765
+ Status s = PrepareIOFromReadOptions(ro, file->env(), opts);
766
+ if (s.ok()) {
767
+ s = (*prefetch_buffer)->Prefetch(opts, file, prefetch_off, prefetch_len);
768
+ }
769
+ return s;
770
+ }
771
+
772
+ Status BlockBasedTable::TryReadPropertiesWithGlobalSeqno(
773
+ const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
774
+ const Slice& handle_value, TableProperties** table_properties) {
775
+ assert(table_properties != nullptr);
776
+ // If this is an external SST file ingested with write_global_seqno set to
777
+ // true, then we expect the checksum mismatch because checksum was written
778
+ // by SstFileWriter, but its global seqno in the properties block may have
779
+ // been changed during ingestion. In this case, we read the properties
780
+ // block, copy it to a memory buffer, change the global seqno to its
781
+ // original value, i.e. 0, and verify the checksum again.
782
+ BlockHandle props_block_handle;
783
+ CacheAllocationPtr tmp_buf;
784
+ Status s = ReadProperties(ro, handle_value, rep_->file.get(), prefetch_buffer,
785
+ rep_->footer, rep_->ioptions, table_properties,
786
+ false /* verify_checksum */, &props_block_handle,
787
+ &tmp_buf, false /* compression_type_missing */,
788
+ nullptr /* memory_allocator */);
789
+ if (s.ok() && tmp_buf) {
790
+ const auto seqno_pos_iter =
791
+ (*table_properties)
792
+ ->properties_offsets.find(
793
+ ExternalSstFilePropertyNames::kGlobalSeqno);
794
+ size_t block_size = static_cast<size_t>(props_block_handle.size());
795
+ if (seqno_pos_iter != (*table_properties)->properties_offsets.end()) {
796
+ uint64_t global_seqno_offset = seqno_pos_iter->second;
797
+ EncodeFixed64(
798
+ tmp_buf.get() + global_seqno_offset - props_block_handle.offset(), 0);
799
+ }
800
+ s = ROCKSDB_NAMESPACE::VerifyBlockChecksum(
801
+ rep_->footer.checksum(), tmp_buf.get(), block_size,
802
+ rep_->file->file_name(), props_block_handle.offset());
803
+ }
804
+ return s;
805
+ }
806
+
807
+ Status BlockBasedTable::ReadPropertiesBlock(
808
+ const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
809
+ InternalIterator* meta_iter, const SequenceNumber largest_seqno) {
810
+ bool found_properties_block = true;
811
+ Status s;
812
+ s = SeekToPropertiesBlock(meta_iter, &found_properties_block);
813
+
814
+ if (!s.ok()) {
815
+ ROCKS_LOG_WARN(rep_->ioptions.info_log,
816
+ "Error when seeking to properties block from file: %s",
817
+ s.ToString().c_str());
818
+ } else if (found_properties_block) {
819
+ s = meta_iter->status();
820
+ TableProperties* table_properties = nullptr;
821
+ if (s.ok()) {
822
+ s = ReadProperties(
823
+ ro, meta_iter->value(), rep_->file.get(), prefetch_buffer,
824
+ rep_->footer, rep_->ioptions, &table_properties,
825
+ true /* verify_checksum */, nullptr /* ret_block_handle */,
826
+ nullptr /* ret_block_contents */,
827
+ false /* compression_type_missing */, nullptr /* memory_allocator */);
828
+ }
829
+ IGNORE_STATUS_IF_ERROR(s);
830
+
831
+ if (s.IsCorruption()) {
832
+ s = TryReadPropertiesWithGlobalSeqno(
833
+ ro, prefetch_buffer, meta_iter->value(), &table_properties);
834
+ IGNORE_STATUS_IF_ERROR(s);
835
+ }
836
+ std::unique_ptr<TableProperties> props_guard;
837
+ if (table_properties != nullptr) {
838
+ props_guard.reset(table_properties);
839
+ }
840
+
841
+ if (!s.ok()) {
842
+ ROCKS_LOG_WARN(rep_->ioptions.info_log,
843
+ "Encountered error while reading data from properties "
844
+ "block %s",
845
+ s.ToString().c_str());
846
+ } else {
847
+ assert(table_properties != nullptr);
848
+ rep_->table_properties.reset(props_guard.release());
849
+ rep_->blocks_maybe_compressed =
850
+ rep_->table_properties->compression_name !=
851
+ CompressionTypeToString(kNoCompression);
852
+ rep_->blocks_definitely_zstd_compressed =
853
+ (rep_->table_properties->compression_name ==
854
+ CompressionTypeToString(kZSTD) ||
855
+ rep_->table_properties->compression_name ==
856
+ CompressionTypeToString(kZSTDNotFinalCompression));
857
+ }
858
+ } else {
859
+ ROCKS_LOG_ERROR(rep_->ioptions.info_log,
860
+ "Cannot find Properties block from file.");
861
+ }
862
+ #ifndef ROCKSDB_LITE
863
+ if (rep_->table_properties) {
864
+ ParseSliceTransform(rep_->table_properties->prefix_extractor_name,
865
+ &(rep_->table_prefix_extractor));
866
+ }
867
+ #endif // ROCKSDB_LITE
868
+
869
+ // Read the table properties, if provided.
870
+ if (rep_->table_properties) {
871
+ rep_->whole_key_filtering &=
872
+ IsFeatureSupported(*(rep_->table_properties),
873
+ BlockBasedTablePropertyNames::kWholeKeyFiltering,
874
+ rep_->ioptions.info_log);
875
+ rep_->prefix_filtering &=
876
+ IsFeatureSupported(*(rep_->table_properties),
877
+ BlockBasedTablePropertyNames::kPrefixFiltering,
878
+ rep_->ioptions.info_log);
879
+
880
+ rep_->index_key_includes_seq =
881
+ rep_->table_properties->index_key_is_user_key == 0;
882
+ rep_->index_value_is_full =
883
+ rep_->table_properties->index_value_is_delta_encoded == 0;
884
+
885
+ // Update index_type with the true type.
886
+ // If table properties don't contain index type, we assume that the table
887
+ // is in very old format and has kBinarySearch index type.
888
+ auto& props = rep_->table_properties->user_collected_properties;
889
+ auto pos = props.find(BlockBasedTablePropertyNames::kIndexType);
890
+ if (pos != props.end()) {
891
+ rep_->index_type = static_cast<BlockBasedTableOptions::IndexType>(
892
+ DecodeFixed32(pos->second.c_str()));
893
+ }
894
+
895
+ rep_->index_has_first_key =
896
+ rep_->index_type == BlockBasedTableOptions::kBinarySearchWithFirstKey;
897
+
898
+ s = GetGlobalSequenceNumber(*(rep_->table_properties), largest_seqno,
899
+ &(rep_->global_seqno));
900
+ if (!s.ok()) {
901
+ ROCKS_LOG_ERROR(rep_->ioptions.info_log, "%s", s.ToString().c_str());
902
+ }
903
+ }
904
+ return s;
905
+ }
906
+
907
+ Status BlockBasedTable::ReadRangeDelBlock(
908
+ const ReadOptions& read_options, FilePrefetchBuffer* prefetch_buffer,
909
+ InternalIterator* meta_iter,
910
+ const InternalKeyComparator& internal_comparator,
911
+ BlockCacheLookupContext* lookup_context) {
912
+ Status s;
913
+ bool found_range_del_block;
914
+ BlockHandle range_del_handle;
915
+ s = SeekToRangeDelBlock(meta_iter, &found_range_del_block, &range_del_handle);
916
+ if (!s.ok()) {
917
+ ROCKS_LOG_WARN(
918
+ rep_->ioptions.info_log,
919
+ "Error when seeking to range delete tombstones block from file: %s",
920
+ s.ToString().c_str());
921
+ } else if (found_range_del_block && !range_del_handle.IsNull()) {
922
+ std::unique_ptr<InternalIterator> iter(NewDataBlockIterator<DataBlockIter>(
923
+ read_options, range_del_handle,
924
+ /*input_iter=*/nullptr, BlockType::kRangeDeletion,
925
+ /*get_context=*/nullptr, lookup_context, Status(), prefetch_buffer));
926
+ assert(iter != nullptr);
927
+ s = iter->status();
928
+ if (!s.ok()) {
929
+ ROCKS_LOG_WARN(
930
+ rep_->ioptions.info_log,
931
+ "Encountered error while reading data from range del block %s",
932
+ s.ToString().c_str());
933
+ IGNORE_STATUS_IF_ERROR(s);
934
+ } else {
935
+ rep_->fragmented_range_dels =
936
+ std::make_shared<FragmentedRangeTombstoneList>(std::move(iter),
937
+ internal_comparator);
938
+ }
939
+ }
940
+ return s;
941
+ }
942
+
943
+ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
944
+ const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
945
+ InternalIterator* meta_iter, BlockBasedTable* new_table, bool prefetch_all,
946
+ const BlockBasedTableOptions& table_options, const int level,
947
+ size_t file_size, size_t max_file_size_for_l0_meta_pin,
948
+ BlockCacheLookupContext* lookup_context) {
949
+ Status s;
950
+
951
+ // Find filter handle and filter type
952
+ if (rep_->filter_policy) {
953
+ for (auto filter_type :
954
+ {Rep::FilterType::kFullFilter, Rep::FilterType::kPartitionedFilter,
955
+ Rep::FilterType::kBlockFilter}) {
956
+ std::string prefix;
957
+ switch (filter_type) {
958
+ case Rep::FilterType::kFullFilter:
959
+ prefix = kFullFilterBlockPrefix;
960
+ break;
961
+ case Rep::FilterType::kPartitionedFilter:
962
+ prefix = kPartitionedFilterBlockPrefix;
963
+ break;
964
+ case Rep::FilterType::kBlockFilter:
965
+ prefix = kFilterBlockPrefix;
966
+ break;
967
+ default:
968
+ assert(0);
969
+ }
970
+ std::string filter_block_key = prefix;
971
+ filter_block_key.append(rep_->filter_policy->Name());
972
+ if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle)
973
+ .ok()) {
974
+ rep_->filter_type = filter_type;
975
+ break;
976
+ }
977
+ }
978
+ }
979
+ // Partition filters cannot be enabled without partition indexes
980
+ assert(rep_->filter_type != Rep::FilterType::kPartitionedFilter ||
981
+ rep_->index_type == BlockBasedTableOptions::kTwoLevelIndexSearch);
982
+
983
+ // Find compression dictionary handle
984
+ bool found_compression_dict = false;
985
+ s = SeekToCompressionDictBlock(meta_iter, &found_compression_dict,
986
+ &rep_->compression_dict_handle);
987
+ if (!s.ok()) {
988
+ return s;
989
+ }
990
+
991
+ BlockBasedTableOptions::IndexType index_type = rep_->index_type;
992
+
993
+ const bool use_cache = table_options.cache_index_and_filter_blocks;
994
+
995
+ const bool maybe_flushed =
996
+ level == 0 && file_size <= max_file_size_for_l0_meta_pin;
997
+ std::function<bool(PinningTier, PinningTier)> is_pinned =
998
+ [maybe_flushed, &is_pinned](PinningTier pinning_tier,
999
+ PinningTier fallback_pinning_tier) {
1000
+ // Fallback to fallback would lead to infinite recursion. Disallow it.
1001
+ assert(fallback_pinning_tier != PinningTier::kFallback);
1002
+
1003
+ switch (pinning_tier) {
1004
+ case PinningTier::kFallback:
1005
+ return is_pinned(fallback_pinning_tier,
1006
+ PinningTier::kNone /* fallback_pinning_tier */);
1007
+ case PinningTier::kNone:
1008
+ return false;
1009
+ case PinningTier::kFlushedAndSimilar:
1010
+ return maybe_flushed;
1011
+ case PinningTier::kAll:
1012
+ return true;
1013
+ };
1014
+
1015
+ // In GCC, this is needed to suppress `control reaches end of non-void
1016
+ // function [-Werror=return-type]`.
1017
+ assert(false);
1018
+ return false;
1019
+ };
1020
+ const bool pin_top_level_index = is_pinned(
1021
+ table_options.metadata_cache_options.top_level_index_pinning,
1022
+ table_options.pin_top_level_index_and_filter ? PinningTier::kAll
1023
+ : PinningTier::kNone);
1024
+ const bool pin_partition =
1025
+ is_pinned(table_options.metadata_cache_options.partition_pinning,
1026
+ table_options.pin_l0_filter_and_index_blocks_in_cache
1027
+ ? PinningTier::kFlushedAndSimilar
1028
+ : PinningTier::kNone);
1029
+ const bool pin_unpartitioned =
1030
+ is_pinned(table_options.metadata_cache_options.unpartitioned_pinning,
1031
+ table_options.pin_l0_filter_and_index_blocks_in_cache
1032
+ ? PinningTier::kFlushedAndSimilar
1033
+ : PinningTier::kNone);
1034
+
1035
+ // pin the first level of index
1036
+ const bool pin_index =
1037
+ index_type == BlockBasedTableOptions::kTwoLevelIndexSearch
1038
+ ? pin_top_level_index
1039
+ : pin_unpartitioned;
1040
+ // prefetch the first level of index
1041
+ const bool prefetch_index = prefetch_all || pin_index;
1042
+
1043
+ std::unique_ptr<IndexReader> index_reader;
1044
+ s = new_table->CreateIndexReader(ro, prefetch_buffer, meta_iter, use_cache,
1045
+ prefetch_index, pin_index, lookup_context,
1046
+ &index_reader);
1047
+ if (!s.ok()) {
1048
+ return s;
1049
+ }
1050
+
1051
+ rep_->index_reader = std::move(index_reader);
1052
+
1053
+ // The partitions of partitioned index are always stored in cache. They
1054
+ // are hence follow the configuration for pin and prefetch regardless of
1055
+ // the value of cache_index_and_filter_blocks
1056
+ if (prefetch_all || pin_partition) {
1057
+ s = rep_->index_reader->CacheDependencies(ro, pin_partition);
1058
+ }
1059
+ if (!s.ok()) {
1060
+ return s;
1061
+ }
1062
+
1063
+ // pin the first level of filter
1064
+ const bool pin_filter =
1065
+ rep_->filter_type == Rep::FilterType::kPartitionedFilter
1066
+ ? pin_top_level_index
1067
+ : pin_unpartitioned;
1068
+ // prefetch the first level of filter
1069
+ const bool prefetch_filter = prefetch_all || pin_filter;
1070
+
1071
+ if (rep_->filter_policy) {
1072
+ auto filter = new_table->CreateFilterBlockReader(
1073
+ ro, prefetch_buffer, use_cache, prefetch_filter, pin_filter,
1074
+ lookup_context);
1075
+
1076
+ if (filter) {
1077
+ // Refer to the comment above about paritioned indexes always being cached
1078
+ if (prefetch_all || pin_partition) {
1079
+ s = filter->CacheDependencies(ro, pin_partition);
1080
+ if (!s.ok()) {
1081
+ return s;
1082
+ }
1083
+ }
1084
+ rep_->filter = std::move(filter);
1085
+ }
1086
+ }
1087
+
1088
+ if (!rep_->compression_dict_handle.IsNull()) {
1089
+ std::unique_ptr<UncompressionDictReader> uncompression_dict_reader;
1090
+ s = UncompressionDictReader::Create(
1091
+ this, ro, prefetch_buffer, use_cache, prefetch_all || pin_unpartitioned,
1092
+ pin_unpartitioned, lookup_context, &uncompression_dict_reader);
1093
+ if (!s.ok()) {
1094
+ return s;
1095
+ }
1096
+
1097
+ rep_->uncompression_dict_reader = std::move(uncompression_dict_reader);
1098
+ }
1099
+
1100
+ assert(s.ok());
1101
+ return s;
1102
+ }
1103
+
1104
+ void BlockBasedTable::SetupForCompaction() {
1105
+ switch (rep_->ioptions.access_hint_on_compaction_start) {
1106
+ case Options::NONE:
1107
+ break;
1108
+ case Options::NORMAL:
1109
+ rep_->file->file()->Hint(FSRandomAccessFile::kNormal);
1110
+ break;
1111
+ case Options::SEQUENTIAL:
1112
+ rep_->file->file()->Hint(FSRandomAccessFile::kSequential);
1113
+ break;
1114
+ case Options::WILLNEED:
1115
+ rep_->file->file()->Hint(FSRandomAccessFile::kWillNeed);
1116
+ break;
1117
+ default:
1118
+ assert(false);
1119
+ }
1120
+ }
1121
+
1122
+ std::shared_ptr<const TableProperties> BlockBasedTable::GetTableProperties()
1123
+ const {
1124
+ return rep_->table_properties;
1125
+ }
1126
+
1127
+ size_t BlockBasedTable::ApproximateMemoryUsage() const {
1128
+ size_t usage = 0;
1129
+ if (rep_->filter) {
1130
+ usage += rep_->filter->ApproximateMemoryUsage();
1131
+ }
1132
+ if (rep_->index_reader) {
1133
+ usage += rep_->index_reader->ApproximateMemoryUsage();
1134
+ }
1135
+ if (rep_->uncompression_dict_reader) {
1136
+ usage += rep_->uncompression_dict_reader->ApproximateMemoryUsage();
1137
+ }
1138
+ return usage;
1139
+ }
1140
+
1141
+ // Load the meta-index-block from the file. On success, return the loaded
1142
+ // metaindex
1143
+ // block and its iterator.
1144
+ Status BlockBasedTable::ReadMetaIndexBlock(
1145
+ const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
1146
+ std::unique_ptr<Block>* metaindex_block,
1147
+ std::unique_ptr<InternalIterator>* iter) {
1148
+ // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
1149
+ // it is an empty block.
1150
+ std::unique_ptr<Block> metaindex;
1151
+ Status s = ReadBlockFromFile(
1152
+ rep_->file.get(), prefetch_buffer, rep_->footer, ro,
1153
+ rep_->footer.metaindex_handle(), &metaindex, rep_->ioptions,
1154
+ true /* decompress */, true /*maybe_compressed*/, BlockType::kMetaIndex,
1155
+ UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options,
1156
+ 0 /* read_amp_bytes_per_bit */, GetMemoryAllocator(rep_->table_options),
1157
+ false /* for_compaction */, rep_->blocks_definitely_zstd_compressed,
1158
+ nullptr /* filter_policy */);
1159
+
1160
+ if (!s.ok()) {
1161
+ ROCKS_LOG_ERROR(rep_->ioptions.info_log,
1162
+ "Encountered error while reading data from properties"
1163
+ " block %s",
1164
+ s.ToString().c_str());
1165
+ return s;
1166
+ }
1167
+
1168
+ *metaindex_block = std::move(metaindex);
1169
+ // meta block uses bytewise comparator.
1170
+ iter->reset(metaindex_block->get()->NewDataIterator(
1171
+ BytewiseComparator(), kDisableGlobalSequenceNumber));
1172
+ return Status::OK();
1173
+ }
1174
+
1175
+ template <typename TBlocklike>
1176
+ Status BlockBasedTable::GetDataBlockFromCache(
1177
+ const Slice& block_cache_key, const Slice& compressed_block_cache_key,
1178
+ Cache* block_cache, Cache* block_cache_compressed,
1179
+ const ReadOptions& read_options, CachableEntry<TBlocklike>* block,
1180
+ const UncompressionDict& uncompression_dict, BlockType block_type,
1181
+ GetContext* get_context) const {
1182
+ const size_t read_amp_bytes_per_bit =
1183
+ block_type == BlockType::kData
1184
+ ? rep_->table_options.read_amp_bytes_per_bit
1185
+ : 0;
1186
+ assert(block);
1187
+ assert(block->IsEmpty());
1188
+
1189
+ Status s;
1190
+ BlockContents* compressed_block = nullptr;
1191
+ Cache::Handle* block_cache_compressed_handle = nullptr;
1192
+
1193
+ // Lookup uncompressed cache first
1194
+ if (block_cache != nullptr) {
1195
+ auto cache_handle = GetEntryFromCache(block_cache, block_cache_key,
1196
+ block_type, get_context);
1197
+ if (cache_handle != nullptr) {
1198
+ block->SetCachedValue(
1199
+ reinterpret_cast<TBlocklike*>(block_cache->Value(cache_handle)),
1200
+ block_cache, cache_handle);
1201
+ return s;
1202
+ }
1203
+ }
1204
+
1205
+ // If not found, search from the compressed block cache.
1206
+ assert(block->IsEmpty());
1207
+
1208
+ if (block_cache_compressed == nullptr) {
1209
+ return s;
1210
+ }
1211
+
1212
+ assert(!compressed_block_cache_key.empty());
1213
+ block_cache_compressed_handle =
1214
+ block_cache_compressed->Lookup(compressed_block_cache_key);
1215
+
1216
+ Statistics* statistics = rep_->ioptions.statistics;
1217
+
1218
+ // if we found in the compressed cache, then uncompress and insert into
1219
+ // uncompressed cache
1220
+ if (block_cache_compressed_handle == nullptr) {
1221
+ RecordTick(statistics, BLOCK_CACHE_COMPRESSED_MISS);
1222
+ return s;
1223
+ }
1224
+
1225
+ // found compressed block
1226
+ RecordTick(statistics, BLOCK_CACHE_COMPRESSED_HIT);
1227
+ compressed_block = reinterpret_cast<BlockContents*>(
1228
+ block_cache_compressed->Value(block_cache_compressed_handle));
1229
+ CompressionType compression_type = compressed_block->get_compression_type();
1230
+ assert(compression_type != kNoCompression);
1231
+
1232
+ // Retrieve the uncompressed contents into a new buffer
1233
+ BlockContents contents;
1234
+ UncompressionContext context(compression_type);
1235
+ UncompressionInfo info(context, uncompression_dict, compression_type);
1236
+ s = UncompressBlockContents(
1237
+ info, compressed_block->data.data(), compressed_block->data.size(),
1238
+ &contents, rep_->table_options.format_version, rep_->ioptions,
1239
+ GetMemoryAllocator(rep_->table_options));
1240
+
1241
+ // Insert uncompressed block into block cache
1242
+ if (s.ok()) {
1243
+ std::unique_ptr<TBlocklike> block_holder(
1244
+ BlocklikeTraits<TBlocklike>::Create(
1245
+ std::move(contents), read_amp_bytes_per_bit, statistics,
1246
+ rep_->blocks_definitely_zstd_compressed,
1247
+ rep_->table_options.filter_policy.get())); // uncompressed block
1248
+
1249
+ if (block_cache != nullptr && block_holder->own_bytes() &&
1250
+ read_options.fill_cache) {
1251
+ size_t charge = block_holder->ApproximateMemoryUsage();
1252
+ Cache::Handle* cache_handle = nullptr;
1253
+ s = block_cache->Insert(block_cache_key, block_holder.get(), charge,
1254
+ &DeleteCachedEntry<TBlocklike>, &cache_handle);
1255
+ if (s.ok()) {
1256
+ assert(cache_handle != nullptr);
1257
+ block->SetCachedValue(block_holder.release(), block_cache,
1258
+ cache_handle);
1259
+
1260
+ UpdateCacheInsertionMetrics(block_type, get_context, charge,
1261
+ s.IsOkOverwritten());
1262
+ } else {
1263
+ RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES);
1264
+ }
1265
+ } else {
1266
+ block->SetOwnedValue(block_holder.release());
1267
+ }
1268
+ }
1269
+
1270
+ // Release hold on compressed cache entry
1271
+ block_cache_compressed->Release(block_cache_compressed_handle);
1272
+ return s;
1273
+ }
1274
+
1275
+ template <typename TBlocklike>
1276
+ Status BlockBasedTable::PutDataBlockToCache(
1277
+ const Slice& block_cache_key, const Slice& compressed_block_cache_key,
1278
+ Cache* block_cache, Cache* block_cache_compressed,
1279
+ CachableEntry<TBlocklike>* cached_block, BlockContents* raw_block_contents,
1280
+ CompressionType raw_block_comp_type,
1281
+ const UncompressionDict& uncompression_dict,
1282
+ MemoryAllocator* memory_allocator, BlockType block_type,
1283
+ GetContext* get_context) const {
1284
+ const ImmutableCFOptions& ioptions = rep_->ioptions;
1285
+ const uint32_t format_version = rep_->table_options.format_version;
1286
+ const size_t read_amp_bytes_per_bit =
1287
+ block_type == BlockType::kData
1288
+ ? rep_->table_options.read_amp_bytes_per_bit
1289
+ : 0;
1290
+ const Cache::Priority priority =
1291
+ rep_->table_options.cache_index_and_filter_blocks_with_high_priority &&
1292
+ (block_type == BlockType::kFilter ||
1293
+ block_type == BlockType::kCompressionDictionary ||
1294
+ block_type == BlockType::kIndex)
1295
+ ? Cache::Priority::HIGH
1296
+ : Cache::Priority::LOW;
1297
+ assert(cached_block);
1298
+ assert(cached_block->IsEmpty());
1299
+
1300
+ Status s;
1301
+ Statistics* statistics = ioptions.statistics;
1302
+
1303
+ std::unique_ptr<TBlocklike> block_holder;
1304
+ if (raw_block_comp_type != kNoCompression) {
1305
+ // Retrieve the uncompressed contents into a new buffer
1306
+ BlockContents uncompressed_block_contents;
1307
+ UncompressionContext context(raw_block_comp_type);
1308
+ UncompressionInfo info(context, uncompression_dict, raw_block_comp_type);
1309
+ s = UncompressBlockContents(info, raw_block_contents->data.data(),
1310
+ raw_block_contents->data.size(),
1311
+ &uncompressed_block_contents, format_version,
1312
+ ioptions, memory_allocator);
1313
+ if (!s.ok()) {
1314
+ return s;
1315
+ }
1316
+
1317
+ block_holder.reset(BlocklikeTraits<TBlocklike>::Create(
1318
+ std::move(uncompressed_block_contents), read_amp_bytes_per_bit,
1319
+ statistics, rep_->blocks_definitely_zstd_compressed,
1320
+ rep_->table_options.filter_policy.get()));
1321
+ } else {
1322
+ block_holder.reset(BlocklikeTraits<TBlocklike>::Create(
1323
+ std::move(*raw_block_contents), read_amp_bytes_per_bit, statistics,
1324
+ rep_->blocks_definitely_zstd_compressed,
1325
+ rep_->table_options.filter_policy.get()));
1326
+ }
1327
+
1328
+ // Insert compressed block into compressed block cache.
1329
+ // Release the hold on the compressed cache entry immediately.
1330
+ if (block_cache_compressed != nullptr &&
1331
+ raw_block_comp_type != kNoCompression && raw_block_contents != nullptr &&
1332
+ raw_block_contents->own_bytes()) {
1333
+ #ifndef NDEBUG
1334
+ assert(raw_block_contents->is_raw_block);
1335
+ #endif // NDEBUG
1336
+
1337
+ // We cannot directly put raw_block_contents because this could point to
1338
+ // an object in the stack.
1339
+ BlockContents* block_cont_for_comp_cache =
1340
+ new BlockContents(std::move(*raw_block_contents));
1341
+ s = block_cache_compressed->Insert(
1342
+ compressed_block_cache_key, block_cont_for_comp_cache,
1343
+ block_cont_for_comp_cache->ApproximateMemoryUsage(),
1344
+ &DeleteCachedEntry<BlockContents>);
1345
+ if (s.ok()) {
1346
+ // Avoid the following code to delete this cached block.
1347
+ RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD);
1348
+ } else {
1349
+ RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
1350
+ delete block_cont_for_comp_cache;
1351
+ }
1352
+ }
1353
+
1354
+ // insert into uncompressed block cache
1355
+ if (block_cache != nullptr && block_holder->own_bytes()) {
1356
+ size_t charge = block_holder->ApproximateMemoryUsage();
1357
+ Cache::Handle* cache_handle = nullptr;
1358
+ s = block_cache->Insert(block_cache_key, block_holder.get(), charge,
1359
+ &DeleteCachedEntry<TBlocklike>, &cache_handle,
1360
+ priority);
1361
+ if (s.ok()) {
1362
+ assert(cache_handle != nullptr);
1363
+ cached_block->SetCachedValue(block_holder.release(), block_cache,
1364
+ cache_handle);
1365
+
1366
+ UpdateCacheInsertionMetrics(block_type, get_context, charge,
1367
+ s.IsOkOverwritten());
1368
+ } else {
1369
+ RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES);
1370
+ }
1371
+ } else {
1372
+ cached_block->SetOwnedValue(block_holder.release());
1373
+ }
1374
+
1375
+ return s;
1376
+ }
1377
+
1378
+ std::unique_ptr<FilterBlockReader> BlockBasedTable::CreateFilterBlockReader(
1379
+ const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, bool use_cache,
1380
+ bool prefetch, bool pin, BlockCacheLookupContext* lookup_context) {
1381
+ auto& rep = rep_;
1382
+ auto filter_type = rep->filter_type;
1383
+ if (filter_type == Rep::FilterType::kNoFilter) {
1384
+ return std::unique_ptr<FilterBlockReader>();
1385
+ }
1386
+
1387
+ assert(rep->filter_policy);
1388
+
1389
+ switch (filter_type) {
1390
+ case Rep::FilterType::kPartitionedFilter:
1391
+ return PartitionedFilterBlockReader::Create(
1392
+ this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context);
1393
+
1394
+ case Rep::FilterType::kBlockFilter:
1395
+ return BlockBasedFilterBlockReader::Create(
1396
+ this, ro, prefetch_buffer, use_cache, prefetch, pin, lookup_context);
1397
+
1398
+ case Rep::FilterType::kFullFilter:
1399
+ return FullFilterBlockReader::Create(this, ro, prefetch_buffer, use_cache,
1400
+ prefetch, pin, lookup_context);
1401
+
1402
+ default:
1403
+ // filter_type is either kNoFilter (exited the function at the first if),
1404
+ // or it must be covered in this switch block
1405
+ assert(false);
1406
+ return std::unique_ptr<FilterBlockReader>();
1407
+ }
1408
+ }
1409
+
1410
+ // disable_prefix_seek should be set to true when prefix_extractor found in SST
1411
+ // differs from the one in mutable_cf_options and index type is HashBasedIndex
1412
+ InternalIteratorBase<IndexValue>* BlockBasedTable::NewIndexIterator(
1413
+ const ReadOptions& read_options, bool disable_prefix_seek,
1414
+ IndexBlockIter* input_iter, GetContext* get_context,
1415
+ BlockCacheLookupContext* lookup_context) const {
1416
+ assert(rep_ != nullptr);
1417
+ assert(rep_->index_reader != nullptr);
1418
+
1419
+ // We don't return pinned data from index blocks, so no need
1420
+ // to set `block_contents_pinned`.
1421
+ return rep_->index_reader->NewIterator(read_options, disable_prefix_seek,
1422
+ input_iter, get_context,
1423
+ lookup_context);
1424
+ }
1425
+
1426
+ template <>
1427
+ DataBlockIter* BlockBasedTable::InitBlockIterator<DataBlockIter>(
1428
+ const Rep* rep, Block* block, BlockType block_type,
1429
+ DataBlockIter* input_iter, bool block_contents_pinned) {
1430
+ return block->NewDataIterator(rep->internal_comparator.user_comparator(),
1431
+ rep->get_global_seqno(block_type), input_iter,
1432
+ rep->ioptions.statistics,
1433
+ block_contents_pinned);
1434
+ }
1435
+
1436
+ template <>
1437
+ IndexBlockIter* BlockBasedTable::InitBlockIterator<IndexBlockIter>(
1438
+ const Rep* rep, Block* block, BlockType block_type,
1439
+ IndexBlockIter* input_iter, bool block_contents_pinned) {
1440
+ return block->NewIndexIterator(
1441
+ rep->internal_comparator.user_comparator(),
1442
+ rep->get_global_seqno(block_type), input_iter, rep->ioptions.statistics,
1443
+ /* total_order_seek */ true, rep->index_has_first_key,
1444
+ rep->index_key_includes_seq, rep->index_value_is_full,
1445
+ block_contents_pinned);
1446
+ }
1447
+
1448
+ // If contents is nullptr, this function looks up the block caches for the
1449
+ // data block referenced by handle, and read the block from disk if necessary.
1450
+ // If contents is non-null, it skips the cache lookup and disk read, since
1451
+ // the caller has already read it. In both cases, if ro.fill_cache is true,
1452
+ // it inserts the block into the block cache.
1453
+ template <typename TBlocklike>
1454
+ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
1455
+ FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
1456
+ const BlockHandle& handle, const UncompressionDict& uncompression_dict,
1457
+ CachableEntry<TBlocklike>* block_entry, BlockType block_type,
1458
+ GetContext* get_context, BlockCacheLookupContext* lookup_context,
1459
+ BlockContents* contents) const {
1460
+ assert(block_entry != nullptr);
1461
+ const bool no_io = (ro.read_tier == kBlockCacheTier);
1462
+ Cache* block_cache = rep_->table_options.block_cache.get();
1463
+ Cache* block_cache_compressed =
1464
+ rep_->table_options.block_cache_compressed.get();
1465
+
1466
+ // First, try to get the block from the cache
1467
+ //
1468
+ // If either block cache is enabled, we'll try to read from it.
1469
+ Status s;
1470
+ char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
1471
+ char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
1472
+ Slice key /* key to the block cache */;
1473
+ Slice ckey /* key to the compressed block cache */;
1474
+ bool is_cache_hit = false;
1475
+ if (block_cache != nullptr || block_cache_compressed != nullptr) {
1476
+ // create key for block cache
1477
+ if (block_cache != nullptr) {
1478
+ key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
1479
+ handle, cache_key);
1480
+ }
1481
+
1482
+ if (block_cache_compressed != nullptr) {
1483
+ ckey = GetCacheKey(rep_->compressed_cache_key_prefix,
1484
+ rep_->compressed_cache_key_prefix_size, handle,
1485
+ compressed_cache_key);
1486
+ }
1487
+
1488
+ if (!contents) {
1489
+ s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed,
1490
+ ro, block_entry, uncompression_dict, block_type,
1491
+ get_context);
1492
+ if (block_entry->GetValue()) {
1493
+ // TODO(haoyu): Differentiate cache hit on uncompressed block cache and
1494
+ // compressed block cache.
1495
+ is_cache_hit = true;
1496
+ }
1497
+ }
1498
+
1499
+ // Can't find the block from the cache. If I/O is allowed, read from the
1500
+ // file.
1501
+ if (block_entry->GetValue() == nullptr && !no_io && ro.fill_cache) {
1502
+ Statistics* statistics = rep_->ioptions.statistics;
1503
+ const bool maybe_compressed =
1504
+ block_type != BlockType::kFilter &&
1505
+ block_type != BlockType::kCompressionDictionary &&
1506
+ rep_->blocks_maybe_compressed;
1507
+ const bool do_uncompress = maybe_compressed && !block_cache_compressed;
1508
+ CompressionType raw_block_comp_type;
1509
+ BlockContents raw_block_contents;
1510
+ if (!contents) {
1511
+ StopWatch sw(rep_->ioptions.env, statistics, READ_BLOCK_GET_MICROS);
1512
+ BlockFetcher block_fetcher(
1513
+ rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle,
1514
+ &raw_block_contents, rep_->ioptions, do_uncompress,
1515
+ maybe_compressed, block_type, uncompression_dict,
1516
+ rep_->persistent_cache_options,
1517
+ GetMemoryAllocator(rep_->table_options),
1518
+ GetMemoryAllocatorForCompressedBlock(rep_->table_options));
1519
+ s = block_fetcher.ReadBlockContents();
1520
+ raw_block_comp_type = block_fetcher.get_compression_type();
1521
+ contents = &raw_block_contents;
1522
+ if (get_context) {
1523
+ switch (block_type) {
1524
+ case BlockType::kIndex:
1525
+ ++get_context->get_context_stats_.num_index_read;
1526
+ break;
1527
+ case BlockType::kFilter:
1528
+ ++get_context->get_context_stats_.num_filter_read;
1529
+ break;
1530
+ case BlockType::kData:
1531
+ ++get_context->get_context_stats_.num_data_read;
1532
+ break;
1533
+ default:
1534
+ break;
1535
+ }
1536
+ }
1537
+ } else {
1538
+ raw_block_comp_type = contents->get_compression_type();
1539
+ }
1540
+
1541
+ if (s.ok()) {
1542
+ // If filling cache is allowed and a cache is configured, try to put the
1543
+ // block to the cache.
1544
+ s = PutDataBlockToCache(
1545
+ key, ckey, block_cache, block_cache_compressed, block_entry,
1546
+ contents, raw_block_comp_type, uncompression_dict,
1547
+ GetMemoryAllocator(rep_->table_options), block_type, get_context);
1548
+ }
1549
+ }
1550
+ }
1551
+
1552
+ // Fill lookup_context.
1553
+ if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled() &&
1554
+ lookup_context) {
1555
+ size_t usage = 0;
1556
+ uint64_t nkeys = 0;
1557
+ if (block_entry->GetValue()) {
1558
+ // Approximate the number of keys in the block using restarts.
1559
+ nkeys =
1560
+ rep_->table_options.block_restart_interval *
1561
+ BlocklikeTraits<TBlocklike>::GetNumRestarts(*block_entry->GetValue());
1562
+ usage = block_entry->GetValue()->ApproximateMemoryUsage();
1563
+ }
1564
+ TraceType trace_block_type = TraceType::kTraceMax;
1565
+ switch (block_type) {
1566
+ case BlockType::kData:
1567
+ trace_block_type = TraceType::kBlockTraceDataBlock;
1568
+ break;
1569
+ case BlockType::kFilter:
1570
+ trace_block_type = TraceType::kBlockTraceFilterBlock;
1571
+ break;
1572
+ case BlockType::kCompressionDictionary:
1573
+ trace_block_type = TraceType::kBlockTraceUncompressionDictBlock;
1574
+ break;
1575
+ case BlockType::kRangeDeletion:
1576
+ trace_block_type = TraceType::kBlockTraceRangeDeletionBlock;
1577
+ break;
1578
+ case BlockType::kIndex:
1579
+ trace_block_type = TraceType::kBlockTraceIndexBlock;
1580
+ break;
1581
+ default:
1582
+ // This cannot happen.
1583
+ assert(false);
1584
+ break;
1585
+ }
1586
+ bool no_insert = no_io || !ro.fill_cache;
1587
+ if (BlockCacheTraceHelper::IsGetOrMultiGetOnDataBlock(
1588
+ trace_block_type, lookup_context->caller)) {
1589
+ // Defer logging the access to Get() and MultiGet() to trace additional
1590
+ // information, e.g., referenced_key_exist_in_block.
1591
+
1592
+ // Make a copy of the block key here since it will be logged later.
1593
+ lookup_context->FillLookupContext(
1594
+ is_cache_hit, no_insert, trace_block_type,
1595
+ /*block_size=*/usage, /*block_key=*/key.ToString(), nkeys);
1596
+ } else {
1597
+ // Avoid making copy of block_key and cf_name when constructing the access
1598
+ // record.
1599
+ BlockCacheTraceRecord access_record(
1600
+ rep_->ioptions.env->NowMicros(),
1601
+ /*block_key=*/"", trace_block_type,
1602
+ /*block_size=*/usage, rep_->cf_id_for_tracing(),
1603
+ /*cf_name=*/"", rep_->level_for_tracing(),
1604
+ rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit,
1605
+ no_insert, lookup_context->get_id,
1606
+ lookup_context->get_from_user_specified_snapshot,
1607
+ /*referenced_key=*/"");
1608
+ // TODO: Should handle this error?
1609
+ block_cache_tracer_
1610
+ ->WriteBlockAccess(access_record, key, rep_->cf_name_for_tracing(),
1611
+ lookup_context->referenced_key)
1612
+ .PermitUncheckedError();
1613
+ }
1614
+ }
1615
+
1616
+ assert(s.ok() || block_entry->GetValue() == nullptr);
1617
+ return s;
1618
+ }
1619
+
1620
+ // This function reads multiple data blocks from disk using Env::MultiRead()
1621
+ // and optionally inserts them into the block cache. It uses the scratch
1622
+ // buffer provided by the caller, which is contiguous. If scratch is a nullptr
1623
+ // it allocates a separate buffer for each block. Typically, if the blocks
1624
+ // need to be uncompressed and there is no compressed block cache, callers
1625
+ // can allocate a temporary scratch buffer in order to minimize memory
1626
+ // allocations.
1627
+ // If options.fill_cache is true, it inserts the blocks into cache. If its
1628
+ // false and scratch is non-null and the blocks are uncompressed, it copies
1629
+ // the buffers to heap. In any case, the CachableEntry<Block> returned will
1630
+ // own the data bytes.
1631
+ // If compression is enabled and also there is no compressed block cache,
1632
+ // the adjacent blocks are read out in one IO (combined read)
1633
+ // batch - A MultiGetRange with only those keys with unique data blocks not
1634
+ // found in cache
1635
+ // handles - A vector of block handles. Some of them me be NULL handles
1636
+ // scratch - An optional contiguous buffer to read compressed blocks into
1637
+ void BlockBasedTable::RetrieveMultipleBlocks(
1638
+ const ReadOptions& options, const MultiGetRange* batch,
1639
+ const autovector<BlockHandle, MultiGetContext::MAX_BATCH_SIZE>* handles,
1640
+ autovector<Status, MultiGetContext::MAX_BATCH_SIZE>* statuses,
1641
+ autovector<CachableEntry<Block>, MultiGetContext::MAX_BATCH_SIZE>* results,
1642
+ char* scratch, const UncompressionDict& uncompression_dict) const {
1643
+ RandomAccessFileReader* file = rep_->file.get();
1644
+ const Footer& footer = rep_->footer;
1645
+ const ImmutableCFOptions& ioptions = rep_->ioptions;
1646
+ size_t read_amp_bytes_per_bit = rep_->table_options.read_amp_bytes_per_bit;
1647
+ MemoryAllocator* memory_allocator = GetMemoryAllocator(rep_->table_options);
1648
+
1649
+ if (ioptions.allow_mmap_reads) {
1650
+ size_t idx_in_batch = 0;
1651
+ for (auto mget_iter = batch->begin(); mget_iter != batch->end();
1652
+ ++mget_iter, ++idx_in_batch) {
1653
+ BlockCacheLookupContext lookup_data_block_context(
1654
+ TableReaderCaller::kUserMultiGet);
1655
+ const BlockHandle& handle = (*handles)[idx_in_batch];
1656
+ if (handle.IsNull()) {
1657
+ continue;
1658
+ }
1659
+
1660
+ (*statuses)[idx_in_batch] =
1661
+ RetrieveBlock(nullptr, options, handle, uncompression_dict,
1662
+ &(*results)[idx_in_batch], BlockType::kData,
1663
+ mget_iter->get_context, &lookup_data_block_context,
1664
+ /* for_compaction */ false, /* use_cache */ true);
1665
+ }
1666
+ return;
1667
+ }
1668
+
1669
+ // In direct IO mode, blocks share the direct io buffer.
1670
+ // Otherwise, blocks share the scratch buffer.
1671
+ const bool use_shared_buffer = file->use_direct_io() || scratch != nullptr;
1672
+
1673
+ autovector<FSReadRequest, MultiGetContext::MAX_BATCH_SIZE> read_reqs;
1674
+ size_t buf_offset = 0;
1675
+ size_t idx_in_batch = 0;
1676
+
1677
+ uint64_t prev_offset = 0;
1678
+ size_t prev_len = 0;
1679
+ autovector<size_t, MultiGetContext::MAX_BATCH_SIZE> req_idx_for_block;
1680
+ autovector<size_t, MultiGetContext::MAX_BATCH_SIZE> req_offset_for_block;
1681
+ for (auto mget_iter = batch->begin(); mget_iter != batch->end();
1682
+ ++mget_iter, ++idx_in_batch) {
1683
+ const BlockHandle& handle = (*handles)[idx_in_batch];
1684
+ if (handle.IsNull()) {
1685
+ continue;
1686
+ }
1687
+
1688
+ size_t prev_end = static_cast<size_t>(prev_offset) + prev_len;
1689
+
1690
+ // If current block is adjacent to the previous one, at the same time,
1691
+ // compression is enabled and there is no compressed cache, we combine
1692
+ // the two block read as one.
1693
+ // We don't combine block reads here in direct IO mode, because when doing
1694
+ // direct IO read, the block requests will be realigned and merged when
1695
+ // necessary.
1696
+ if (use_shared_buffer && !file->use_direct_io() &&
1697
+ prev_end == handle.offset()) {
1698
+ req_offset_for_block.emplace_back(prev_len);
1699
+ prev_len += block_size(handle);
1700
+ } else {
1701
+ // No compression or current block and previous one is not adjacent:
1702
+ // Step 1, create a new request for previous blocks
1703
+ if (prev_len != 0) {
1704
+ FSReadRequest req;
1705
+ req.offset = prev_offset;
1706
+ req.len = prev_len;
1707
+ if (file->use_direct_io()) {
1708
+ req.scratch = nullptr;
1709
+ } else if (use_shared_buffer) {
1710
+ req.scratch = scratch + buf_offset;
1711
+ buf_offset += req.len;
1712
+ } else {
1713
+ req.scratch = new char[req.len];
1714
+ }
1715
+ read_reqs.emplace_back(req);
1716
+ }
1717
+
1718
+ // Step 2, remeber the previous block info
1719
+ prev_offset = handle.offset();
1720
+ prev_len = block_size(handle);
1721
+ req_offset_for_block.emplace_back(0);
1722
+ }
1723
+ req_idx_for_block.emplace_back(read_reqs.size());
1724
+ }
1725
+ // Handle the last block and process the pending last request
1726
+ if (prev_len != 0) {
1727
+ FSReadRequest req;
1728
+ req.offset = prev_offset;
1729
+ req.len = prev_len;
1730
+ if (file->use_direct_io()) {
1731
+ req.scratch = nullptr;
1732
+ } else if (use_shared_buffer) {
1733
+ req.scratch = scratch + buf_offset;
1734
+ } else {
1735
+ req.scratch = new char[req.len];
1736
+ }
1737
+ read_reqs.emplace_back(req);
1738
+ }
1739
+
1740
+ AlignedBuf direct_io_buf;
1741
+ {
1742
+ IOOptions opts;
1743
+ IOStatus s = PrepareIOFromReadOptions(options, file->env(), opts);
1744
+ if (s.IsTimedOut()) {
1745
+ for (FSReadRequest& req : read_reqs) {
1746
+ req.status = s;
1747
+ }
1748
+ } else {
1749
+ // How to handle this status code?
1750
+ file->MultiRead(opts, &read_reqs[0], read_reqs.size(), &direct_io_buf)
1751
+ .PermitUncheckedError();
1752
+ }
1753
+ }
1754
+
1755
+ idx_in_batch = 0;
1756
+ size_t valid_batch_idx = 0;
1757
+ for (auto mget_iter = batch->begin(); mget_iter != batch->end();
1758
+ ++mget_iter, ++idx_in_batch) {
1759
+ const BlockHandle& handle = (*handles)[idx_in_batch];
1760
+
1761
+ if (handle.IsNull()) {
1762
+ continue;
1763
+ }
1764
+
1765
+ assert(valid_batch_idx < req_idx_for_block.size());
1766
+ assert(valid_batch_idx < req_offset_for_block.size());
1767
+ assert(req_idx_for_block[valid_batch_idx] < read_reqs.size());
1768
+ size_t& req_idx = req_idx_for_block[valid_batch_idx];
1769
+ size_t& req_offset = req_offset_for_block[valid_batch_idx];
1770
+ valid_batch_idx++;
1771
+ if (mget_iter->get_context) {
1772
+ ++(mget_iter->get_context->get_context_stats_.num_data_read);
1773
+ }
1774
+ FSReadRequest& req = read_reqs[req_idx];
1775
+ Status s = req.status;
1776
+ if (s.ok()) {
1777
+ if ((req.result.size() != req.len) ||
1778
+ (req_offset + block_size(handle) > req.result.size())) {
1779
+ s = Status::Corruption(
1780
+ "truncated block read from " + rep_->file->file_name() +
1781
+ " offset " + ToString(handle.offset()) + ", expected " +
1782
+ ToString(req.len) + " bytes, got " + ToString(req.result.size()));
1783
+ }
1784
+ }
1785
+
1786
+ BlockContents raw_block_contents;
1787
+ if (s.ok()) {
1788
+ if (!use_shared_buffer) {
1789
+ // We allocated a buffer for this block. Give ownership of it to
1790
+ // BlockContents so it can free the memory
1791
+ assert(req.result.data() == req.scratch);
1792
+ assert(req.result.size() == block_size(handle));
1793
+ assert(req_offset == 0);
1794
+ std::unique_ptr<char[]> raw_block(req.scratch);
1795
+ raw_block_contents = BlockContents(std::move(raw_block), handle.size());
1796
+ } else {
1797
+ // We used the scratch buffer or direct io buffer
1798
+ // which are shared by the blocks.
1799
+ // raw_block_contents does not have the ownership.
1800
+ raw_block_contents =
1801
+ BlockContents(Slice(req.result.data() + req_offset, handle.size()));
1802
+ }
1803
+ #ifndef NDEBUG
1804
+ raw_block_contents.is_raw_block = true;
1805
+ #endif
1806
+
1807
+ if (options.verify_checksums) {
1808
+ PERF_TIMER_GUARD(block_checksum_time);
1809
+ const char* data = req.result.data();
1810
+ // Since the scratch might be shared, the offset of the data block in
1811
+ // the buffer might not be 0. req.result.data() only point to the
1812
+ // begin address of each read request, we need to add the offset
1813
+ // in each read request. Checksum is stored in the block trailer,
1814
+ // beyond the payload size.
1815
+ s = ROCKSDB_NAMESPACE::VerifyBlockChecksum(
1816
+ footer.checksum(), data + req_offset, handle.size(),
1817
+ rep_->file->file_name(), handle.offset());
1818
+ TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s);
1819
+ }
1820
+ } else if (!use_shared_buffer) {
1821
+ // Free the allocated scratch buffer.
1822
+ delete[] req.scratch;
1823
+ }
1824
+
1825
+ if (s.ok()) {
1826
+ // When the blocks share the same underlying buffer (scratch or direct io
1827
+ // buffer), we may need to manually copy the block into heap if the raw
1828
+ // block has to be inserted into a cache. That falls into th following
1829
+ // cases -
1830
+ // 1. Raw block is not compressed, it needs to be inserted into the
1831
+ // uncompressed block cache if there is one
1832
+ // 2. If the raw block is compressed, it needs to be inserted into the
1833
+ // compressed block cache if there is one
1834
+ //
1835
+ // In all other cases, the raw block is either uncompressed into a heap
1836
+ // buffer or there is no cache at all.
1837
+ CompressionType compression_type =
1838
+ raw_block_contents.get_compression_type();
1839
+ if (use_shared_buffer && (compression_type == kNoCompression ||
1840
+ (compression_type != kNoCompression &&
1841
+ rep_->table_options.block_cache_compressed))) {
1842
+ Slice raw = Slice(req.result.data() + req_offset, block_size(handle));
1843
+ raw_block_contents = BlockContents(
1844
+ CopyBufferToHeap(GetMemoryAllocator(rep_->table_options), raw),
1845
+ handle.size());
1846
+ #ifndef NDEBUG
1847
+ raw_block_contents.is_raw_block = true;
1848
+ #endif
1849
+ }
1850
+ }
1851
+
1852
+ if (s.ok()) {
1853
+ if (options.fill_cache) {
1854
+ BlockCacheLookupContext lookup_data_block_context(
1855
+ TableReaderCaller::kUserMultiGet);
1856
+ CachableEntry<Block>* block_entry = &(*results)[idx_in_batch];
1857
+ // MaybeReadBlockAndLoadToCache will insert into the block caches if
1858
+ // necessary. Since we're passing the raw block contents, it will
1859
+ // avoid looking up the block cache
1860
+ s = MaybeReadBlockAndLoadToCache(
1861
+ nullptr, options, handle, uncompression_dict, block_entry,
1862
+ BlockType::kData, mget_iter->get_context,
1863
+ &lookup_data_block_context, &raw_block_contents);
1864
+
1865
+ // block_entry value could be null if no block cache is present, i.e
1866
+ // BlockBasedTableOptions::no_block_cache is true and no compressed
1867
+ // block cache is configured. In that case, fall
1868
+ // through and set up the block explicitly
1869
+ if (block_entry->GetValue() != nullptr) {
1870
+ s.PermitUncheckedError();
1871
+ continue;
1872
+ }
1873
+ }
1874
+
1875
+ CompressionType compression_type =
1876
+ raw_block_contents.get_compression_type();
1877
+ BlockContents contents;
1878
+ if (compression_type != kNoCompression) {
1879
+ UncompressionContext context(compression_type);
1880
+ UncompressionInfo info(context, uncompression_dict, compression_type);
1881
+ s = UncompressBlockContents(info, req.result.data() + req_offset,
1882
+ handle.size(), &contents, footer.version(),
1883
+ rep_->ioptions, memory_allocator);
1884
+ } else {
1885
+ // There are two cases here:
1886
+ // 1) caller uses the shared buffer (scratch or direct io buffer);
1887
+ // 2) we use the requst buffer.
1888
+ // If scratch buffer or direct io buffer is used, we ensure that
1889
+ // all raw blocks are copyed to the heap as single blocks. If scratch
1890
+ // buffer is not used, we also have no combined read, so the raw
1891
+ // block can be used directly.
1892
+ contents = std::move(raw_block_contents);
1893
+ }
1894
+ if (s.ok()) {
1895
+ (*results)[idx_in_batch].SetOwnedValue(new Block(
1896
+ std::move(contents), read_amp_bytes_per_bit, ioptions.statistics));
1897
+ }
1898
+ }
1899
+ (*statuses)[idx_in_batch] = s;
1900
+ }
1901
+ }
1902
+
1903
+ template <typename TBlocklike>
1904
+ Status BlockBasedTable::RetrieveBlock(
1905
+ FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
1906
+ const BlockHandle& handle, const UncompressionDict& uncompression_dict,
1907
+ CachableEntry<TBlocklike>* block_entry, BlockType block_type,
1908
+ GetContext* get_context, BlockCacheLookupContext* lookup_context,
1909
+ bool for_compaction, bool use_cache) const {
1910
+ assert(block_entry);
1911
+ assert(block_entry->IsEmpty());
1912
+
1913
+ Status s;
1914
+ if (use_cache) {
1915
+ s = MaybeReadBlockAndLoadToCache(prefetch_buffer, ro, handle,
1916
+ uncompression_dict, block_entry,
1917
+ block_type, get_context, lookup_context,
1918
+ /*contents=*/nullptr);
1919
+
1920
+ if (!s.ok()) {
1921
+ return s;
1922
+ }
1923
+
1924
+ if (block_entry->GetValue() != nullptr) {
1925
+ assert(s.ok());
1926
+ return s;
1927
+ }
1928
+ }
1929
+
1930
+ assert(block_entry->IsEmpty());
1931
+
1932
+ const bool no_io = ro.read_tier == kBlockCacheTier;
1933
+ if (no_io) {
1934
+ return Status::Incomplete("no blocking io");
1935
+ }
1936
+
1937
+ const bool maybe_compressed =
1938
+ block_type != BlockType::kFilter &&
1939
+ block_type != BlockType::kCompressionDictionary &&
1940
+ rep_->blocks_maybe_compressed;
1941
+ const bool do_uncompress = maybe_compressed;
1942
+ std::unique_ptr<TBlocklike> block;
1943
+
1944
+ {
1945
+ StopWatch sw(rep_->ioptions.env, rep_->ioptions.statistics,
1946
+ READ_BLOCK_GET_MICROS);
1947
+ s = ReadBlockFromFile(
1948
+ rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle, &block,
1949
+ rep_->ioptions, do_uncompress, maybe_compressed, block_type,
1950
+ uncompression_dict, rep_->persistent_cache_options,
1951
+ block_type == BlockType::kData
1952
+ ? rep_->table_options.read_amp_bytes_per_bit
1953
+ : 0,
1954
+ GetMemoryAllocator(rep_->table_options), for_compaction,
1955
+ rep_->blocks_definitely_zstd_compressed,
1956
+ rep_->table_options.filter_policy.get());
1957
+
1958
+ if (get_context) {
1959
+ switch (block_type) {
1960
+ case BlockType::kIndex:
1961
+ ++(get_context->get_context_stats_.num_index_read);
1962
+ break;
1963
+ case BlockType::kFilter:
1964
+ ++(get_context->get_context_stats_.num_filter_read);
1965
+ break;
1966
+ case BlockType::kData:
1967
+ ++(get_context->get_context_stats_.num_data_read);
1968
+ break;
1969
+ default:
1970
+ break;
1971
+ }
1972
+ }
1973
+ }
1974
+
1975
+ if (!s.ok()) {
1976
+ return s;
1977
+ }
1978
+
1979
+ block_entry->SetOwnedValue(block.release());
1980
+
1981
+ assert(s.ok());
1982
+ return s;
1983
+ }
1984
+
1985
+ // Explicitly instantiate templates for both "blocklike" types we use.
1986
+ // This makes it possible to keep the template definitions in the .cc file.
1987
+ template Status BlockBasedTable::RetrieveBlock<BlockContents>(
1988
+ FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
1989
+ const BlockHandle& handle, const UncompressionDict& uncompression_dict,
1990
+ CachableEntry<BlockContents>* block_entry, BlockType block_type,
1991
+ GetContext* get_context, BlockCacheLookupContext* lookup_context,
1992
+ bool for_compaction, bool use_cache) const;
1993
+
1994
+ template Status BlockBasedTable::RetrieveBlock<ParsedFullFilterBlock>(
1995
+ FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
1996
+ const BlockHandle& handle, const UncompressionDict& uncompression_dict,
1997
+ CachableEntry<ParsedFullFilterBlock>* block_entry, BlockType block_type,
1998
+ GetContext* get_context, BlockCacheLookupContext* lookup_context,
1999
+ bool for_compaction, bool use_cache) const;
2000
+
2001
+ template Status BlockBasedTable::RetrieveBlock<Block>(
2002
+ FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
2003
+ const BlockHandle& handle, const UncompressionDict& uncompression_dict,
2004
+ CachableEntry<Block>* block_entry, BlockType block_type,
2005
+ GetContext* get_context, BlockCacheLookupContext* lookup_context,
2006
+ bool for_compaction, bool use_cache) const;
2007
+
2008
+ template Status BlockBasedTable::RetrieveBlock<UncompressionDict>(
2009
+ FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
2010
+ const BlockHandle& handle, const UncompressionDict& uncompression_dict,
2011
+ CachableEntry<UncompressionDict>* block_entry, BlockType block_type,
2012
+ GetContext* get_context, BlockCacheLookupContext* lookup_context,
2013
+ bool for_compaction, bool use_cache) const;
2014
+
2015
+ BlockBasedTable::PartitionedIndexIteratorState::PartitionedIndexIteratorState(
2016
+ const BlockBasedTable* table,
2017
+ std::unordered_map<uint64_t, CachableEntry<Block>>* block_map)
2018
+ : table_(table), block_map_(block_map) {}
2019
+
2020
+ InternalIteratorBase<IndexValue>*
2021
+ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator(
2022
+ const BlockHandle& handle) {
2023
+ // Return a block iterator on the index partition
2024
+ auto block = block_map_->find(handle.offset());
2025
+ // This is a possible scenario since block cache might not have had space
2026
+ // for the partition
2027
+ if (block != block_map_->end()) {
2028
+ const Rep* rep = table_->get_rep();
2029
+ assert(rep);
2030
+
2031
+ Statistics* kNullStats = nullptr;
2032
+ // We don't return pinned data from index blocks, so no need
2033
+ // to set `block_contents_pinned`.
2034
+ return block->second.GetValue()->NewIndexIterator(
2035
+ rep->internal_comparator.user_comparator(),
2036
+ rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true,
2037
+ rep->index_has_first_key, rep->index_key_includes_seq,
2038
+ rep->index_value_is_full);
2039
+ }
2040
+ // Create an empty iterator
2041
+ // TODO(ajkr): this is not the right way to handle an unpinned partition.
2042
+ return new IndexBlockIter();
2043
+ }
2044
+
2045
+ // This will be broken if the user specifies an unusual implementation
2046
+ // of Options.comparator, or if the user specifies an unusual
2047
+ // definition of prefixes in BlockBasedTableOptions.filter_policy.
2048
+ // In particular, we require the following three properties:
2049
+ //
2050
+ // 1) key.starts_with(prefix(key))
2051
+ // 2) Compare(prefix(key), key) <= 0.
2052
+ // 3) If Compare(key1, key2) <= 0, then Compare(prefix(key1), prefix(key2)) <= 0
2053
+ //
2054
+ // If read_options.read_tier == kBlockCacheTier, this method will do no I/O and
2055
+ // will return true if the filter block is not in memory and not found in block
2056
+ // cache.
2057
+ //
2058
+ // REQUIRES: this method shouldn't be called while the DB lock is held.
2059
+ bool BlockBasedTable::PrefixMayMatch(
2060
+ const Slice& internal_key, const ReadOptions& read_options,
2061
+ const SliceTransform* options_prefix_extractor,
2062
+ const bool need_upper_bound_check,
2063
+ BlockCacheLookupContext* lookup_context) const {
2064
+ if (!rep_->filter_policy) {
2065
+ return true;
2066
+ }
2067
+
2068
+ const SliceTransform* prefix_extractor;
2069
+
2070
+ if (rep_->table_prefix_extractor == nullptr) {
2071
+ if (need_upper_bound_check) {
2072
+ return true;
2073
+ }
2074
+ prefix_extractor = options_prefix_extractor;
2075
+ } else {
2076
+ prefix_extractor = rep_->table_prefix_extractor.get();
2077
+ }
2078
+ auto ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size();
2079
+ auto user_key_without_ts =
2080
+ ExtractUserKeyAndStripTimestamp(internal_key, ts_sz);
2081
+ if (!prefix_extractor->InDomain(user_key_without_ts)) {
2082
+ return true;
2083
+ }
2084
+
2085
+ bool may_match = true;
2086
+
2087
+ // First, try check with full filter
2088
+ FilterBlockReader* const filter = rep_->filter.get();
2089
+ bool filter_checked = true;
2090
+ if (filter != nullptr) {
2091
+ const bool no_io = read_options.read_tier == kBlockCacheTier;
2092
+
2093
+ if (!filter->IsBlockBased()) {
2094
+ const Slice* const const_ikey_ptr = &internal_key;
2095
+ may_match = filter->RangeMayExist(
2096
+ read_options.iterate_upper_bound, user_key_without_ts,
2097
+ prefix_extractor, rep_->internal_comparator.user_comparator(),
2098
+ const_ikey_ptr, &filter_checked, need_upper_bound_check, no_io,
2099
+ lookup_context);
2100
+ } else {
2101
+ // if prefix_extractor changed for block based filter, skip filter
2102
+ if (need_upper_bound_check) {
2103
+ return true;
2104
+ }
2105
+ auto prefix = prefix_extractor->Transform(user_key_without_ts);
2106
+ InternalKey internal_key_prefix(prefix, kMaxSequenceNumber, kTypeValue);
2107
+ auto internal_prefix = internal_key_prefix.Encode();
2108
+
2109
+ // To prevent any io operation in this method, we set `read_tier` to make
2110
+ // sure we always read index or filter only when they have already been
2111
+ // loaded to memory.
2112
+ ReadOptions no_io_read_options;
2113
+ no_io_read_options.read_tier = kBlockCacheTier;
2114
+
2115
+ // Then, try find it within each block
2116
+ // we already know prefix_extractor and prefix_extractor_name must match
2117
+ // because `CheckPrefixMayMatch` first checks `check_filter_ == true`
2118
+ std::unique_ptr<InternalIteratorBase<IndexValue>> iiter(NewIndexIterator(
2119
+ no_io_read_options,
2120
+ /*need_upper_bound_check=*/false, /*input_iter=*/nullptr,
2121
+ /*get_context=*/nullptr, lookup_context));
2122
+ iiter->Seek(internal_prefix);
2123
+
2124
+ if (!iiter->Valid()) {
2125
+ // we're past end of file
2126
+ // if it's incomplete, it means that we avoided I/O
2127
+ // and we're not really sure that we're past the end
2128
+ // of the file
2129
+ may_match = iiter->status().IsIncomplete();
2130
+ } else if ((rep_->index_key_includes_seq ? ExtractUserKey(iiter->key())
2131
+ : iiter->key())
2132
+ .starts_with(ExtractUserKey(internal_prefix))) {
2133
+ // we need to check for this subtle case because our only
2134
+ // guarantee is that "the key is a string >= last key in that data
2135
+ // block" according to the doc/table_format.txt spec.
2136
+ //
2137
+ // Suppose iiter->key() starts with the desired prefix; it is not
2138
+ // necessarily the case that the corresponding data block will
2139
+ // contain the prefix, since iiter->key() need not be in the
2140
+ // block. However, the next data block may contain the prefix, so
2141
+ // we return true to play it safe.
2142
+ may_match = true;
2143
+ } else if (filter->IsBlockBased()) {
2144
+ // iiter->key() does NOT start with the desired prefix. Because
2145
+ // Seek() finds the first key that is >= the seek target, this
2146
+ // means that iiter->key() > prefix. Thus, any data blocks coming
2147
+ // after the data block corresponding to iiter->key() cannot
2148
+ // possibly contain the key. Thus, the corresponding data block
2149
+ // is the only on could potentially contain the prefix.
2150
+ BlockHandle handle = iiter->value().handle;
2151
+ may_match = filter->PrefixMayMatch(
2152
+ prefix, prefix_extractor, handle.offset(), no_io,
2153
+ /*const_key_ptr=*/nullptr, /*get_context=*/nullptr, lookup_context);
2154
+ }
2155
+ }
2156
+ }
2157
+
2158
+ if (filter_checked) {
2159
+ Statistics* statistics = rep_->ioptions.statistics;
2160
+ RecordTick(statistics, BLOOM_FILTER_PREFIX_CHECKED);
2161
+ if (!may_match) {
2162
+ RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
2163
+ }
2164
+ }
2165
+
2166
+ return may_match;
2167
+ }
2168
+
2169
+
2170
+ InternalIterator* BlockBasedTable::NewIterator(
2171
+ const ReadOptions& read_options, const SliceTransform* prefix_extractor,
2172
+ Arena* arena, bool skip_filters, TableReaderCaller caller,
2173
+ size_t compaction_readahead_size, bool allow_unprepared_value) {
2174
+ BlockCacheLookupContext lookup_context{caller};
2175
+ bool need_upper_bound_check =
2176
+ read_options.auto_prefix_mode ||
2177
+ PrefixExtractorChanged(rep_->table_properties.get(), prefix_extractor);
2178
+ std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter(NewIndexIterator(
2179
+ read_options,
2180
+ need_upper_bound_check &&
2181
+ rep_->index_type == BlockBasedTableOptions::kHashSearch,
2182
+ /*input_iter=*/nullptr, /*get_context=*/nullptr, &lookup_context));
2183
+ if (arena == nullptr) {
2184
+ return new BlockBasedTableIterator(
2185
+ this, read_options, rep_->internal_comparator, std::move(index_iter),
2186
+ !skip_filters && !read_options.total_order_seek &&
2187
+ prefix_extractor != nullptr,
2188
+ need_upper_bound_check, prefix_extractor, caller,
2189
+ compaction_readahead_size, allow_unprepared_value);
2190
+ } else {
2191
+ auto* mem = arena->AllocateAligned(sizeof(BlockBasedTableIterator));
2192
+ return new (mem) BlockBasedTableIterator(
2193
+ this, read_options, rep_->internal_comparator, std::move(index_iter),
2194
+ !skip_filters && !read_options.total_order_seek &&
2195
+ prefix_extractor != nullptr,
2196
+ need_upper_bound_check, prefix_extractor, caller,
2197
+ compaction_readahead_size, allow_unprepared_value);
2198
+ }
2199
+ }
2200
+
2201
+ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator(
2202
+ const ReadOptions& read_options) {
2203
+ if (rep_->fragmented_range_dels == nullptr) {
2204
+ return nullptr;
2205
+ }
2206
+ SequenceNumber snapshot = kMaxSequenceNumber;
2207
+ if (read_options.snapshot != nullptr) {
2208
+ snapshot = read_options.snapshot->GetSequenceNumber();
2209
+ }
2210
+ return new FragmentedRangeTombstoneIterator(
2211
+ rep_->fragmented_range_dels, rep_->internal_comparator, snapshot);
2212
+ }
2213
+
2214
+ bool BlockBasedTable::FullFilterKeyMayMatch(
2215
+ const ReadOptions& read_options, FilterBlockReader* filter,
2216
+ const Slice& internal_key, const bool no_io,
2217
+ const SliceTransform* prefix_extractor, GetContext* get_context,
2218
+ BlockCacheLookupContext* lookup_context) const {
2219
+ if (filter == nullptr || filter->IsBlockBased()) {
2220
+ return true;
2221
+ }
2222
+ Slice user_key = ExtractUserKey(internal_key);
2223
+ const Slice* const const_ikey_ptr = &internal_key;
2224
+ bool may_match = true;
2225
+ size_t ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size();
2226
+ Slice user_key_without_ts = StripTimestampFromUserKey(user_key, ts_sz);
2227
+ if (rep_->whole_key_filtering) {
2228
+ may_match =
2229
+ filter->KeyMayMatch(user_key_without_ts, prefix_extractor, kNotValid,
2230
+ no_io, const_ikey_ptr, get_context, lookup_context);
2231
+ } else if (!read_options.total_order_seek && prefix_extractor &&
2232
+ rep_->table_properties->prefix_extractor_name.compare(
2233
+ prefix_extractor->Name()) == 0 &&
2234
+ prefix_extractor->InDomain(user_key_without_ts) &&
2235
+ !filter->PrefixMayMatch(
2236
+ prefix_extractor->Transform(user_key_without_ts),
2237
+ prefix_extractor, kNotValid, no_io, const_ikey_ptr,
2238
+ get_context, lookup_context)) {
2239
+ may_match = false;
2240
+ }
2241
+ if (may_match) {
2242
+ RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE);
2243
+ PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, rep_->level);
2244
+ }
2245
+ return may_match;
2246
+ }
2247
+
2248
+ void BlockBasedTable::FullFilterKeysMayMatch(
2249
+ const ReadOptions& read_options, FilterBlockReader* filter,
2250
+ MultiGetRange* range, const bool no_io,
2251
+ const SliceTransform* prefix_extractor,
2252
+ BlockCacheLookupContext* lookup_context) const {
2253
+ if (filter == nullptr || filter->IsBlockBased()) {
2254
+ return;
2255
+ }
2256
+ uint64_t before_keys = range->KeysLeft();
2257
+ assert(before_keys > 0); // Caller should ensure
2258
+ if (rep_->whole_key_filtering) {
2259
+ filter->KeysMayMatch(range, prefix_extractor, kNotValid, no_io,
2260
+ lookup_context);
2261
+ uint64_t after_keys = range->KeysLeft();
2262
+ if (after_keys) {
2263
+ RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE,
2264
+ after_keys);
2265
+ PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, after_keys,
2266
+ rep_->level);
2267
+ }
2268
+ uint64_t filtered_keys = before_keys - after_keys;
2269
+ if (filtered_keys) {
2270
+ RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL, filtered_keys);
2271
+ PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, filtered_keys,
2272
+ rep_->level);
2273
+ }
2274
+ } else if (!read_options.total_order_seek && prefix_extractor &&
2275
+ rep_->table_properties->prefix_extractor_name.compare(
2276
+ prefix_extractor->Name()) == 0) {
2277
+ filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false,
2278
+ lookup_context);
2279
+ RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_PREFIX_CHECKED,
2280
+ before_keys);
2281
+ uint64_t after_keys = range->KeysLeft();
2282
+ uint64_t filtered_keys = before_keys - after_keys;
2283
+ if (filtered_keys) {
2284
+ RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_PREFIX_USEFUL,
2285
+ filtered_keys);
2286
+ }
2287
+ }
2288
+ }
2289
+
2290
+ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2291
+ GetContext* get_context,
2292
+ const SliceTransform* prefix_extractor,
2293
+ bool skip_filters) {
2294
+ assert(key.size() >= 8); // key must be internal key
2295
+ assert(get_context != nullptr);
2296
+ Status s;
2297
+ const bool no_io = read_options.read_tier == kBlockCacheTier;
2298
+
2299
+ FilterBlockReader* const filter =
2300
+ !skip_filters ? rep_->filter.get() : nullptr;
2301
+
2302
+ // First check the full filter
2303
+ // If full filter not useful, Then go into each block
2304
+ uint64_t tracing_get_id = get_context->get_tracing_get_id();
2305
+ BlockCacheLookupContext lookup_context{
2306
+ TableReaderCaller::kUserGet, tracing_get_id,
2307
+ /*get_from_user_specified_snapshot=*/read_options.snapshot != nullptr};
2308
+ if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) {
2309
+ // Trace the key since it contains both user key and sequence number.
2310
+ lookup_context.referenced_key = key.ToString();
2311
+ lookup_context.get_from_user_specified_snapshot =
2312
+ read_options.snapshot != nullptr;
2313
+ }
2314
+ TEST_SYNC_POINT("BlockBasedTable::Get:BeforeFilterMatch");
2315
+ const bool may_match =
2316
+ FullFilterKeyMayMatch(read_options, filter, key, no_io, prefix_extractor,
2317
+ get_context, &lookup_context);
2318
+ TEST_SYNC_POINT("BlockBasedTable::Get:AfterFilterMatch");
2319
+ if (!may_match) {
2320
+ RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
2321
+ PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level);
2322
+ } else {
2323
+ IndexBlockIter iiter_on_stack;
2324
+ // if prefix_extractor found in block differs from options, disable
2325
+ // BlockPrefixIndex. Only do this check when index_type is kHashSearch.
2326
+ bool need_upper_bound_check = false;
2327
+ if (rep_->index_type == BlockBasedTableOptions::kHashSearch) {
2328
+ need_upper_bound_check = PrefixExtractorChanged(
2329
+ rep_->table_properties.get(), prefix_extractor);
2330
+ }
2331
+ auto iiter =
2332
+ NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack,
2333
+ get_context, &lookup_context);
2334
+ std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
2335
+ if (iiter != &iiter_on_stack) {
2336
+ iiter_unique_ptr.reset(iiter);
2337
+ }
2338
+
2339
+ size_t ts_sz =
2340
+ rep_->internal_comparator.user_comparator()->timestamp_size();
2341
+ bool matched = false; // if such user key matched a key in SST
2342
+ bool done = false;
2343
+ for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
2344
+ IndexValue v = iiter->value();
2345
+
2346
+ bool not_exist_in_filter =
2347
+ filter != nullptr && filter->IsBlockBased() == true &&
2348
+ !filter->KeyMayMatch(ExtractUserKeyAndStripTimestamp(key, ts_sz),
2349
+ prefix_extractor, v.handle.offset(), no_io,
2350
+ /*const_ikey_ptr=*/nullptr, get_context,
2351
+ &lookup_context);
2352
+
2353
+ if (not_exist_in_filter) {
2354
+ // Not found
2355
+ // TODO: think about interaction with Merge. If a user key cannot
2356
+ // cross one data block, we should be fine.
2357
+ RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
2358
+ PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level);
2359
+ break;
2360
+ }
2361
+
2362
+ if (!v.first_internal_key.empty() && !skip_filters &&
2363
+ UserComparatorWrapper(rep_->internal_comparator.user_comparator())
2364
+ .Compare(ExtractUserKey(key),
2365
+ ExtractUserKey(v.first_internal_key)) < 0) {
2366
+ // The requested key falls between highest key in previous block and
2367
+ // lowest key in current block.
2368
+ break;
2369
+ }
2370
+
2371
+ BlockCacheLookupContext lookup_data_block_context{
2372
+ TableReaderCaller::kUserGet, tracing_get_id,
2373
+ /*get_from_user_specified_snapshot=*/read_options.snapshot !=
2374
+ nullptr};
2375
+ bool does_referenced_key_exist = false;
2376
+ DataBlockIter biter;
2377
+ uint64_t referenced_data_size = 0;
2378
+ NewDataBlockIterator<DataBlockIter>(
2379
+ read_options, v.handle, &biter, BlockType::kData, get_context,
2380
+ &lookup_data_block_context,
2381
+ /*s=*/Status(), /*prefetch_buffer*/ nullptr);
2382
+
2383
+ if (no_io && biter.status().IsIncomplete()) {
2384
+ // couldn't get block from block_cache
2385
+ // Update Saver.state to Found because we are only looking for
2386
+ // whether we can guarantee the key is not there when "no_io" is set
2387
+ get_context->MarkKeyMayExist();
2388
+ break;
2389
+ }
2390
+ if (!biter.status().ok()) {
2391
+ s = biter.status();
2392
+ break;
2393
+ }
2394
+
2395
+ bool may_exist = biter.SeekForGet(key);
2396
+ // If user-specified timestamp is supported, we cannot end the search
2397
+ // just because hash index lookup indicates the key+ts does not exist.
2398
+ if (!may_exist && ts_sz == 0) {
2399
+ // HashSeek cannot find the key this block and the the iter is not
2400
+ // the end of the block, i.e. cannot be in the following blocks
2401
+ // either. In this case, the seek_key cannot be found, so we break
2402
+ // from the top level for-loop.
2403
+ done = true;
2404
+ } else {
2405
+ // Call the *saver function on each entry/block until it returns false
2406
+ for (; biter.Valid(); biter.Next()) {
2407
+ ParsedInternalKey parsed_key;
2408
+ Status pik_status = ParseInternalKey(
2409
+ biter.key(), &parsed_key, false /* log_err_key */); // TODO
2410
+ if (!pik_status.ok()) {
2411
+ s = pik_status;
2412
+ }
2413
+
2414
+ if (!get_context->SaveValue(
2415
+ parsed_key, biter.value(), &matched,
2416
+ biter.IsValuePinned() ? &biter : nullptr)) {
2417
+ if (get_context->State() == GetContext::GetState::kFound) {
2418
+ does_referenced_key_exist = true;
2419
+ referenced_data_size = biter.key().size() + biter.value().size();
2420
+ }
2421
+ done = true;
2422
+ break;
2423
+ }
2424
+ }
2425
+ s = biter.status();
2426
+ }
2427
+ // Write the block cache access record.
2428
+ if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) {
2429
+ // Avoid making copy of block_key, cf_name, and referenced_key when
2430
+ // constructing the access record.
2431
+ Slice referenced_key;
2432
+ if (does_referenced_key_exist) {
2433
+ referenced_key = biter.key();
2434
+ } else {
2435
+ referenced_key = key;
2436
+ }
2437
+ BlockCacheTraceRecord access_record(
2438
+ rep_->ioptions.env->NowMicros(),
2439
+ /*block_key=*/"", lookup_data_block_context.block_type,
2440
+ lookup_data_block_context.block_size, rep_->cf_id_for_tracing(),
2441
+ /*cf_name=*/"", rep_->level_for_tracing(),
2442
+ rep_->sst_number_for_tracing(), lookup_data_block_context.caller,
2443
+ lookup_data_block_context.is_cache_hit,
2444
+ lookup_data_block_context.no_insert,
2445
+ lookup_data_block_context.get_id,
2446
+ lookup_data_block_context.get_from_user_specified_snapshot,
2447
+ /*referenced_key=*/"", referenced_data_size,
2448
+ lookup_data_block_context.num_keys_in_block,
2449
+ does_referenced_key_exist);
2450
+ // TODO: Should handle status here?
2451
+ block_cache_tracer_
2452
+ ->WriteBlockAccess(access_record,
2453
+ lookup_data_block_context.block_key,
2454
+ rep_->cf_name_for_tracing(), referenced_key)
2455
+ .PermitUncheckedError();
2456
+ }
2457
+
2458
+ if (done) {
2459
+ // Avoid the extra Next which is expensive in two-level indexes
2460
+ break;
2461
+ }
2462
+ }
2463
+ if (matched && filter != nullptr && !filter->IsBlockBased()) {
2464
+ RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE);
2465
+ PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1,
2466
+ rep_->level);
2467
+ }
2468
+ if (s.ok() && !iiter->status().IsNotFound()) {
2469
+ s = iiter->status();
2470
+ }
2471
+ }
2472
+
2473
+ return s;
2474
+ }
2475
+
2476
+ using MultiGetRange = MultiGetContext::Range;
2477
+ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
2478
+ const MultiGetRange* mget_range,
2479
+ const SliceTransform* prefix_extractor,
2480
+ bool skip_filters) {
2481
+ if (mget_range->empty()) {
2482
+ // Caller should ensure non-empty (performance bug)
2483
+ assert(false);
2484
+ return; // Nothing to do
2485
+ }
2486
+
2487
+ FilterBlockReader* const filter =
2488
+ !skip_filters ? rep_->filter.get() : nullptr;
2489
+ MultiGetRange sst_file_range(*mget_range, mget_range->begin(),
2490
+ mget_range->end());
2491
+
2492
+ // First check the full filter
2493
+ // If full filter not useful, Then go into each block
2494
+ const bool no_io = read_options.read_tier == kBlockCacheTier;
2495
+ uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId;
2496
+ if (sst_file_range.begin()->get_context) {
2497
+ tracing_mget_id = sst_file_range.begin()->get_context->get_tracing_get_id();
2498
+ }
2499
+ BlockCacheLookupContext lookup_context{
2500
+ TableReaderCaller::kUserMultiGet, tracing_mget_id,
2501
+ /*get_from_user_specified_snapshot=*/read_options.snapshot != nullptr};
2502
+ FullFilterKeysMayMatch(read_options, filter, &sst_file_range, no_io,
2503
+ prefix_extractor, &lookup_context);
2504
+
2505
+ if (!sst_file_range.empty()) {
2506
+ IndexBlockIter iiter_on_stack;
2507
+ // if prefix_extractor found in block differs from options, disable
2508
+ // BlockPrefixIndex. Only do this check when index_type is kHashSearch.
2509
+ bool need_upper_bound_check = false;
2510
+ if (rep_->index_type == BlockBasedTableOptions::kHashSearch) {
2511
+ need_upper_bound_check = PrefixExtractorChanged(
2512
+ rep_->table_properties.get(), prefix_extractor);
2513
+ }
2514
+ auto iiter =
2515
+ NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack,
2516
+ sst_file_range.begin()->get_context, &lookup_context);
2517
+ std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
2518
+ if (iiter != &iiter_on_stack) {
2519
+ iiter_unique_ptr.reset(iiter);
2520
+ }
2521
+
2522
+ uint64_t offset = std::numeric_limits<uint64_t>::max();
2523
+ autovector<BlockHandle, MultiGetContext::MAX_BATCH_SIZE> block_handles;
2524
+ autovector<CachableEntry<Block>, MultiGetContext::MAX_BATCH_SIZE> results;
2525
+ autovector<Status, MultiGetContext::MAX_BATCH_SIZE> statuses;
2526
+ char stack_buf[kMultiGetReadStackBufSize];
2527
+ std::unique_ptr<char[]> block_buf;
2528
+ {
2529
+ MultiGetRange data_block_range(sst_file_range, sst_file_range.begin(),
2530
+ sst_file_range.end());
2531
+
2532
+ CachableEntry<UncompressionDict> uncompression_dict;
2533
+ Status uncompression_dict_status;
2534
+ uncompression_dict_status.PermitUncheckedError();
2535
+ bool uncompression_dict_inited = false;
2536
+ size_t total_len = 0;
2537
+ ReadOptions ro = read_options;
2538
+ ro.read_tier = kBlockCacheTier;
2539
+
2540
+ for (auto miter = data_block_range.begin();
2541
+ miter != data_block_range.end(); ++miter) {
2542
+ const Slice& key = miter->ikey;
2543
+ iiter->Seek(miter->ikey);
2544
+
2545
+ IndexValue v;
2546
+ if (iiter->Valid()) {
2547
+ v = iiter->value();
2548
+ }
2549
+ if (!iiter->Valid() ||
2550
+ (!v.first_internal_key.empty() && !skip_filters &&
2551
+ UserComparatorWrapper(rep_->internal_comparator.user_comparator())
2552
+ .Compare(ExtractUserKey(key),
2553
+ ExtractUserKey(v.first_internal_key)) < 0)) {
2554
+ // The requested key falls between highest key in previous block and
2555
+ // lowest key in current block.
2556
+ if (!iiter->status().IsNotFound()) {
2557
+ *(miter->s) = iiter->status();
2558
+ }
2559
+ data_block_range.SkipKey(miter);
2560
+ sst_file_range.SkipKey(miter);
2561
+ continue;
2562
+ }
2563
+
2564
+ if (!uncompression_dict_inited && rep_->uncompression_dict_reader) {
2565
+ uncompression_dict_status =
2566
+ rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
2567
+ nullptr /* prefetch_buffer */, no_io,
2568
+ sst_file_range.begin()->get_context, &lookup_context,
2569
+ &uncompression_dict);
2570
+ uncompression_dict_inited = true;
2571
+ }
2572
+
2573
+ if (!uncompression_dict_status.ok()) {
2574
+ assert(!uncompression_dict_status.IsNotFound());
2575
+ *(miter->s) = uncompression_dict_status;
2576
+ data_block_range.SkipKey(miter);
2577
+ sst_file_range.SkipKey(miter);
2578
+ continue;
2579
+ }
2580
+
2581
+ statuses.emplace_back();
2582
+ results.emplace_back();
2583
+ if (v.handle.offset() == offset) {
2584
+ // We're going to reuse the block for this key later on. No need to
2585
+ // look it up now. Place a null handle
2586
+ block_handles.emplace_back(BlockHandle::NullBlockHandle());
2587
+ continue;
2588
+ }
2589
+ // Lookup the cache for the given data block referenced by an index
2590
+ // iterator value (i.e BlockHandle). If it exists in the cache,
2591
+ // initialize block to the contents of the data block.
2592
+ offset = v.handle.offset();
2593
+ BlockHandle handle = v.handle;
2594
+ BlockCacheLookupContext lookup_data_block_context(
2595
+ TableReaderCaller::kUserMultiGet);
2596
+ const UncompressionDict& dict = uncompression_dict.GetValue()
2597
+ ? *uncompression_dict.GetValue()
2598
+ : UncompressionDict::GetEmptyDict();
2599
+ Status s = RetrieveBlock(
2600
+ nullptr, ro, handle, dict, &(results.back()), BlockType::kData,
2601
+ miter->get_context, &lookup_data_block_context,
2602
+ /* for_compaction */ false, /* use_cache */ true);
2603
+ if (s.IsIncomplete()) {
2604
+ s = Status::OK();
2605
+ }
2606
+ if (s.ok() && !results.back().IsEmpty()) {
2607
+ // Found it in the cache. Add NULL handle to indicate there is
2608
+ // nothing to read from disk
2609
+ block_handles.emplace_back(BlockHandle::NullBlockHandle());
2610
+ } else {
2611
+ block_handles.emplace_back(handle);
2612
+ total_len += block_size(handle);
2613
+ }
2614
+ }
2615
+
2616
+ if (total_len) {
2617
+ char* scratch = nullptr;
2618
+ const UncompressionDict& dict = uncompression_dict.GetValue()
2619
+ ? *uncompression_dict.GetValue()
2620
+ : UncompressionDict::GetEmptyDict();
2621
+ assert(uncompression_dict_inited || !rep_->uncompression_dict_reader);
2622
+ assert(uncompression_dict_status.ok());
2623
+ // If using direct IO, then scratch is not used, so keep it nullptr.
2624
+ // If the blocks need to be uncompressed and we don't need the
2625
+ // compressed blocks, then we can use a contiguous block of
2626
+ // memory to read in all the blocks as it will be temporary
2627
+ // storage
2628
+ // 1. If blocks are compressed and compressed block cache is there,
2629
+ // alloc heap bufs
2630
+ // 2. If blocks are uncompressed, alloc heap bufs
2631
+ // 3. If blocks are compressed and no compressed block cache, use
2632
+ // stack buf
2633
+ if (!rep_->file->use_direct_io() &&
2634
+ rep_->table_options.block_cache_compressed == nullptr &&
2635
+ rep_->blocks_maybe_compressed) {
2636
+ if (total_len <= kMultiGetReadStackBufSize) {
2637
+ scratch = stack_buf;
2638
+ } else {
2639
+ scratch = new char[total_len];
2640
+ block_buf.reset(scratch);
2641
+ }
2642
+ }
2643
+ RetrieveMultipleBlocks(read_options, &data_block_range, &block_handles,
2644
+ &statuses, &results, scratch, dict);
2645
+ if (sst_file_range.begin()->get_context) {
2646
+ ++(sst_file_range.begin()
2647
+ ->get_context->get_context_stats_.num_sst_read);
2648
+ }
2649
+ }
2650
+ }
2651
+
2652
+ DataBlockIter first_biter;
2653
+ DataBlockIter next_biter;
2654
+ size_t idx_in_batch = 0;
2655
+ for (auto miter = sst_file_range.begin(); miter != sst_file_range.end();
2656
+ ++miter) {
2657
+ Status s;
2658
+ GetContext* get_context = miter->get_context;
2659
+ const Slice& key = miter->ikey;
2660
+ bool matched = false; // if such user key matched a key in SST
2661
+ bool done = false;
2662
+ bool first_block = true;
2663
+ do {
2664
+ DataBlockIter* biter = nullptr;
2665
+ bool reusing_block = true;
2666
+ uint64_t referenced_data_size = 0;
2667
+ bool does_referenced_key_exist = false;
2668
+ BlockCacheLookupContext lookup_data_block_context(
2669
+ TableReaderCaller::kUserMultiGet, tracing_mget_id,
2670
+ /*get_from_user_specified_snapshot=*/read_options.snapshot !=
2671
+ nullptr);
2672
+ if (first_block) {
2673
+ if (!block_handles[idx_in_batch].IsNull() ||
2674
+ !results[idx_in_batch].IsEmpty()) {
2675
+ first_biter.Invalidate(Status::OK());
2676
+ NewDataBlockIterator<DataBlockIter>(
2677
+ read_options, results[idx_in_batch], &first_biter,
2678
+ statuses[idx_in_batch]);
2679
+ reusing_block = false;
2680
+ } else {
2681
+ // If handler is null and result is empty, then the status is never
2682
+ // set, which should be the initial value: ok().
2683
+ assert(statuses[idx_in_batch].ok());
2684
+ }
2685
+ biter = &first_biter;
2686
+ idx_in_batch++;
2687
+ } else {
2688
+ IndexValue v = iiter->value();
2689
+ if (!v.first_internal_key.empty() && !skip_filters &&
2690
+ UserComparatorWrapper(rep_->internal_comparator.user_comparator())
2691
+ .Compare(ExtractUserKey(key),
2692
+ ExtractUserKey(v.first_internal_key)) < 0) {
2693
+ // The requested key falls between highest key in previous block and
2694
+ // lowest key in current block.
2695
+ break;
2696
+ }
2697
+
2698
+ next_biter.Invalidate(Status::OK());
2699
+ NewDataBlockIterator<DataBlockIter>(
2700
+ read_options, iiter->value().handle, &next_biter,
2701
+ BlockType::kData, get_context, &lookup_data_block_context,
2702
+ Status(), nullptr);
2703
+ biter = &next_biter;
2704
+ reusing_block = false;
2705
+ }
2706
+
2707
+ if (read_options.read_tier == kBlockCacheTier &&
2708
+ biter->status().IsIncomplete()) {
2709
+ // couldn't get block from block_cache
2710
+ // Update Saver.state to Found because we are only looking for
2711
+ // whether we can guarantee the key is not there when "no_io" is set
2712
+ get_context->MarkKeyMayExist();
2713
+ break;
2714
+ }
2715
+ if (!biter->status().ok()) {
2716
+ s = biter->status();
2717
+ break;
2718
+ }
2719
+
2720
+ bool may_exist = biter->SeekForGet(key);
2721
+ if (!may_exist) {
2722
+ // HashSeek cannot find the key this block and the the iter is not
2723
+ // the end of the block, i.e. cannot be in the following blocks
2724
+ // either. In this case, the seek_key cannot be found, so we break
2725
+ // from the top level for-loop.
2726
+ break;
2727
+ }
2728
+
2729
+ // Call the *saver function on each entry/block until it returns false
2730
+ for (; biter->Valid(); biter->Next()) {
2731
+ ParsedInternalKey parsed_key;
2732
+ Cleanable dummy;
2733
+ Cleanable* value_pinner = nullptr;
2734
+ Status pik_status = ParseInternalKey(
2735
+ biter->key(), &parsed_key, false /* log_err_key */); // TODO
2736
+ if (!pik_status.ok()) {
2737
+ s = pik_status;
2738
+ }
2739
+ if (biter->IsValuePinned()) {
2740
+ if (reusing_block) {
2741
+ Cache* block_cache = rep_->table_options.block_cache.get();
2742
+ assert(biter->cache_handle() != nullptr);
2743
+ block_cache->Ref(biter->cache_handle());
2744
+ dummy.RegisterCleanup(&ReleaseCachedEntry, block_cache,
2745
+ biter->cache_handle());
2746
+ value_pinner = &dummy;
2747
+ } else {
2748
+ value_pinner = biter;
2749
+ }
2750
+ }
2751
+ if (!get_context->SaveValue(parsed_key, biter->value(), &matched,
2752
+ value_pinner)) {
2753
+ if (get_context->State() == GetContext::GetState::kFound) {
2754
+ does_referenced_key_exist = true;
2755
+ referenced_data_size =
2756
+ biter->key().size() + biter->value().size();
2757
+ }
2758
+ done = true;
2759
+ break;
2760
+ }
2761
+ s = biter->status();
2762
+ }
2763
+ // Write the block cache access.
2764
+ if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) {
2765
+ // Avoid making copy of block_key, cf_name, and referenced_key when
2766
+ // constructing the access record.
2767
+ Slice referenced_key;
2768
+ if (does_referenced_key_exist) {
2769
+ referenced_key = biter->key();
2770
+ } else {
2771
+ referenced_key = key;
2772
+ }
2773
+ BlockCacheTraceRecord access_record(
2774
+ rep_->ioptions.env->NowMicros(),
2775
+ /*block_key=*/"", lookup_data_block_context.block_type,
2776
+ lookup_data_block_context.block_size, rep_->cf_id_for_tracing(),
2777
+ /*cf_name=*/"", rep_->level_for_tracing(),
2778
+ rep_->sst_number_for_tracing(), lookup_data_block_context.caller,
2779
+ lookup_data_block_context.is_cache_hit,
2780
+ lookup_data_block_context.no_insert,
2781
+ lookup_data_block_context.get_id,
2782
+ lookup_data_block_context.get_from_user_specified_snapshot,
2783
+ /*referenced_key=*/"", referenced_data_size,
2784
+ lookup_data_block_context.num_keys_in_block,
2785
+ does_referenced_key_exist);
2786
+ // TODO: Should handle status here?
2787
+ block_cache_tracer_
2788
+ ->WriteBlockAccess(access_record,
2789
+ lookup_data_block_context.block_key,
2790
+ rep_->cf_name_for_tracing(), referenced_key)
2791
+ .PermitUncheckedError();
2792
+ }
2793
+ s = biter->status();
2794
+ if (done) {
2795
+ // Avoid the extra Next which is expensive in two-level indexes
2796
+ break;
2797
+ }
2798
+ if (first_block) {
2799
+ iiter->Seek(key);
2800
+ }
2801
+ first_block = false;
2802
+ iiter->Next();
2803
+ } while (iiter->Valid());
2804
+
2805
+ if (matched && filter != nullptr && !filter->IsBlockBased()) {
2806
+ RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE);
2807
+ PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1,
2808
+ rep_->level);
2809
+ }
2810
+ if (s.ok() && !iiter->status().IsNotFound()) {
2811
+ s = iiter->status();
2812
+ }
2813
+ *(miter->s) = s;
2814
+ }
2815
+ #ifdef ROCKSDB_ASSERT_STATUS_CHECKED
2816
+ // Not sure why we need to do it. Should investigate more.
2817
+ for (auto& st : statuses) {
2818
+ st.PermitUncheckedError();
2819
+ }
2820
+ #endif // ROCKSDB_ASSERT_STATUS_CHECKED
2821
+ }
2822
+ }
2823
+
2824
+ Status BlockBasedTable::Prefetch(const Slice* const begin,
2825
+ const Slice* const end) {
2826
+ auto& comparator = rep_->internal_comparator;
2827
+ UserComparatorWrapper user_comparator(comparator.user_comparator());
2828
+ // pre-condition
2829
+ if (begin && end && comparator.Compare(*begin, *end) > 0) {
2830
+ return Status::InvalidArgument(*begin, *end);
2831
+ }
2832
+ BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch};
2833
+ IndexBlockIter iiter_on_stack;
2834
+ auto iiter = NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false,
2835
+ &iiter_on_stack, /*get_context=*/nullptr,
2836
+ &lookup_context);
2837
+ std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
2838
+ if (iiter != &iiter_on_stack) {
2839
+ iiter_unique_ptr = std::unique_ptr<InternalIteratorBase<IndexValue>>(iiter);
2840
+ }
2841
+
2842
+ if (!iiter->status().ok()) {
2843
+ // error opening index iterator
2844
+ return iiter->status();
2845
+ }
2846
+
2847
+ // indicates if we are on the last page that need to be pre-fetched
2848
+ bool prefetching_boundary_page = false;
2849
+
2850
+ for (begin ? iiter->Seek(*begin) : iiter->SeekToFirst(); iiter->Valid();
2851
+ iiter->Next()) {
2852
+ BlockHandle block_handle = iiter->value().handle;
2853
+ const bool is_user_key = !rep_->index_key_includes_seq;
2854
+ if (end &&
2855
+ ((!is_user_key && comparator.Compare(iiter->key(), *end) >= 0) ||
2856
+ (is_user_key &&
2857
+ user_comparator.Compare(iiter->key(), ExtractUserKey(*end)) >= 0))) {
2858
+ if (prefetching_boundary_page) {
2859
+ break;
2860
+ }
2861
+
2862
+ // The index entry represents the last key in the data block.
2863
+ // We should load this page into memory as well, but no more
2864
+ prefetching_boundary_page = true;
2865
+ }
2866
+
2867
+ // Load the block specified by the block_handle into the block cache
2868
+ DataBlockIter biter;
2869
+
2870
+ NewDataBlockIterator<DataBlockIter>(
2871
+ ReadOptions(), block_handle, &biter, /*type=*/BlockType::kData,
2872
+ /*get_context=*/nullptr, &lookup_context, Status(),
2873
+ /*prefetch_buffer=*/nullptr);
2874
+
2875
+ if (!biter.status().ok()) {
2876
+ // there was an unexpected error while pre-fetching
2877
+ return biter.status();
2878
+ }
2879
+ }
2880
+
2881
+ return Status::OK();
2882
+ }
2883
+
2884
+ Status BlockBasedTable::VerifyChecksum(const ReadOptions& read_options,
2885
+ TableReaderCaller caller) {
2886
+ Status s;
2887
+ // Check Meta blocks
2888
+ std::unique_ptr<Block> metaindex;
2889
+ std::unique_ptr<InternalIterator> metaindex_iter;
2890
+ ReadOptions ro;
2891
+ s = ReadMetaIndexBlock(ro, nullptr /* prefetch buffer */, &metaindex,
2892
+ &metaindex_iter);
2893
+ if (s.ok()) {
2894
+ s = VerifyChecksumInMetaBlocks(metaindex_iter.get());
2895
+ if (!s.ok()) {
2896
+ return s;
2897
+ }
2898
+ } else {
2899
+ return s;
2900
+ }
2901
+ // Check Data blocks
2902
+ IndexBlockIter iiter_on_stack;
2903
+ BlockCacheLookupContext context{caller};
2904
+ InternalIteratorBase<IndexValue>* iiter = NewIndexIterator(
2905
+ read_options, /*disable_prefix_seek=*/false, &iiter_on_stack,
2906
+ /*get_context=*/nullptr, &context);
2907
+ std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
2908
+ if (iiter != &iiter_on_stack) {
2909
+ iiter_unique_ptr = std::unique_ptr<InternalIteratorBase<IndexValue>>(iiter);
2910
+ }
2911
+ if (!iiter->status().ok()) {
2912
+ // error opening index iterator
2913
+ return iiter->status();
2914
+ }
2915
+ s = VerifyChecksumInBlocks(read_options, iiter);
2916
+ return s;
2917
+ }
2918
+
2919
+ Status BlockBasedTable::VerifyChecksumInBlocks(
2920
+ const ReadOptions& read_options,
2921
+ InternalIteratorBase<IndexValue>* index_iter) {
2922
+ Status s;
2923
+ // We are scanning the whole file, so no need to do exponential
2924
+ // increasing of the buffer size.
2925
+ size_t readahead_size = (read_options.readahead_size != 0)
2926
+ ? read_options.readahead_size
2927
+ : kMaxAutoReadaheadSize;
2928
+ // FilePrefetchBuffer doesn't work in mmap mode and readahead is not
2929
+ // needed there.
2930
+ FilePrefetchBuffer prefetch_buffer(
2931
+ rep_->file.get(), readahead_size /* readadhead_size */,
2932
+ readahead_size /* max_readahead_size */,
2933
+ !rep_->ioptions.allow_mmap_reads /* enable */);
2934
+
2935
+ for (index_iter->SeekToFirst(); index_iter->Valid(); index_iter->Next()) {
2936
+ s = index_iter->status();
2937
+ if (!s.ok()) {
2938
+ break;
2939
+ }
2940
+ BlockHandle handle = index_iter->value().handle;
2941
+ BlockContents contents;
2942
+ BlockFetcher block_fetcher(
2943
+ rep_->file.get(), &prefetch_buffer, rep_->footer, ReadOptions(), handle,
2944
+ &contents, rep_->ioptions, false /* decompress */,
2945
+ false /*maybe_compressed*/, BlockType::kData,
2946
+ UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options);
2947
+ s = block_fetcher.ReadBlockContents();
2948
+ if (!s.ok()) {
2949
+ break;
2950
+ }
2951
+ }
2952
+ if (s.ok()) {
2953
+ // In the case of two level indexes, we would have exited the above loop
2954
+ // by checking index_iter->Valid(), but Valid() might have returned false
2955
+ // due to an IO error. So check the index_iter status
2956
+ s = index_iter->status();
2957
+ }
2958
+ return s;
2959
+ }
2960
+
2961
+ BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName(
2962
+ const Slice& meta_block_name) {
2963
+ if (meta_block_name.starts_with(kFilterBlockPrefix) ||
2964
+ meta_block_name.starts_with(kFullFilterBlockPrefix) ||
2965
+ meta_block_name.starts_with(kPartitionedFilterBlockPrefix)) {
2966
+ return BlockType::kFilter;
2967
+ }
2968
+
2969
+ if (meta_block_name == kPropertiesBlock) {
2970
+ return BlockType::kProperties;
2971
+ }
2972
+
2973
+ if (meta_block_name == kCompressionDictBlock) {
2974
+ return BlockType::kCompressionDictionary;
2975
+ }
2976
+
2977
+ if (meta_block_name == kRangeDelBlock) {
2978
+ return BlockType::kRangeDeletion;
2979
+ }
2980
+
2981
+ if (meta_block_name == kHashIndexPrefixesBlock) {
2982
+ return BlockType::kHashIndexPrefixes;
2983
+ }
2984
+
2985
+ if (meta_block_name == kHashIndexPrefixesMetadataBlock) {
2986
+ return BlockType::kHashIndexMetadata;
2987
+ }
2988
+
2989
+ assert(false);
2990
+ return BlockType::kInvalid;
2991
+ }
2992
+
2993
+ Status BlockBasedTable::VerifyChecksumInMetaBlocks(
2994
+ InternalIteratorBase<Slice>* index_iter) {
2995
+ Status s;
2996
+ for (index_iter->SeekToFirst(); index_iter->Valid(); index_iter->Next()) {
2997
+ s = index_iter->status();
2998
+ if (!s.ok()) {
2999
+ break;
3000
+ }
3001
+ BlockHandle handle;
3002
+ Slice input = index_iter->value();
3003
+ s = handle.DecodeFrom(&input);
3004
+ BlockContents contents;
3005
+ const Slice meta_block_name = index_iter->key();
3006
+ BlockFetcher block_fetcher(
3007
+ rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer,
3008
+ ReadOptions(), handle, &contents, rep_->ioptions,
3009
+ false /* decompress */, false /*maybe_compressed*/,
3010
+ GetBlockTypeForMetaBlockByName(meta_block_name),
3011
+ UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options);
3012
+ s = block_fetcher.ReadBlockContents();
3013
+ if (s.IsCorruption() && meta_block_name == kPropertiesBlock) {
3014
+ TableProperties* table_properties;
3015
+ ReadOptions ro;
3016
+ s = TryReadPropertiesWithGlobalSeqno(ro, nullptr /* prefetch_buffer */,
3017
+ index_iter->value(),
3018
+ &table_properties);
3019
+ delete table_properties;
3020
+ }
3021
+ if (!s.ok()) {
3022
+ break;
3023
+ }
3024
+ }
3025
+ return s;
3026
+ }
3027
+
3028
+ bool BlockBasedTable::TEST_BlockInCache(const BlockHandle& handle) const {
3029
+ assert(rep_ != nullptr);
3030
+
3031
+ Cache* const cache = rep_->table_options.block_cache.get();
3032
+ if (cache == nullptr) {
3033
+ return false;
3034
+ }
3035
+
3036
+ char cache_key_storage[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
3037
+ Slice cache_key =
3038
+ GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, handle,
3039
+ cache_key_storage);
3040
+
3041
+ Cache::Handle* const cache_handle = cache->Lookup(cache_key);
3042
+ if (cache_handle == nullptr) {
3043
+ return false;
3044
+ }
3045
+
3046
+ cache->Release(cache_handle);
3047
+
3048
+ return true;
3049
+ }
3050
+
3051
+ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
3052
+ const Slice& key) {
3053
+ std::unique_ptr<InternalIteratorBase<IndexValue>> iiter(NewIndexIterator(
3054
+ options, /*need_upper_bound_check=*/false, /*input_iter=*/nullptr,
3055
+ /*get_context=*/nullptr, /*lookup_context=*/nullptr));
3056
+ iiter->Seek(key);
3057
+ assert(iiter->Valid());
3058
+
3059
+ return TEST_BlockInCache(iiter->value().handle);
3060
+ }
3061
+
3062
+ // REQUIRES: The following fields of rep_ should have already been populated:
3063
+ // 1. file
3064
+ // 2. index_handle,
3065
+ // 3. options
3066
+ // 4. internal_comparator
3067
+ // 5. index_type
3068
+ Status BlockBasedTable::CreateIndexReader(
3069
+ const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer,
3070
+ InternalIterator* preloaded_meta_index_iter, bool use_cache, bool prefetch,
3071
+ bool pin, BlockCacheLookupContext* lookup_context,
3072
+ std::unique_ptr<IndexReader>* index_reader) {
3073
+ // kHashSearch requires non-empty prefix_extractor but bypass checking
3074
+ // prefix_extractor here since we have no access to MutableCFOptions.
3075
+ // Add need_upper_bound_check flag in BlockBasedTable::NewIndexIterator.
3076
+ // If prefix_extractor does not match prefix_extractor_name from table
3077
+ // properties, turn off Hash Index by setting total_order_seek to true
3078
+
3079
+ switch (rep_->index_type) {
3080
+ case BlockBasedTableOptions::kTwoLevelIndexSearch: {
3081
+ return PartitionIndexReader::Create(this, ro, prefetch_buffer, use_cache,
3082
+ prefetch, pin, lookup_context,
3083
+ index_reader);
3084
+ }
3085
+ case BlockBasedTableOptions::kBinarySearch:
3086
+ FALLTHROUGH_INTENDED;
3087
+ case BlockBasedTableOptions::kBinarySearchWithFirstKey: {
3088
+ return BinarySearchIndexReader::Create(this, ro, prefetch_buffer,
3089
+ use_cache, prefetch, pin,
3090
+ lookup_context, index_reader);
3091
+ }
3092
+ case BlockBasedTableOptions::kHashSearch: {
3093
+ std::unique_ptr<Block> metaindex_guard;
3094
+ std::unique_ptr<InternalIterator> metaindex_iter_guard;
3095
+ auto meta_index_iter = preloaded_meta_index_iter;
3096
+ bool should_fallback = false;
3097
+ if (rep_->internal_prefix_transform.get() == nullptr) {
3098
+ ROCKS_LOG_WARN(rep_->ioptions.info_log,
3099
+ "No prefix extractor passed in. Fall back to binary"
3100
+ " search index.");
3101
+ should_fallback = true;
3102
+ } else if (meta_index_iter == nullptr) {
3103
+ auto s = ReadMetaIndexBlock(ro, prefetch_buffer, &metaindex_guard,
3104
+ &metaindex_iter_guard);
3105
+ if (!s.ok()) {
3106
+ // we simply fall back to binary search in case there is any
3107
+ // problem with prefix hash index loading.
3108
+ ROCKS_LOG_WARN(rep_->ioptions.info_log,
3109
+ "Unable to read the metaindex block."
3110
+ " Fall back to binary search index.");
3111
+ should_fallback = true;
3112
+ }
3113
+ meta_index_iter = metaindex_iter_guard.get();
3114
+ }
3115
+
3116
+ if (should_fallback) {
3117
+ return BinarySearchIndexReader::Create(this, ro, prefetch_buffer,
3118
+ use_cache, prefetch, pin,
3119
+ lookup_context, index_reader);
3120
+ } else {
3121
+ return HashIndexReader::Create(this, ro, prefetch_buffer,
3122
+ meta_index_iter, use_cache, prefetch,
3123
+ pin, lookup_context, index_reader);
3124
+ }
3125
+ }
3126
+ default: {
3127
+ std::string error_message =
3128
+ "Unrecognized index type: " + ToString(rep_->index_type);
3129
+ return Status::InvalidArgument(error_message.c_str());
3130
+ }
3131
+ }
3132
+ }
3133
+
3134
+ uint64_t BlockBasedTable::ApproximateDataOffsetOf(
3135
+ const InternalIteratorBase<IndexValue>& index_iter,
3136
+ uint64_t data_size) const {
3137
+ if (index_iter.Valid()) {
3138
+ BlockHandle handle = index_iter.value().handle;
3139
+ return handle.offset();
3140
+ } else {
3141
+ // The iterator is past the last key in the file.
3142
+ return data_size;
3143
+ }
3144
+ }
3145
+
3146
+ uint64_t BlockBasedTable::GetApproximateDataSize() {
3147
+ // Should be in table properties unless super old version
3148
+ if (rep_->table_properties) {
3149
+ return rep_->table_properties->data_size;
3150
+ }
3151
+ // Fall back to rough estimate from footer
3152
+ return rep_->footer.metaindex_handle().offset();
3153
+ }
3154
+
3155
+ uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key,
3156
+ TableReaderCaller caller) {
3157
+ uint64_t data_size = GetApproximateDataSize();
3158
+ if (UNLIKELY(data_size == 0)) {
3159
+ // Hmm. Let's just split in half to avoid skewing one way or another,
3160
+ // since we don't know whether we're operating on lower bound or
3161
+ // upper bound.
3162
+ return rep_->file_size / 2;
3163
+ }
3164
+
3165
+ BlockCacheLookupContext context(caller);
3166
+ IndexBlockIter iiter_on_stack;
3167
+ ReadOptions ro;
3168
+ ro.total_order_seek = true;
3169
+ auto index_iter =
3170
+ NewIndexIterator(ro, /*disable_prefix_seek=*/true,
3171
+ /*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr,
3172
+ /*lookup_context=*/&context);
3173
+ std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
3174
+ if (index_iter != &iiter_on_stack) {
3175
+ iiter_unique_ptr.reset(index_iter);
3176
+ }
3177
+
3178
+ index_iter->Seek(key);
3179
+
3180
+ uint64_t offset = ApproximateDataOffsetOf(*index_iter, data_size);
3181
+ // Pro-rate file metadata (incl filters) size-proportionally across data
3182
+ // blocks.
3183
+ double size_ratio =
3184
+ static_cast<double>(offset) / static_cast<double>(data_size);
3185
+ return static_cast<uint64_t>(size_ratio *
3186
+ static_cast<double>(rep_->file_size));
3187
+ }
3188
+
3189
+ uint64_t BlockBasedTable::ApproximateSize(const Slice& start, const Slice& end,
3190
+ TableReaderCaller caller) {
3191
+ assert(rep_->internal_comparator.Compare(start, end) <= 0);
3192
+
3193
+ uint64_t data_size = GetApproximateDataSize();
3194
+ if (UNLIKELY(data_size == 0)) {
3195
+ // Hmm. Assume whole file is involved, since we have lower and upper
3196
+ // bound.
3197
+ return rep_->file_size;
3198
+ }
3199
+
3200
+ BlockCacheLookupContext context(caller);
3201
+ IndexBlockIter iiter_on_stack;
3202
+ ReadOptions ro;
3203
+ ro.total_order_seek = true;
3204
+ auto index_iter =
3205
+ NewIndexIterator(ro, /*disable_prefix_seek=*/true,
3206
+ /*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr,
3207
+ /*lookup_context=*/&context);
3208
+ std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
3209
+ if (index_iter != &iiter_on_stack) {
3210
+ iiter_unique_ptr.reset(index_iter);
3211
+ }
3212
+
3213
+ index_iter->Seek(start);
3214
+ uint64_t start_offset = ApproximateDataOffsetOf(*index_iter, data_size);
3215
+ index_iter->Seek(end);
3216
+ uint64_t end_offset = ApproximateDataOffsetOf(*index_iter, data_size);
3217
+
3218
+ assert(end_offset >= start_offset);
3219
+ // Pro-rate file metadata (incl filters) size-proportionally across data
3220
+ // blocks.
3221
+ double size_ratio = static_cast<double>(end_offset - start_offset) /
3222
+ static_cast<double>(data_size);
3223
+ return static_cast<uint64_t>(size_ratio *
3224
+ static_cast<double>(rep_->file_size));
3225
+ }
3226
+
3227
+ bool BlockBasedTable::TEST_FilterBlockInCache() const {
3228
+ assert(rep_ != nullptr);
3229
+ return TEST_BlockInCache(rep_->filter_handle);
3230
+ }
3231
+
3232
+ bool BlockBasedTable::TEST_IndexBlockInCache() const {
3233
+ assert(rep_ != nullptr);
3234
+
3235
+ return TEST_BlockInCache(rep_->footer.index_handle());
3236
+ }
3237
+
3238
+ Status BlockBasedTable::GetKVPairsFromDataBlocks(
3239
+ std::vector<KVPairBlock>* kv_pair_blocks) {
3240
+ std::unique_ptr<InternalIteratorBase<IndexValue>> blockhandles_iter(
3241
+ NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false,
3242
+ /*input_iter=*/nullptr, /*get_context=*/nullptr,
3243
+ /*lookup_contex=*/nullptr));
3244
+
3245
+ Status s = blockhandles_iter->status();
3246
+ if (!s.ok()) {
3247
+ // Cannot read Index Block
3248
+ return s;
3249
+ }
3250
+
3251
+ for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid();
3252
+ blockhandles_iter->Next()) {
3253
+ s = blockhandles_iter->status();
3254
+
3255
+ if (!s.ok()) {
3256
+ break;
3257
+ }
3258
+
3259
+ std::unique_ptr<InternalIterator> datablock_iter;
3260
+ datablock_iter.reset(NewDataBlockIterator<DataBlockIter>(
3261
+ ReadOptions(), blockhandles_iter->value().handle,
3262
+ /*input_iter=*/nullptr, /*type=*/BlockType::kData,
3263
+ /*get_context=*/nullptr, /*lookup_context=*/nullptr, Status(),
3264
+ /*prefetch_buffer=*/nullptr));
3265
+ s = datablock_iter->status();
3266
+
3267
+ if (!s.ok()) {
3268
+ // Error reading the block - Skipped
3269
+ continue;
3270
+ }
3271
+
3272
+ KVPairBlock kv_pair_block;
3273
+ for (datablock_iter->SeekToFirst(); datablock_iter->Valid();
3274
+ datablock_iter->Next()) {
3275
+ s = datablock_iter->status();
3276
+ if (!s.ok()) {
3277
+ // Error reading the block - Skipped
3278
+ break;
3279
+ }
3280
+ const Slice& key = datablock_iter->key();
3281
+ const Slice& value = datablock_iter->value();
3282
+ std::string key_copy = std::string(key.data(), key.size());
3283
+ std::string value_copy = std::string(value.data(), value.size());
3284
+
3285
+ kv_pair_block.push_back(
3286
+ std::make_pair(std::move(key_copy), std::move(value_copy)));
3287
+ }
3288
+ kv_pair_blocks->push_back(std::move(kv_pair_block));
3289
+ }
3290
+ return Status::OK();
3291
+ }
3292
+
3293
+ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
3294
+ WritableFileStringStreamAdapter out_file_wrapper(out_file);
3295
+ std::ostream out_stream(&out_file_wrapper);
3296
+ // Output Footer
3297
+ out_stream << "Footer Details:\n"
3298
+ "--------------------------------------\n";
3299
+ out_stream << " " << rep_->footer.ToString() << "\n";
3300
+
3301
+ // Output MetaIndex
3302
+ out_stream << "Metaindex Details:\n"
3303
+ "--------------------------------------\n";
3304
+ std::unique_ptr<Block> metaindex;
3305
+ std::unique_ptr<InternalIterator> metaindex_iter;
3306
+ ReadOptions ro;
3307
+ Status s = ReadMetaIndexBlock(ro, nullptr /* prefetch_buffer */, &metaindex,
3308
+ &metaindex_iter);
3309
+ if (s.ok()) {
3310
+ for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid();
3311
+ metaindex_iter->Next()) {
3312
+ s = metaindex_iter->status();
3313
+ if (!s.ok()) {
3314
+ return s;
3315
+ }
3316
+ if (metaindex_iter->key() == kPropertiesBlock) {
3317
+ out_stream << " Properties block handle: "
3318
+ << metaindex_iter->value().ToString(true) << "\n";
3319
+ } else if (metaindex_iter->key() == kCompressionDictBlock) {
3320
+ out_stream << " Compression dictionary block handle: "
3321
+ << metaindex_iter->value().ToString(true) << "\n";
3322
+ } else if (strstr(metaindex_iter->key().ToString().c_str(),
3323
+ "filter.rocksdb.") != nullptr) {
3324
+ out_stream << " Filter block handle: "
3325
+ << metaindex_iter->value().ToString(true) << "\n";
3326
+ } else if (metaindex_iter->key() == kRangeDelBlock) {
3327
+ out_stream << " Range deletion block handle: "
3328
+ << metaindex_iter->value().ToString(true) << "\n";
3329
+ }
3330
+ }
3331
+ out_stream << "\n";
3332
+ } else {
3333
+ return s;
3334
+ }
3335
+
3336
+ // Output TableProperties
3337
+ const ROCKSDB_NAMESPACE::TableProperties* table_properties;
3338
+ table_properties = rep_->table_properties.get();
3339
+
3340
+ if (table_properties != nullptr) {
3341
+ out_stream << "Table Properties:\n"
3342
+ "--------------------------------------\n";
3343
+ out_stream << " " << table_properties->ToString("\n ", ": ") << "\n";
3344
+ }
3345
+
3346
+ if (rep_->filter) {
3347
+ out_stream << "Filter Details:\n"
3348
+ "--------------------------------------\n";
3349
+ out_stream << " " << rep_->filter->ToString() << "\n";
3350
+ }
3351
+
3352
+ // Output Index block
3353
+ s = DumpIndexBlock(out_stream);
3354
+ if (!s.ok()) {
3355
+ return s;
3356
+ }
3357
+
3358
+ // Output compression dictionary
3359
+ if (rep_->uncompression_dict_reader) {
3360
+ CachableEntry<UncompressionDict> uncompression_dict;
3361
+ s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
3362
+ nullptr /* prefetch_buffer */, false /* no_io */,
3363
+ nullptr /* get_context */, nullptr /* lookup_context */,
3364
+ &uncompression_dict);
3365
+ if (!s.ok()) {
3366
+ return s;
3367
+ }
3368
+
3369
+ assert(uncompression_dict.GetValue());
3370
+
3371
+ const Slice& raw_dict = uncompression_dict.GetValue()->GetRawDict();
3372
+ out_stream << "Compression Dictionary:\n"
3373
+ "--------------------------------------\n";
3374
+ out_stream << " size (bytes): " << raw_dict.size() << "\n\n";
3375
+ out_stream << " HEX " << raw_dict.ToString(true) << "\n\n";
3376
+ }
3377
+
3378
+ // Output range deletions block
3379
+ auto* range_del_iter = NewRangeTombstoneIterator(ReadOptions());
3380
+ if (range_del_iter != nullptr) {
3381
+ range_del_iter->SeekToFirst();
3382
+ if (range_del_iter->Valid()) {
3383
+ out_stream << "Range deletions:\n"
3384
+ "--------------------------------------\n";
3385
+ for (; range_del_iter->Valid(); range_del_iter->Next()) {
3386
+ DumpKeyValue(range_del_iter->key(), range_del_iter->value(),
3387
+ out_stream);
3388
+ }
3389
+ out_stream << "\n";
3390
+ }
3391
+ delete range_del_iter;
3392
+ }
3393
+ // Output Data blocks
3394
+ s = DumpDataBlocks(out_stream);
3395
+
3396
+ if (!s.ok()) {
3397
+ return s;
3398
+ }
3399
+
3400
+ if (!out_stream.good()) {
3401
+ return Status::IOError("Failed to write to output file");
3402
+ }
3403
+ return Status::OK();
3404
+ }
3405
+
3406
+ Status BlockBasedTable::DumpIndexBlock(std::ostream& out_stream) {
3407
+ out_stream << "Index Details:\n"
3408
+ "--------------------------------------\n";
3409
+ std::unique_ptr<InternalIteratorBase<IndexValue>> blockhandles_iter(
3410
+ NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false,
3411
+ /*input_iter=*/nullptr, /*get_context=*/nullptr,
3412
+ /*lookup_contex=*/nullptr));
3413
+ Status s = blockhandles_iter->status();
3414
+ if (!s.ok()) {
3415
+ out_stream << "Can not read Index Block \n\n";
3416
+ return s;
3417
+ }
3418
+
3419
+ out_stream << " Block key hex dump: Data block handle\n";
3420
+ out_stream << " Block key ascii\n\n";
3421
+ for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid();
3422
+ blockhandles_iter->Next()) {
3423
+ s = blockhandles_iter->status();
3424
+ if (!s.ok()) {
3425
+ break;
3426
+ }
3427
+ Slice key = blockhandles_iter->key();
3428
+ Slice user_key;
3429
+ InternalKey ikey;
3430
+ if (!rep_->index_key_includes_seq) {
3431
+ user_key = key;
3432
+ } else {
3433
+ ikey.DecodeFrom(key);
3434
+ user_key = ikey.user_key();
3435
+ }
3436
+
3437
+ out_stream << " HEX " << user_key.ToString(true) << ": "
3438
+ << blockhandles_iter->value().ToString(true,
3439
+ rep_->index_has_first_key)
3440
+ << "\n";
3441
+
3442
+ std::string str_key = user_key.ToString();
3443
+ std::string res_key("");
3444
+ char cspace = ' ';
3445
+ for (size_t i = 0; i < str_key.size(); i++) {
3446
+ res_key.append(&str_key[i], 1);
3447
+ res_key.append(1, cspace);
3448
+ }
3449
+ out_stream << " ASCII " << res_key << "\n";
3450
+ out_stream << " ------\n";
3451
+ }
3452
+ out_stream << "\n";
3453
+ return Status::OK();
3454
+ }
3455
+
3456
+ Status BlockBasedTable::DumpDataBlocks(std::ostream& out_stream) {
3457
+ std::unique_ptr<InternalIteratorBase<IndexValue>> blockhandles_iter(
3458
+ NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false,
3459
+ /*input_iter=*/nullptr, /*get_context=*/nullptr,
3460
+ /*lookup_contex=*/nullptr));
3461
+ Status s = blockhandles_iter->status();
3462
+ if (!s.ok()) {
3463
+ out_stream << "Can not read Index Block \n\n";
3464
+ return s;
3465
+ }
3466
+
3467
+ uint64_t datablock_size_min = std::numeric_limits<uint64_t>::max();
3468
+ uint64_t datablock_size_max = 0;
3469
+ uint64_t datablock_size_sum = 0;
3470
+
3471
+ size_t block_id = 1;
3472
+ for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid();
3473
+ block_id++, blockhandles_iter->Next()) {
3474
+ s = blockhandles_iter->status();
3475
+ if (!s.ok()) {
3476
+ break;
3477
+ }
3478
+
3479
+ BlockHandle bh = blockhandles_iter->value().handle;
3480
+ uint64_t datablock_size = bh.size();
3481
+ datablock_size_min = std::min(datablock_size_min, datablock_size);
3482
+ datablock_size_max = std::max(datablock_size_max, datablock_size);
3483
+ datablock_size_sum += datablock_size;
3484
+
3485
+ out_stream << "Data Block # " << block_id << " @ "
3486
+ << blockhandles_iter->value().handle.ToString(true) << "\n";
3487
+ out_stream << "--------------------------------------\n";
3488
+
3489
+ std::unique_ptr<InternalIterator> datablock_iter;
3490
+ datablock_iter.reset(NewDataBlockIterator<DataBlockIter>(
3491
+ ReadOptions(), blockhandles_iter->value().handle,
3492
+ /*input_iter=*/nullptr, /*type=*/BlockType::kData,
3493
+ /*get_context=*/nullptr, /*lookup_context=*/nullptr, Status(),
3494
+ /*prefetch_buffer=*/nullptr));
3495
+ s = datablock_iter->status();
3496
+
3497
+ if (!s.ok()) {
3498
+ out_stream << "Error reading the block - Skipped \n\n";
3499
+ continue;
3500
+ }
3501
+
3502
+ for (datablock_iter->SeekToFirst(); datablock_iter->Valid();
3503
+ datablock_iter->Next()) {
3504
+ s = datablock_iter->status();
3505
+ if (!s.ok()) {
3506
+ out_stream << "Error reading the block - Skipped \n";
3507
+ break;
3508
+ }
3509
+ DumpKeyValue(datablock_iter->key(), datablock_iter->value(), out_stream);
3510
+ }
3511
+ out_stream << "\n";
3512
+ }
3513
+
3514
+ uint64_t num_datablocks = block_id - 1;
3515
+ if (num_datablocks) {
3516
+ double datablock_size_avg =
3517
+ static_cast<double>(datablock_size_sum) / num_datablocks;
3518
+ out_stream << "Data Block Summary:\n";
3519
+ out_stream << "--------------------------------------\n";
3520
+ out_stream << " # data blocks: " << num_datablocks << "\n";
3521
+ out_stream << " min data block size: " << datablock_size_min << "\n";
3522
+ out_stream << " max data block size: " << datablock_size_max << "\n";
3523
+ out_stream << " avg data block size: " << ToString(datablock_size_avg)
3524
+ << "\n";
3525
+ }
3526
+
3527
+ return Status::OK();
3528
+ }
3529
+
3530
+ void BlockBasedTable::DumpKeyValue(const Slice& key, const Slice& value,
3531
+ std::ostream& out_stream) {
3532
+ InternalKey ikey;
3533
+ ikey.DecodeFrom(key);
3534
+
3535
+ out_stream << " HEX " << ikey.user_key().ToString(true) << ": "
3536
+ << value.ToString(true) << "\n";
3537
+
3538
+ std::string str_key = ikey.user_key().ToString();
3539
+ std::string str_value = value.ToString();
3540
+ std::string res_key(""), res_value("");
3541
+ char cspace = ' ';
3542
+ for (size_t i = 0; i < str_key.size(); i++) {
3543
+ if (str_key[i] == '\0') {
3544
+ res_key.append("\\0", 2);
3545
+ } else {
3546
+ res_key.append(&str_key[i], 1);
3547
+ }
3548
+ res_key.append(1, cspace);
3549
+ }
3550
+ for (size_t i = 0; i < str_value.size(); i++) {
3551
+ if (str_value[i] == '\0') {
3552
+ res_value.append("\\0", 2);
3553
+ } else {
3554
+ res_value.append(&str_value[i], 1);
3555
+ }
3556
+ res_value.append(1, cspace);
3557
+ }
3558
+
3559
+ out_stream << " ASCII " << res_key << ": " << res_value << "\n";
3560
+ out_stream << " ------\n";
3561
+ }
3562
+
3563
+ } // namespace ROCKSDB_NAMESPACE