@nxtedition/rocksdb 6.0.2 → 7.0.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. package/BUILDING.md +12 -4
  2. package/binding.cc +589 -128
  3. package/chained-batch.js +6 -6
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +9 -0
  5. package/deps/rocksdb/rocksdb/Makefile +16 -5
  6. package/deps/rocksdb/rocksdb/TARGETS +23 -2
  7. package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +7 -0
  8. package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +29 -0
  9. package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +29 -0
  10. package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +29 -0
  11. package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +33 -0
  12. package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +29 -0
  13. package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +29 -0
  14. package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +26 -0
  15. package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +29 -0
  16. package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +10 -0
  17. package/deps/rocksdb/rocksdb/db/builder.cc +12 -4
  18. package/deps/rocksdb/rocksdb/db/c.cc +26 -0
  19. package/deps/rocksdb/rocksdb/db/c_test.c +3 -0
  20. package/deps/rocksdb/rocksdb/db/column_family.cc +8 -2
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +29 -6
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +8 -2
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +16 -4
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +2 -1
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +16 -0
  26. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +402 -30
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +2 -12
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +14 -0
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -5
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +33 -7
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +54 -23
  33. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +3 -0
  34. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +8 -1
  35. package/deps/rocksdb/rocksdb/db/db_options_test.cc +16 -0
  36. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +14 -15
  37. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +331 -0
  38. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +5 -0
  39. package/deps/rocksdb/rocksdb/db/db_test.cc +16 -0
  40. package/deps/rocksdb/rocksdb/db/db_test2.cc +221 -92
  41. package/deps/rocksdb/rocksdb/db/db_test_util.cc +6 -2
  42. package/deps/rocksdb/rocksdb/db/db_test_util.h +4 -2
  43. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +1 -171
  44. package/deps/rocksdb/rocksdb/db/db_with_timestamp_test_util.cc +96 -0
  45. package/deps/rocksdb/rocksdb/db/db_with_timestamp_test_util.h +126 -0
  46. package/deps/rocksdb/rocksdb/db/experimental.cc +1 -1
  47. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +57 -0
  48. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +13 -2
  49. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +2 -0
  50. package/deps/rocksdb/rocksdb/db/flush_job.cc +10 -11
  51. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +11 -1
  52. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +6 -0
  53. package/deps/rocksdb/rocksdb/db/repair.cc +12 -1
  54. package/deps/rocksdb/rocksdb/db/repair_test.cc +32 -10
  55. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +3 -1
  56. package/deps/rocksdb/rocksdb/db/table_cache.cc +19 -127
  57. package/deps/rocksdb/rocksdb/db/table_cache.h +3 -2
  58. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +140 -0
  59. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +130 -128
  60. package/deps/rocksdb/rocksdb/db/version_edit.cc +20 -0
  61. package/deps/rocksdb/rocksdb/db/version_edit.h +13 -4
  62. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +14 -14
  63. package/deps/rocksdb/rocksdb/db/version_set.cc +205 -212
  64. package/deps/rocksdb/rocksdb/db/version_set.h +11 -0
  65. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +154 -0
  66. package/deps/rocksdb/rocksdb/db/version_set_test.cc +10 -9
  67. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -0
  68. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -0
  69. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +15 -0
  70. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +159 -65
  71. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +43 -21
  72. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +142 -17
  73. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +23 -27
  74. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +2 -3
  75. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +23 -5
  76. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +14 -1
  77. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +2 -0
  78. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +17 -0
  79. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +2 -0
  80. package/deps/rocksdb/rocksdb/include/rocksdb/snapshot.h +4 -1
  81. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
  82. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +189 -0
  83. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  84. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -0
  85. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -1
  86. package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -0
  87. package/deps/rocksdb/rocksdb/options/db_options.cc +8 -0
  88. package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
  89. package/deps/rocksdb/rocksdb/options/options.cc +7 -0
  90. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -0
  91. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +6 -4
  92. package/deps/rocksdb/rocksdb/options/options_test.cc +107 -9
  93. package/deps/rocksdb/rocksdb/src.mk +4 -1
  94. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +9 -4
  95. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +80 -6
  96. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +8 -2
  97. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +81 -757
  98. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +21 -15
  99. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +9 -3
  100. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +754 -0
  101. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +2 -1
  102. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +8 -0
  103. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +1 -10
  104. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +59 -1
  105. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +18 -0
  106. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +0 -61
  107. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +0 -13
  108. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  109. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -2
  110. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +2 -2
  111. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
  112. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -1
  113. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +60 -2
  114. package/deps/rocksdb/rocksdb/table/block_fetcher.h +2 -0
  115. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +39 -0
  116. package/deps/rocksdb/rocksdb/table/multiget_context.h +46 -2
  117. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +2 -1
  118. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +1 -1
  119. package/deps/rocksdb/rocksdb/table/table_reader.h +13 -0
  120. package/deps/rocksdb/rocksdb/table/unique_id.cc +27 -0
  121. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +3 -0
  122. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -0
  123. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +23 -7
  124. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +9 -1
  125. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +72 -0
  126. package/deps/rocksdb/rocksdb/util/async_file_reader.h +144 -0
  127. package/deps/rocksdb/rocksdb/util/compression.h +49 -0
  128. package/deps/rocksdb/rocksdb/util/coro_utils.h +111 -0
  129. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +55 -0
  130. package/deps/rocksdb/rocksdb.gyp +16 -15
  131. package/index.js +186 -3
  132. package/iterator.js +1 -0
  133. package/package-lock.json +23687 -0
  134. package/package.json +2 -30
  135. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  136. package/deps/liburing/liburing/README +0 -46
  137. package/deps/liburing/liburing/test/232c93d07b74-test.c +0 -305
  138. package/deps/liburing/liburing/test/35fa71a030ca-test.c +0 -329
  139. package/deps/liburing/liburing/test/500f9fbadef8-test.c +0 -89
  140. package/deps/liburing/liburing/test/7ad0e4b2f83c-test.c +0 -93
  141. package/deps/liburing/liburing/test/8a9973408177-test.c +0 -106
  142. package/deps/liburing/liburing/test/917257daa0fe-test.c +0 -53
  143. package/deps/liburing/liburing/test/Makefile +0 -312
  144. package/deps/liburing/liburing/test/a0908ae19763-test.c +0 -58
  145. package/deps/liburing/liburing/test/a4c0b3decb33-test.c +0 -180
  146. package/deps/liburing/liburing/test/accept-link.c +0 -251
  147. package/deps/liburing/liburing/test/accept-reuse.c +0 -164
  148. package/deps/liburing/liburing/test/accept-test.c +0 -79
  149. package/deps/liburing/liburing/test/accept.c +0 -476
  150. package/deps/liburing/liburing/test/across-fork.c +0 -283
  151. package/deps/liburing/liburing/test/b19062a56726-test.c +0 -53
  152. package/deps/liburing/liburing/test/b5837bd5311d-test.c +0 -77
  153. package/deps/liburing/liburing/test/ce593a6c480a-test.c +0 -135
  154. package/deps/liburing/liburing/test/close-opath.c +0 -122
  155. package/deps/liburing/liburing/test/config +0 -10
  156. package/deps/liburing/liburing/test/connect.c +0 -398
  157. package/deps/liburing/liburing/test/cq-full.c +0 -96
  158. package/deps/liburing/liburing/test/cq-overflow.c +0 -294
  159. package/deps/liburing/liburing/test/cq-peek-batch.c +0 -102
  160. package/deps/liburing/liburing/test/cq-ready.c +0 -94
  161. package/deps/liburing/liburing/test/cq-size.c +0 -58
  162. package/deps/liburing/liburing/test/d4ae271dfaae-test.c +0 -96
  163. package/deps/liburing/liburing/test/d77a67ed5f27-test.c +0 -65
  164. package/deps/liburing/liburing/test/defer.c +0 -307
  165. package/deps/liburing/liburing/test/double-poll-crash.c +0 -186
  166. package/deps/liburing/liburing/test/eeed8b54e0df-test.c +0 -114
  167. package/deps/liburing/liburing/test/empty-eownerdead.c +0 -42
  168. package/deps/liburing/liburing/test/eventfd-disable.c +0 -151
  169. package/deps/liburing/liburing/test/eventfd-ring.c +0 -97
  170. package/deps/liburing/liburing/test/eventfd.c +0 -112
  171. package/deps/liburing/liburing/test/fadvise.c +0 -202
  172. package/deps/liburing/liburing/test/fallocate.c +0 -249
  173. package/deps/liburing/liburing/test/fc2a85cb02ef-test.c +0 -138
  174. package/deps/liburing/liburing/test/file-register.c +0 -843
  175. package/deps/liburing/liburing/test/file-update.c +0 -173
  176. package/deps/liburing/liburing/test/files-exit-hang-poll.c +0 -128
  177. package/deps/liburing/liburing/test/files-exit-hang-timeout.c +0 -134
  178. package/deps/liburing/liburing/test/fixed-link.c +0 -90
  179. package/deps/liburing/liburing/test/fsync.c +0 -224
  180. package/deps/liburing/liburing/test/hardlink.c +0 -136
  181. package/deps/liburing/liburing/test/helpers.c +0 -135
  182. package/deps/liburing/liburing/test/helpers.h +0 -67
  183. package/deps/liburing/liburing/test/io-cancel.c +0 -537
  184. package/deps/liburing/liburing/test/io_uring_enter.c +0 -296
  185. package/deps/liburing/liburing/test/io_uring_register.c +0 -664
  186. package/deps/liburing/liburing/test/io_uring_setup.c +0 -192
  187. package/deps/liburing/liburing/test/iopoll.c +0 -366
  188. package/deps/liburing/liburing/test/lfs-openat-write.c +0 -117
  189. package/deps/liburing/liburing/test/lfs-openat.c +0 -273
  190. package/deps/liburing/liburing/test/link-timeout.c +0 -1107
  191. package/deps/liburing/liburing/test/link.c +0 -496
  192. package/deps/liburing/liburing/test/link_drain.c +0 -229
  193. package/deps/liburing/liburing/test/madvise.c +0 -195
  194. package/deps/liburing/liburing/test/mkdir.c +0 -108
  195. package/deps/liburing/liburing/test/multicqes_drain.c +0 -383
  196. package/deps/liburing/liburing/test/nop-all-sizes.c +0 -107
  197. package/deps/liburing/liburing/test/nop.c +0 -115
  198. package/deps/liburing/liburing/test/open-close.c +0 -146
  199. package/deps/liburing/liburing/test/openat2.c +0 -240
  200. package/deps/liburing/liburing/test/personality.c +0 -204
  201. package/deps/liburing/liburing/test/pipe-eof.c +0 -81
  202. package/deps/liburing/liburing/test/pipe-reuse.c +0 -105
  203. package/deps/liburing/liburing/test/poll-cancel-ton.c +0 -139
  204. package/deps/liburing/liburing/test/poll-cancel.c +0 -135
  205. package/deps/liburing/liburing/test/poll-link.c +0 -227
  206. package/deps/liburing/liburing/test/poll-many.c +0 -208
  207. package/deps/liburing/liburing/test/poll-mshot-update.c +0 -273
  208. package/deps/liburing/liburing/test/poll-ring.c +0 -48
  209. package/deps/liburing/liburing/test/poll-v-poll.c +0 -353
  210. package/deps/liburing/liburing/test/poll.c +0 -109
  211. package/deps/liburing/liburing/test/probe.c +0 -137
  212. package/deps/liburing/liburing/test/read-write.c +0 -876
  213. package/deps/liburing/liburing/test/register-restrictions.c +0 -633
  214. package/deps/liburing/liburing/test/rename.c +0 -134
  215. package/deps/liburing/liburing/test/ring-leak.c +0 -173
  216. package/deps/liburing/liburing/test/ring-leak2.c +0 -249
  217. package/deps/liburing/liburing/test/rsrc_tags.c +0 -449
  218. package/deps/liburing/liburing/test/runtests-loop.sh +0 -16
  219. package/deps/liburing/liburing/test/runtests.sh +0 -170
  220. package/deps/liburing/liburing/test/rw_merge_test.c +0 -97
  221. package/deps/liburing/liburing/test/self.c +0 -91
  222. package/deps/liburing/liburing/test/send_recv.c +0 -291
  223. package/deps/liburing/liburing/test/send_recvmsg.c +0 -345
  224. package/deps/liburing/liburing/test/sendmsg_fs_cve.c +0 -198
  225. package/deps/liburing/liburing/test/shared-wq.c +0 -84
  226. package/deps/liburing/liburing/test/short-read.c +0 -75
  227. package/deps/liburing/liburing/test/shutdown.c +0 -163
  228. package/deps/liburing/liburing/test/sigfd-deadlock.c +0 -74
  229. package/deps/liburing/liburing/test/socket-rw-eagain.c +0 -156
  230. package/deps/liburing/liburing/test/socket-rw.c +0 -147
  231. package/deps/liburing/liburing/test/splice.c +0 -511
  232. package/deps/liburing/liburing/test/sq-full-cpp.cc +0 -45
  233. package/deps/liburing/liburing/test/sq-full.c +0 -45
  234. package/deps/liburing/liburing/test/sq-poll-dup.c +0 -200
  235. package/deps/liburing/liburing/test/sq-poll-kthread.c +0 -168
  236. package/deps/liburing/liburing/test/sq-poll-share.c +0 -137
  237. package/deps/liburing/liburing/test/sq-space_left.c +0 -159
  238. package/deps/liburing/liburing/test/sqpoll-cancel-hang.c +0 -159
  239. package/deps/liburing/liburing/test/sqpoll-disable-exit.c +0 -195
  240. package/deps/liburing/liburing/test/sqpoll-exit-hang.c +0 -77
  241. package/deps/liburing/liburing/test/sqpoll-sleep.c +0 -68
  242. package/deps/liburing/liburing/test/statx.c +0 -172
  243. package/deps/liburing/liburing/test/stdout.c +0 -232
  244. package/deps/liburing/liburing/test/submit-link-fail.c +0 -154
  245. package/deps/liburing/liburing/test/submit-reuse.c +0 -239
  246. package/deps/liburing/liburing/test/symlink.c +0 -116
  247. package/deps/liburing/liburing/test/teardowns.c +0 -58
  248. package/deps/liburing/liburing/test/thread-exit.c +0 -131
  249. package/deps/liburing/liburing/test/timeout-new.c +0 -246
  250. package/deps/liburing/liburing/test/timeout-overflow.c +0 -204
  251. package/deps/liburing/liburing/test/timeout.c +0 -1354
  252. package/deps/liburing/liburing/test/unlink.c +0 -111
  253. package/deps/liburing/liburing/test/wakeup-hang.c +0 -162
  254. package/deps/rocksdb/rocksdb/README.md +0 -32
  255. package/deps/rocksdb/rocksdb/microbench/README.md +0 -60
  256. package/deps/rocksdb/rocksdb/plugin/README.md +0 -43
  257. package/deps/rocksdb/rocksdb/port/README +0 -10
  258. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
  259. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -0,0 +1,154 @@
1
+ // Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #include "util/coro_utils.h"
7
+
8
+ #if defined(WITHOUT_COROUTINES) || \
9
+ (defined(USE_COROUTINES) && defined(WITH_COROUTINES))
10
+
11
+ namespace ROCKSDB_NAMESPACE {
12
+
13
+ // Lookup a batch of keys in a single SST file
14
+ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
15
+ (const ReadOptions& read_options, MultiGetRange file_range, int hit_file_level,
16
+ bool is_hit_file_last_in_level, FdWithKeyRange* f,
17
+ std::unordered_map<uint64_t, BlobReadRequests>& blob_rqs,
18
+ uint64_t& num_filter_read, uint64_t& num_index_read, uint64_t& num_data_read,
19
+ uint64_t& num_sst_read) {
20
+ bool timer_enabled = GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex &&
21
+ get_perf_context()->per_level_perf_context_enabled;
22
+
23
+ Status s;
24
+ StopWatchNano timer(clock_, timer_enabled /* auto_start */);
25
+ s = CO_AWAIT(table_cache_->MultiGet)(
26
+ read_options, *internal_comparator(), *f->file_metadata, &file_range,
27
+ mutable_cf_options_.prefix_extractor,
28
+ cfd_->internal_stats()->GetFileReadHist(hit_file_level),
29
+ IsFilterSkipped(static_cast<int>(hit_file_level),
30
+ is_hit_file_last_in_level),
31
+ hit_file_level);
32
+ // TODO: examine the behavior for corrupted key
33
+ if (timer_enabled) {
34
+ PERF_COUNTER_BY_LEVEL_ADD(get_from_table_nanos, timer.ElapsedNanos(),
35
+ hit_file_level);
36
+ }
37
+ if (!s.ok()) {
38
+ // TODO: Set status for individual keys appropriately
39
+ for (auto iter = file_range.begin(); iter != file_range.end(); ++iter) {
40
+ *iter->s = s;
41
+ file_range.MarkKeyDone(iter);
42
+ }
43
+ CO_RETURN s;
44
+ }
45
+ uint64_t batch_size = 0;
46
+ for (auto iter = file_range.begin(); s.ok() && iter != file_range.end();
47
+ ++iter) {
48
+ GetContext& get_context = *iter->get_context;
49
+ Status* status = iter->s;
50
+ // The Status in the KeyContext takes precedence over GetContext state
51
+ // Status may be an error if there were any IO errors in the table
52
+ // reader. We never expect Status to be NotFound(), as that is
53
+ // determined by get_context
54
+ assert(!status->IsNotFound());
55
+ if (!status->ok()) {
56
+ file_range.MarkKeyDone(iter);
57
+ continue;
58
+ }
59
+
60
+ if (get_context.sample()) {
61
+ sample_file_read_inc(f->file_metadata);
62
+ }
63
+ batch_size++;
64
+ num_index_read += get_context.get_context_stats_.num_index_read;
65
+ num_filter_read += get_context.get_context_stats_.num_filter_read;
66
+ num_data_read += get_context.get_context_stats_.num_data_read;
67
+ num_sst_read += get_context.get_context_stats_.num_sst_read;
68
+ // Reset these stats since they're specific to a level
69
+ get_context.get_context_stats_.num_index_read = 0;
70
+ get_context.get_context_stats_.num_filter_read = 0;
71
+ get_context.get_context_stats_.num_data_read = 0;
72
+ get_context.get_context_stats_.num_sst_read = 0;
73
+
74
+ // report the counters before returning
75
+ if (get_context.State() != GetContext::kNotFound &&
76
+ get_context.State() != GetContext::kMerge &&
77
+ db_statistics_ != nullptr) {
78
+ get_context.ReportCounters();
79
+ } else {
80
+ if (iter->max_covering_tombstone_seq > 0) {
81
+ // The remaining files we look at will only contain covered keys, so
82
+ // we stop here for this key
83
+ file_range.SkipKey(iter);
84
+ }
85
+ }
86
+ switch (get_context.State()) {
87
+ case GetContext::kNotFound:
88
+ // Keep searching in other files
89
+ break;
90
+ case GetContext::kMerge:
91
+ // TODO: update per-level perfcontext user_key_return_count for kMerge
92
+ break;
93
+ case GetContext::kFound:
94
+ if (hit_file_level == 0) {
95
+ RecordTick(db_statistics_, GET_HIT_L0);
96
+ } else if (hit_file_level == 1) {
97
+ RecordTick(db_statistics_, GET_HIT_L1);
98
+ } else if (hit_file_level >= 2) {
99
+ RecordTick(db_statistics_, GET_HIT_L2_AND_UP);
100
+ }
101
+
102
+ PERF_COUNTER_BY_LEVEL_ADD(user_key_return_count, 1, hit_file_level);
103
+
104
+ file_range.MarkKeyDone(iter);
105
+
106
+ if (iter->is_blob_index) {
107
+ if (iter->value) {
108
+ TEST_SYNC_POINT_CALLBACK("Version::MultiGet::TamperWithBlobIndex",
109
+ &(*iter));
110
+
111
+ const Slice& blob_index_slice = *(iter->value);
112
+ BlobIndex blob_index;
113
+ Status tmp_s = blob_index.DecodeFrom(blob_index_slice);
114
+ if (tmp_s.ok()) {
115
+ const uint64_t blob_file_num = blob_index.file_number();
116
+ blob_rqs[blob_file_num].emplace_back(
117
+ std::make_pair(blob_index, std::cref(*iter)));
118
+ } else {
119
+ *(iter->s) = tmp_s;
120
+ }
121
+ }
122
+ } else {
123
+ file_range.AddValueSize(iter->value->size());
124
+ if (file_range.GetValueSize() > read_options.value_size_soft_limit) {
125
+ s = Status::Aborted();
126
+ break;
127
+ }
128
+ }
129
+ continue;
130
+ case GetContext::kDeleted:
131
+ // Use empty error message for speed
132
+ *status = Status::NotFound();
133
+ file_range.MarkKeyDone(iter);
134
+ continue;
135
+ case GetContext::kCorrupt:
136
+ *status =
137
+ Status::Corruption("corrupted key for ", iter->lkey->user_key());
138
+ file_range.MarkKeyDone(iter);
139
+ continue;
140
+ case GetContext::kUnexpectedBlobIndex:
141
+ ROCKS_LOG_ERROR(info_log_, "Encounter unexpected blob index.");
142
+ *status = Status::NotSupported(
143
+ "Encounter unexpected blob index. Please open DB with "
144
+ "ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
145
+ file_range.MarkKeyDone(iter);
146
+ continue;
147
+ }
148
+ }
149
+
150
+ RecordInHistogram(db_statistics_, SST_BATCH_SIZE, batch_size);
151
+ CO_RETURN s;
152
+ }
153
+ } // namespace ROCKSDB_NAMESPACE
154
+ #endif
@@ -18,6 +18,7 @@
18
18
  #include "rocksdb/file_system.h"
19
19
  #include "table/block_based/block_based_table_factory.h"
20
20
  #include "table/mock_table.h"
21
+ #include "table/unique_id_impl.h"
21
22
  #include "test_util/testharness.h"
22
23
  #include "test_util/testutil.h"
23
24
  #include "util/string_util.h"
@@ -49,7 +50,7 @@ class GenerateLevelFilesBriefTest : public testing::Test {
49
50
  kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
50
51
  kUnknownFileCreationTime, kUnknownFileChecksum,
51
52
  kUnknownFileChecksumFuncName, kDisableUserTimestamp,
52
- kDisableUserTimestamp);
53
+ kDisableUserTimestamp, kNullUniqueId64x2);
53
54
  files_.push_back(f);
54
55
  }
55
56
 
@@ -158,7 +159,7 @@ class VersionStorageInfoTestBase : public testing::Test {
158
159
  Temperature::kUnknown, oldest_blob_file_number,
159
160
  kUnknownOldestAncesterTime, kUnknownFileCreationTime,
160
161
  kUnknownFileChecksum, kUnknownFileChecksumFuncName,
161
- kDisableUserTimestamp, kDisableUserTimestamp);
162
+ kDisableUserTimestamp, kDisableUserTimestamp, kNullUniqueId64x2);
162
163
  f->compensated_file_size = file_size;
163
164
  vstorage_.AddFile(level, f);
164
165
  }
@@ -3222,11 +3223,11 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
3222
3223
  s = fs_->GetFileSize(fname, IOOptions(), &file_size, nullptr);
3223
3224
  ASSERT_OK(s);
3224
3225
  ASSERT_NE(0, file_size);
3225
- file_metas->emplace_back(file_num, /*file_path_id=*/0, file_size, ikey,
3226
- ikey, 0, 0, false, Temperature::kUnknown, 0, 0,
3227
- 0, kUnknownFileChecksum,
3228
- kUnknownFileChecksumFuncName,
3229
- kDisableUserTimestamp, kDisableUserTimestamp);
3226
+ file_metas->emplace_back(
3227
+ file_num, /*file_path_id=*/0, file_size, ikey, ikey, 0, 0, false,
3228
+ Temperature::kUnknown, 0, 0, 0, kUnknownFileChecksum,
3229
+ kUnknownFileChecksumFuncName, kDisableUserTimestamp,
3230
+ kDisableUserTimestamp, kNullUniqueId64x2);
3230
3231
  }
3231
3232
  }
3232
3233
 
@@ -3282,7 +3283,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) {
3282
3283
  file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey,
3283
3284
  largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0,
3284
3285
  kUnknownFileChecksum, kUnknownFileChecksumFuncName,
3285
- kDisableUserTimestamp, kDisableUserTimestamp);
3286
+ kDisableUserTimestamp, kDisableUserTimestamp, kNullUniqueId64x2);
3286
3287
  added_files.emplace_back(0, meta);
3287
3288
  }
3288
3289
  WriteFileAdditionAndDeletionToManifest(
@@ -3338,7 +3339,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) {
3338
3339
  file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey,
3339
3340
  largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0,
3340
3341
  kUnknownFileChecksum, kUnknownFileChecksumFuncName,
3341
- kDisableUserTimestamp, kDisableUserTimestamp);
3342
+ kDisableUserTimestamp, kDisableUserTimestamp, kNullUniqueId64x2);
3342
3343
  added_files.emplace_back(0, meta);
3343
3344
  }
3344
3345
  WriteFileAdditionAndDeletionToManifest(
@@ -221,6 +221,7 @@ DECLARE_int32(compression_max_dict_bytes);
221
221
  DECLARE_int32(compression_zstd_max_train_bytes);
222
222
  DECLARE_int32(compression_parallel_threads);
223
223
  DECLARE_uint64(compression_max_dict_buffer_bytes);
224
+ DECLARE_bool(compression_use_zstd_dict_trainer);
224
225
  DECLARE_string(checksum_type);
225
226
  DECLARE_string(env_uri);
226
227
  DECLARE_string(fs_uri);
@@ -292,6 +293,7 @@ DECLARE_uint64(wp_commit_cache_bits);
292
293
  DECLARE_bool(adaptive_readahead);
293
294
  DECLARE_bool(async_io);
294
295
  DECLARE_string(wal_compression);
296
+ DECLARE_bool(verify_sst_unique_id_in_manifest);
295
297
 
296
298
  constexpr long KB = 1024;
297
299
  constexpr int kRandomValueMaxFactor = 3;
@@ -752,6 +752,13 @@ DEFINE_uint64(compression_max_dict_buffer_bytes, 0,
752
752
  "Buffering limit for SST file data to sample for dictionary "
753
753
  "compression.");
754
754
 
755
+ DEFINE_bool(
756
+ compression_use_zstd_dict_trainer, true,
757
+ "Use zstd's trainer to generate dictionary. If the options is false, "
758
+ "zstd's finalizeDictionary() API is used to generate dictionary. "
759
+ "ZSTD 1.4.5+ is required. If ZSTD 1.4.5+ is not linked with the binary, "
760
+ "this flag will have the default value true.");
761
+
755
762
  DEFINE_string(bottommost_compression_type, "disable",
756
763
  "Algorithm to use to compress bottommost level of the database. "
757
764
  "\"disable\" means disabling the feature");
@@ -952,4 +959,10 @@ DEFINE_bool(
952
959
  DEFINE_string(wal_compression, "none",
953
960
  "Algorithm to use for WAL compression. none to disable.");
954
961
 
962
+ DEFINE_bool(
963
+ verify_sst_unique_id_in_manifest, false,
964
+ "Enable DB options `verify_sst_unique_id_in_manifest`, if true, during "
965
+ "DB-open try verifying the SST unique id between MANIFEST and SST "
966
+ "properties.");
967
+
955
968
  #endif // GFLAGS
@@ -8,6 +8,7 @@
8
8
  // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
9
  //
10
10
 
11
+ #include "util/compression.h"
11
12
  #ifdef GFLAGS
12
13
  #include "db_stress_tool/db_stress_common.h"
13
14
  #include "db_stress_tool/db_stress_compaction_filter.h"
@@ -2315,6 +2316,8 @@ void StressTest::PrintEnv() const {
2315
2316
  static_cast<int>(FLAGS_user_timestamp_size));
2316
2317
  fprintf(stdout, "WAL compression : %s\n",
2317
2318
  FLAGS_wal_compression.c_str());
2319
+ fprintf(stdout, "Try verify sst unique id : %d\n",
2320
+ static_cast<int>(FLAGS_verify_sst_unique_id_in_manifest));
2318
2321
 
2319
2322
  fprintf(stdout, "------------------------------------------------\n");
2320
2323
  }
@@ -2913,6 +2916,16 @@ void InitializeOptionsFromFlags(
2913
2916
  FLAGS_compression_parallel_threads;
2914
2917
  options.compression_opts.max_dict_buffer_bytes =
2915
2918
  FLAGS_compression_max_dict_buffer_bytes;
2919
+ if (ZSTD_FinalizeDictionarySupported()) {
2920
+ options.compression_opts.use_zstd_dict_trainer =
2921
+ FLAGS_compression_use_zstd_dict_trainer;
2922
+ } else if (!FLAGS_compression_use_zstd_dict_trainer) {
2923
+ fprintf(
2924
+ stderr,
2925
+ "WARNING: use_zstd_dict_trainer is false but zstd finalizeDictionary "
2926
+ "cannot be used because ZSTD 1.4.5+ is not linked with the binary."
2927
+ " zstd dictionary trainer will be used.\n");
2928
+ }
2916
2929
  options.max_manifest_file_size = FLAGS_max_manifest_file_size;
2917
2930
  options.inplace_update_support = FLAGS_in_place_update;
2918
2931
  options.max_subcompactions = static_cast<uint32_t>(FLAGS_subcompactions);
@@ -2941,6 +2954,8 @@ void InitializeOptionsFromFlags(
2941
2954
  options.level_compaction_dynamic_level_bytes =
2942
2955
  FLAGS_level_compaction_dynamic_level_bytes;
2943
2956
  options.track_and_verify_wals_in_manifest = true;
2957
+ options.verify_sst_unique_id_in_manifest =
2958
+ FLAGS_verify_sst_unique_id_in_manifest;
2944
2959
 
2945
2960
  // Integrated BlobDB
2946
2961
  options.enable_blob_files = FLAGS_enable_blob_files;
@@ -194,34 +194,7 @@ void FilePrefetchBuffer::CopyDataToBuffer(uint32_t src, uint64_t& offset,
194
194
  }
195
195
  }
196
196
 
197
- // If async_read = true:
198
- // async_read is enabled in case of sequential reads. So when
199
- // buffers are switched, we clear the curr_ buffer as we assume the data has
200
- // been consumed because of sequential reads.
201
- //
202
- // Scenarios for prefetching asynchronously:
203
- // Case1: If both buffers are empty, prefetch n bytes
204
- // synchronously in curr_
205
- // and prefetch readahead_size_/2 async in second buffer.
206
- // Case2: If second buffer has partial or full data, make it current and
207
- // prefetch readahead_size_/2 async in second buffer. In case of
208
- // partial data, prefetch remaining bytes from size n synchronously to
209
- // fulfill the requested bytes request.
210
- // Case3: If curr_ has partial data, prefetch remaining bytes from size n
211
- // synchronously in curr_ to fulfill the requested bytes request and
212
- // prefetch readahead_size_/2 bytes async in second buffer.
213
- // Case4: If data is in both buffers, copy requested data from curr_ and second
214
- // buffer to third buffer. If all requested bytes have been copied, do
215
- // the asynchronous prefetching in second buffer.
216
- Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts,
217
- RandomAccessFileReader* reader,
218
- uint64_t offset, size_t length,
219
- size_t readahead_size,
220
- Env::IOPriority rate_limiter_priority,
221
- bool& copy_to_third_buffer) {
222
- if (!enable_) {
223
- return Status::OK();
224
- }
197
+ void FilePrefetchBuffer::PollAndUpdateBuffersIfNeeded(uint64_t offset) {
225
198
  if (async_read_in_progress_ && fs_ != nullptr) {
226
199
  // Wait for prefetch data to complete.
227
200
  // No mutex is needed as PrefetchAsyncCallback updates the result in second
@@ -242,11 +215,6 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts,
242
215
  del_fn_ = nullptr;
243
216
  }
244
217
 
245
- TEST_SYNC_POINT("FilePrefetchBuffer::PrefetchAsync:Start");
246
- Status s;
247
- size_t prefetch_size = length + readahead_size;
248
-
249
- size_t alignment = reader->file()->GetRequiredBufferAlignment();
250
218
  // Index of second buffer.
251
219
  uint32_t second = curr_ ^ 1;
252
220
 
@@ -273,17 +241,55 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts,
273
241
  // outdated data and switch the buffers.
274
242
  bufs_[curr_].buffer_.Clear();
275
243
  curr_ = curr_ ^ 1;
276
- second = curr_ ^ 1;
277
244
  }
278
- // After swap check if all the requested bytes are in curr_, it will go for
279
- // async prefetching only.
245
+ }
246
+
247
+ // If async_read = true:
248
+ // async_read is enabled in case of sequential reads. So when
249
+ // buffers are switched, we clear the curr_ buffer as we assume the data has
250
+ // been consumed because of sequential reads.
251
+ //
252
+ // Scenarios for prefetching asynchronously:
253
+ // Case1: If both buffers are empty, prefetch n bytes
254
+ // synchronously in curr_
255
+ // and prefetch readahead_size_/2 async in second buffer.
256
+ // Case2: If second buffer has partial or full data, make it current and
257
+ // prefetch readahead_size_/2 async in second buffer. In case of
258
+ // partial data, prefetch remaining bytes from size n synchronously to
259
+ // fulfill the requested bytes request.
260
+ // Case3: If curr_ has partial data, prefetch remaining bytes from size n
261
+ // synchronously in curr_ to fulfill the requested bytes request and
262
+ // prefetch readahead_size_/2 bytes async in second buffer.
263
+ // Case4: If data is in both buffers, copy requested data from curr_ and second
264
+ // buffer to third buffer. If all requested bytes have been copied, do
265
+ // the asynchronous prefetching in second buffer.
266
+ Status FilePrefetchBuffer::PrefetchAsyncInternal(
267
+ const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset,
268
+ size_t length, size_t readahead_size, Env::IOPriority rate_limiter_priority,
269
+ bool& copy_to_third_buffer) {
270
+ if (!enable_) {
271
+ return Status::OK();
272
+ }
273
+
274
+ TEST_SYNC_POINT("FilePrefetchBuffer::PrefetchAsyncInternal:Start");
275
+
276
+ PollAndUpdateBuffersIfNeeded(offset);
277
+
278
+ // If all the requested bytes are in curr_, it will go for async prefetching
279
+ // only.
280
280
  if (bufs_[curr_].buffer_.CurrentSize() > 0 &&
281
281
  offset + length <=
282
282
  bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) {
283
283
  offset += length;
284
284
  length = 0;
285
- prefetch_size = readahead_size;
286
285
  }
286
+
287
+ Status s;
288
+ size_t prefetch_size = length + readahead_size;
289
+ size_t alignment = reader->file()->GetRequiredBufferAlignment();
290
+ // Index of second buffer.
291
+ uint32_t second = curr_ ^ 1;
292
+
287
293
  // Data is overlapping i.e. some of the data is in curr_ buffer and remaining
288
294
  // in second buffer.
289
295
  if (bufs_[curr_].buffer_.CurrentSize() > 0 &&
@@ -315,9 +321,8 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts,
315
321
  prefetch_size = length + readahead_size;
316
322
  }
317
323
 
318
- // Update second again if swap happened.
319
- second = curr_ ^ 1;
320
324
  size_t _offset = static_cast<size_t>(offset);
325
+ second = curr_ ^ 1;
321
326
 
322
327
  // offset and size alignment for curr_ buffer with synchronous prefetching
323
328
  uint64_t rounddown_start1 = Rounddown(_offset, alignment);
@@ -442,12 +447,23 @@ bool FilePrefetchBuffer::TryReadFromCache(const IOOptions& opts,
442
447
  bool FilePrefetchBuffer::TryReadFromCacheAsync(
443
448
  const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset,
444
449
  size_t n, Slice* result, Status* status,
445
- Env::IOPriority rate_limiter_priority, bool for_compaction /* = false */
446
- ) {
450
+ Env::IOPriority rate_limiter_priority) {
451
+ assert(async_io_);
452
+
447
453
  if (track_min_offset_ && offset < min_offset_read_) {
448
454
  min_offset_read_ = static_cast<size_t>(offset);
449
455
  }
450
- if (!enable_ || (offset < bufs_[curr_].offset_)) {
456
+
457
+ if (!enable_) {
458
+ return false;
459
+ }
460
+
461
+ // In case of async_io_, offset can be less than bufs_[curr_].offset_ because
462
+ // of non-sequential reads, and PrefetchAsync can be called for any block and
463
+ // RocksDB will call TryReadFromCacheAsync after PrefetchAsync to Poll for
464
+ // requested bytes.
465
+ if (bufs_[curr_].buffer_.CurrentSize() > 0 && offset < bufs_[curr_].offset_ &&
466
+ prev_len_ != 0) {
451
467
  return false;
452
468
  }
453
469
 
@@ -459,35 +475,25 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
459
475
  // If readahead is not enabled: return false.
460
476
  TEST_SYNC_POINT_CALLBACK("FilePrefetchBuffer::TryReadFromCache",
461
477
  &readahead_size_);
462
- if (offset + n > bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) {
478
+ if (offset < bufs_[curr_].offset_ ||
479
+ offset + n > bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) {
463
480
  if (readahead_size_ > 0) {
464
481
  Status s;
465
482
  assert(reader != nullptr);
466
483
  assert(max_readahead_size_ >= readahead_size_);
467
- if (for_compaction) {
468
- s = Prefetch(opts, reader, offset, std::max(n, readahead_size_),
469
- rate_limiter_priority);
470
- } else {
471
- if (implicit_auto_readahead_) {
472
- if (!IsEligibleForPrefetch(offset, n)) {
473
- // Ignore status as Prefetch is not called.
474
- s.PermitUncheckedError();
475
- return false;
476
- }
477
- }
478
- // async prefetching is enabled if it's implicit_auto_readahead_ or
479
- // explicit readahead_size_ is passed along with ReadOptions.async_io =
480
- // true.
481
- if (async_io_) {
482
- // Prefetch n + readahead_size_/2 synchronously as remaining
483
- // readahead_size_/2 will be prefetched asynchronously.
484
- s = PrefetchAsync(opts, reader, offset, n, readahead_size_ / 2,
485
- rate_limiter_priority, copy_to_third_buffer);
486
- } else {
487
- s = Prefetch(opts, reader, offset, n + readahead_size_,
488
- rate_limiter_priority);
484
+
485
+ if (implicit_auto_readahead_) {
486
+ if (!IsEligibleForPrefetch(offset, n)) {
487
+ // Ignore status as Prefetch is not called.
488
+ s.PermitUncheckedError();
489
+ return false;
489
490
  }
490
491
  }
492
+
493
+ // Prefetch n + readahead_size_/2 synchronously as remaining
494
+ // readahead_size_/2 will be prefetched asynchronously.
495
+ s = PrefetchAsyncInternal(opts, reader, offset, n, readahead_size_ / 2,
496
+ rate_limiter_priority, copy_to_third_buffer);
491
497
  if (!s.ok()) {
492
498
  if (status) {
493
499
  *status = s;
@@ -544,4 +550,92 @@ void FilePrefetchBuffer::PrefetchAsyncCallback(const FSReadRequest& req,
544
550
  bufs_[index].buffer_.Size(current_size + req.result.size());
545
551
  }
546
552
  }
553
+
554
+ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts,
555
+ RandomAccessFileReader* reader,
556
+ uint64_t offset, size_t n,
557
+ Env::IOPriority rate_limiter_priority,
558
+ Slice* result) {
559
+ assert(reader != nullptr);
560
+ if (!enable_) {
561
+ return Status::NotSupported();
562
+ }
563
+ TEST_SYNC_POINT("FilePrefetchBuffer::PrefetchAsync:Start");
564
+
565
+ PollAndUpdateBuffersIfNeeded(offset);
566
+
567
+ // Index of second buffer.
568
+ uint32_t second = curr_ ^ 1;
569
+
570
+ // PrefetchAsync can be called on non-sequential reads, so offset can
571
+ // be less than buffers' offset. In that case it clears the buffer and
572
+ // prefetch that block.
573
+ if (bufs_[curr_].buffer_.CurrentSize() > 0 && offset < bufs_[curr_].offset_) {
574
+ bufs_[curr_].buffer_.Clear();
575
+ }
576
+
577
+ // All requested bytes are already in the curr_ buffer. So no need to Read
578
+ // again.
579
+ if (bufs_[curr_].buffer_.CurrentSize() > 0 &&
580
+ offset + n <= bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) {
581
+ uint64_t offset_in_buffer = offset - bufs_[curr_].offset_;
582
+ *result = Slice(bufs_[curr_].buffer_.BufferStart() + offset_in_buffer, n);
583
+ return Status::OK();
584
+ }
585
+
586
+ Status s;
587
+ size_t alignment = reader->file()->GetRequiredBufferAlignment();
588
+
589
+ // TODO akanksha: Handle the scenario if data is overlapping in 2 buffers.
590
+ // Currently, it covers 2 scenarios. Either one buffer (curr_) has no data or
591
+ // it has partial data. It ignores the contents in second buffer (overlapping
592
+ // data in 2 buffers) and send the request to re-read that data again.
593
+
594
+ // Clear the second buffer in order to do asynchronous prefetching.
595
+ bufs_[second].buffer_.Clear();
596
+
597
+ size_t offset_to_read = static_cast<size_t>(offset);
598
+ uint64_t rounddown_start = 0;
599
+ uint64_t roundup_end = 0;
600
+
601
+ if (bufs_[curr_].buffer_.CurrentSize() == 0) {
602
+ // Prefetch full data.
603
+ rounddown_start = Rounddown(offset_to_read, alignment);
604
+ roundup_end = Roundup(offset_to_read + n, alignment);
605
+ } else {
606
+ // Prefetch remaining data.
607
+ size_t rem_length = n - (bufs_[curr_].buffer_.CurrentSize() -
608
+ (offset - bufs_[curr_].offset_));
609
+ rounddown_start = bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize();
610
+ roundup_end = Roundup(rounddown_start + rem_length, alignment);
611
+ }
612
+
613
+ uint64_t roundup_len = roundup_end - rounddown_start;
614
+ assert(roundup_len >= alignment);
615
+ assert(roundup_len % alignment == 0);
616
+
617
+ uint64_t chunk_len = 0;
618
+ CalculateOffsetAndLen(alignment, rounddown_start, roundup_len, second, false,
619
+ chunk_len);
620
+
621
+ // Update the buffer offset.
622
+ bufs_[second].offset_ = rounddown_start;
623
+ assert(roundup_len >= chunk_len);
624
+
625
+ size_t read_len = static_cast<size_t>(roundup_len - chunk_len);
626
+
627
+ s = ReadAsync(opts, reader, rate_limiter_priority, read_len, chunk_len,
628
+ rounddown_start, second);
629
+
630
+ if (!s.ok()) {
631
+ return s;
632
+ }
633
+
634
+ // Update read pattern so that TryReadFromCacheAsync can be called to Poll
635
+ // the data. It will return without polling if blocks are not sequential.
636
+ UpdateReadPattern(offset, n, /*decrease_readaheadsize=*/false);
637
+ prev_len_ = 0;
638
+
639
+ return Status::TryAgain();
640
+ }
547
641
  } // namespace ROCKSDB_NAMESPACE