@nxtedition/rocksdb 6.0.2 → 7.0.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. package/BUILDING.md +12 -4
  2. package/binding.cc +589 -128
  3. package/chained-batch.js +6 -6
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +9 -0
  5. package/deps/rocksdb/rocksdb/Makefile +16 -5
  6. package/deps/rocksdb/rocksdb/TARGETS +23 -2
  7. package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +7 -0
  8. package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +29 -0
  9. package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +29 -0
  10. package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +29 -0
  11. package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +33 -0
  12. package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +29 -0
  13. package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +29 -0
  14. package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +26 -0
  15. package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +29 -0
  16. package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +10 -0
  17. package/deps/rocksdb/rocksdb/db/builder.cc +12 -4
  18. package/deps/rocksdb/rocksdb/db/c.cc +26 -0
  19. package/deps/rocksdb/rocksdb/db/c_test.c +3 -0
  20. package/deps/rocksdb/rocksdb/db/column_family.cc +8 -2
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +29 -6
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +8 -2
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +16 -4
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +2 -1
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +16 -0
  26. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +402 -30
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +2 -12
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +14 -0
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -5
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +33 -7
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +54 -23
  33. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +3 -0
  34. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +8 -1
  35. package/deps/rocksdb/rocksdb/db/db_options_test.cc +16 -0
  36. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +14 -15
  37. package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +331 -0
  38. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +5 -0
  39. package/deps/rocksdb/rocksdb/db/db_test.cc +16 -0
  40. package/deps/rocksdb/rocksdb/db/db_test2.cc +221 -92
  41. package/deps/rocksdb/rocksdb/db/db_test_util.cc +6 -2
  42. package/deps/rocksdb/rocksdb/db/db_test_util.h +4 -2
  43. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +1 -171
  44. package/deps/rocksdb/rocksdb/db/db_with_timestamp_test_util.cc +96 -0
  45. package/deps/rocksdb/rocksdb/db/db_with_timestamp_test_util.h +126 -0
  46. package/deps/rocksdb/rocksdb/db/experimental.cc +1 -1
  47. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +57 -0
  48. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +13 -2
  49. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +2 -0
  50. package/deps/rocksdb/rocksdb/db/flush_job.cc +10 -11
  51. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +11 -1
  52. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +6 -0
  53. package/deps/rocksdb/rocksdb/db/repair.cc +12 -1
  54. package/deps/rocksdb/rocksdb/db/repair_test.cc +32 -10
  55. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +3 -1
  56. package/deps/rocksdb/rocksdb/db/table_cache.cc +19 -127
  57. package/deps/rocksdb/rocksdb/db/table_cache.h +3 -2
  58. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +140 -0
  59. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +130 -128
  60. package/deps/rocksdb/rocksdb/db/version_edit.cc +20 -0
  61. package/deps/rocksdb/rocksdb/db/version_edit.h +13 -4
  62. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +14 -14
  63. package/deps/rocksdb/rocksdb/db/version_set.cc +205 -212
  64. package/deps/rocksdb/rocksdb/db/version_set.h +11 -0
  65. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +154 -0
  66. package/deps/rocksdb/rocksdb/db/version_set_test.cc +10 -9
  67. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -0
  68. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -0
  69. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +15 -0
  70. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +159 -65
  71. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +43 -21
  72. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +142 -17
  73. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +23 -27
  74. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +2 -3
  75. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +23 -5
  76. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +14 -1
  77. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +2 -0
  78. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +17 -0
  79. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +2 -0
  80. package/deps/rocksdb/rocksdb/include/rocksdb/snapshot.h +4 -1
  81. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
  82. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +189 -0
  83. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  84. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -0
  85. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -1
  86. package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -0
  87. package/deps/rocksdb/rocksdb/options/db_options.cc +8 -0
  88. package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
  89. package/deps/rocksdb/rocksdb/options/options.cc +7 -0
  90. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -0
  91. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +6 -4
  92. package/deps/rocksdb/rocksdb/options/options_test.cc +107 -9
  93. package/deps/rocksdb/rocksdb/src.mk +4 -1
  94. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +9 -4
  95. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +80 -6
  96. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +8 -2
  97. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +81 -757
  98. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +21 -15
  99. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +9 -3
  100. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +754 -0
  101. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +2 -1
  102. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +8 -0
  103. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +1 -10
  104. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +59 -1
  105. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +18 -0
  106. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +0 -61
  107. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +0 -13
  108. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  109. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -2
  110. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +2 -2
  111. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
  112. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -1
  113. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +60 -2
  114. package/deps/rocksdb/rocksdb/table/block_fetcher.h +2 -0
  115. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +39 -0
  116. package/deps/rocksdb/rocksdb/table/multiget_context.h +46 -2
  117. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +2 -1
  118. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +1 -1
  119. package/deps/rocksdb/rocksdb/table/table_reader.h +13 -0
  120. package/deps/rocksdb/rocksdb/table/unique_id.cc +27 -0
  121. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +3 -0
  122. package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -0
  123. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +23 -7
  124. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +9 -1
  125. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +72 -0
  126. package/deps/rocksdb/rocksdb/util/async_file_reader.h +144 -0
  127. package/deps/rocksdb/rocksdb/util/compression.h +49 -0
  128. package/deps/rocksdb/rocksdb/util/coro_utils.h +111 -0
  129. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +55 -0
  130. package/deps/rocksdb/rocksdb.gyp +16 -15
  131. package/index.js +186 -3
  132. package/iterator.js +1 -0
  133. package/package-lock.json +23687 -0
  134. package/package.json +2 -30
  135. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  136. package/deps/liburing/liburing/README +0 -46
  137. package/deps/liburing/liburing/test/232c93d07b74-test.c +0 -305
  138. package/deps/liburing/liburing/test/35fa71a030ca-test.c +0 -329
  139. package/deps/liburing/liburing/test/500f9fbadef8-test.c +0 -89
  140. package/deps/liburing/liburing/test/7ad0e4b2f83c-test.c +0 -93
  141. package/deps/liburing/liburing/test/8a9973408177-test.c +0 -106
  142. package/deps/liburing/liburing/test/917257daa0fe-test.c +0 -53
  143. package/deps/liburing/liburing/test/Makefile +0 -312
  144. package/deps/liburing/liburing/test/a0908ae19763-test.c +0 -58
  145. package/deps/liburing/liburing/test/a4c0b3decb33-test.c +0 -180
  146. package/deps/liburing/liburing/test/accept-link.c +0 -251
  147. package/deps/liburing/liburing/test/accept-reuse.c +0 -164
  148. package/deps/liburing/liburing/test/accept-test.c +0 -79
  149. package/deps/liburing/liburing/test/accept.c +0 -476
  150. package/deps/liburing/liburing/test/across-fork.c +0 -283
  151. package/deps/liburing/liburing/test/b19062a56726-test.c +0 -53
  152. package/deps/liburing/liburing/test/b5837bd5311d-test.c +0 -77
  153. package/deps/liburing/liburing/test/ce593a6c480a-test.c +0 -135
  154. package/deps/liburing/liburing/test/close-opath.c +0 -122
  155. package/deps/liburing/liburing/test/config +0 -10
  156. package/deps/liburing/liburing/test/connect.c +0 -398
  157. package/deps/liburing/liburing/test/cq-full.c +0 -96
  158. package/deps/liburing/liburing/test/cq-overflow.c +0 -294
  159. package/deps/liburing/liburing/test/cq-peek-batch.c +0 -102
  160. package/deps/liburing/liburing/test/cq-ready.c +0 -94
  161. package/deps/liburing/liburing/test/cq-size.c +0 -58
  162. package/deps/liburing/liburing/test/d4ae271dfaae-test.c +0 -96
  163. package/deps/liburing/liburing/test/d77a67ed5f27-test.c +0 -65
  164. package/deps/liburing/liburing/test/defer.c +0 -307
  165. package/deps/liburing/liburing/test/double-poll-crash.c +0 -186
  166. package/deps/liburing/liburing/test/eeed8b54e0df-test.c +0 -114
  167. package/deps/liburing/liburing/test/empty-eownerdead.c +0 -42
  168. package/deps/liburing/liburing/test/eventfd-disable.c +0 -151
  169. package/deps/liburing/liburing/test/eventfd-ring.c +0 -97
  170. package/deps/liburing/liburing/test/eventfd.c +0 -112
  171. package/deps/liburing/liburing/test/fadvise.c +0 -202
  172. package/deps/liburing/liburing/test/fallocate.c +0 -249
  173. package/deps/liburing/liburing/test/fc2a85cb02ef-test.c +0 -138
  174. package/deps/liburing/liburing/test/file-register.c +0 -843
  175. package/deps/liburing/liburing/test/file-update.c +0 -173
  176. package/deps/liburing/liburing/test/files-exit-hang-poll.c +0 -128
  177. package/deps/liburing/liburing/test/files-exit-hang-timeout.c +0 -134
  178. package/deps/liburing/liburing/test/fixed-link.c +0 -90
  179. package/deps/liburing/liburing/test/fsync.c +0 -224
  180. package/deps/liburing/liburing/test/hardlink.c +0 -136
  181. package/deps/liburing/liburing/test/helpers.c +0 -135
  182. package/deps/liburing/liburing/test/helpers.h +0 -67
  183. package/deps/liburing/liburing/test/io-cancel.c +0 -537
  184. package/deps/liburing/liburing/test/io_uring_enter.c +0 -296
  185. package/deps/liburing/liburing/test/io_uring_register.c +0 -664
  186. package/deps/liburing/liburing/test/io_uring_setup.c +0 -192
  187. package/deps/liburing/liburing/test/iopoll.c +0 -366
  188. package/deps/liburing/liburing/test/lfs-openat-write.c +0 -117
  189. package/deps/liburing/liburing/test/lfs-openat.c +0 -273
  190. package/deps/liburing/liburing/test/link-timeout.c +0 -1107
  191. package/deps/liburing/liburing/test/link.c +0 -496
  192. package/deps/liburing/liburing/test/link_drain.c +0 -229
  193. package/deps/liburing/liburing/test/madvise.c +0 -195
  194. package/deps/liburing/liburing/test/mkdir.c +0 -108
  195. package/deps/liburing/liburing/test/multicqes_drain.c +0 -383
  196. package/deps/liburing/liburing/test/nop-all-sizes.c +0 -107
  197. package/deps/liburing/liburing/test/nop.c +0 -115
  198. package/deps/liburing/liburing/test/open-close.c +0 -146
  199. package/deps/liburing/liburing/test/openat2.c +0 -240
  200. package/deps/liburing/liburing/test/personality.c +0 -204
  201. package/deps/liburing/liburing/test/pipe-eof.c +0 -81
  202. package/deps/liburing/liburing/test/pipe-reuse.c +0 -105
  203. package/deps/liburing/liburing/test/poll-cancel-ton.c +0 -139
  204. package/deps/liburing/liburing/test/poll-cancel.c +0 -135
  205. package/deps/liburing/liburing/test/poll-link.c +0 -227
  206. package/deps/liburing/liburing/test/poll-many.c +0 -208
  207. package/deps/liburing/liburing/test/poll-mshot-update.c +0 -273
  208. package/deps/liburing/liburing/test/poll-ring.c +0 -48
  209. package/deps/liburing/liburing/test/poll-v-poll.c +0 -353
  210. package/deps/liburing/liburing/test/poll.c +0 -109
  211. package/deps/liburing/liburing/test/probe.c +0 -137
  212. package/deps/liburing/liburing/test/read-write.c +0 -876
  213. package/deps/liburing/liburing/test/register-restrictions.c +0 -633
  214. package/deps/liburing/liburing/test/rename.c +0 -134
  215. package/deps/liburing/liburing/test/ring-leak.c +0 -173
  216. package/deps/liburing/liburing/test/ring-leak2.c +0 -249
  217. package/deps/liburing/liburing/test/rsrc_tags.c +0 -449
  218. package/deps/liburing/liburing/test/runtests-loop.sh +0 -16
  219. package/deps/liburing/liburing/test/runtests.sh +0 -170
  220. package/deps/liburing/liburing/test/rw_merge_test.c +0 -97
  221. package/deps/liburing/liburing/test/self.c +0 -91
  222. package/deps/liburing/liburing/test/send_recv.c +0 -291
  223. package/deps/liburing/liburing/test/send_recvmsg.c +0 -345
  224. package/deps/liburing/liburing/test/sendmsg_fs_cve.c +0 -198
  225. package/deps/liburing/liburing/test/shared-wq.c +0 -84
  226. package/deps/liburing/liburing/test/short-read.c +0 -75
  227. package/deps/liburing/liburing/test/shutdown.c +0 -163
  228. package/deps/liburing/liburing/test/sigfd-deadlock.c +0 -74
  229. package/deps/liburing/liburing/test/socket-rw-eagain.c +0 -156
  230. package/deps/liburing/liburing/test/socket-rw.c +0 -147
  231. package/deps/liburing/liburing/test/splice.c +0 -511
  232. package/deps/liburing/liburing/test/sq-full-cpp.cc +0 -45
  233. package/deps/liburing/liburing/test/sq-full.c +0 -45
  234. package/deps/liburing/liburing/test/sq-poll-dup.c +0 -200
  235. package/deps/liburing/liburing/test/sq-poll-kthread.c +0 -168
  236. package/deps/liburing/liburing/test/sq-poll-share.c +0 -137
  237. package/deps/liburing/liburing/test/sq-space_left.c +0 -159
  238. package/deps/liburing/liburing/test/sqpoll-cancel-hang.c +0 -159
  239. package/deps/liburing/liburing/test/sqpoll-disable-exit.c +0 -195
  240. package/deps/liburing/liburing/test/sqpoll-exit-hang.c +0 -77
  241. package/deps/liburing/liburing/test/sqpoll-sleep.c +0 -68
  242. package/deps/liburing/liburing/test/statx.c +0 -172
  243. package/deps/liburing/liburing/test/stdout.c +0 -232
  244. package/deps/liburing/liburing/test/submit-link-fail.c +0 -154
  245. package/deps/liburing/liburing/test/submit-reuse.c +0 -239
  246. package/deps/liburing/liburing/test/symlink.c +0 -116
  247. package/deps/liburing/liburing/test/teardowns.c +0 -58
  248. package/deps/liburing/liburing/test/thread-exit.c +0 -131
  249. package/deps/liburing/liburing/test/timeout-new.c +0 -246
  250. package/deps/liburing/liburing/test/timeout-overflow.c +0 -204
  251. package/deps/liburing/liburing/test/timeout.c +0 -1354
  252. package/deps/liburing/liburing/test/unlink.c +0 -111
  253. package/deps/liburing/liburing/test/wakeup-hang.c +0 -162
  254. package/deps/rocksdb/rocksdb/README.md +0 -32
  255. package/deps/rocksdb/rocksdb/microbench/README.md +0 -60
  256. package/deps/rocksdb/rocksdb/plugin/README.md +0 -43
  257. package/deps/rocksdb/rocksdb/port/README +0 -10
  258. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
  259. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -89,6 +89,7 @@ class FilePrefetchBuffer {
89
89
  // while curr_ is being consumed. If data is overlapping in two buffers,
90
90
  // data is copied to third buffer to return continuous buffer.
91
91
  bufs_.resize(3);
92
+ (void)async_io_;
92
93
  }
93
94
 
94
95
  ~FilePrefetchBuffer() {
@@ -131,10 +132,21 @@ class FilePrefetchBuffer {
131
132
  uint64_t offset, size_t n,
132
133
  Env::IOPriority rate_limiter_priority);
133
134
 
135
+ // Request for reading the data from a file asynchronously.
136
+ // If data already exists in the buffer, result will be updated.
137
+ // reader : the file reader.
138
+ // offset : the file offset to start reading from.
139
+ // n : the number of bytes to read.
140
+ // rate_limiter_priority : rate limiting priority, or `Env::IO_TOTAL` to
141
+ // bypass.
142
+ // result : if data already exists in the buffer, result will
143
+ // be updated with the data.
144
+ //
145
+ // If data already exists in the buffer, it will return Status::OK, otherwise
146
+ // it will send asynchronous request and return Status::TryAgain.
134
147
  Status PrefetchAsync(const IOOptions& opts, RandomAccessFileReader* reader,
135
- uint64_t offset, size_t length, size_t readahead_size,
136
- Env::IOPriority rate_limiter_priority,
137
- bool& copy_to_third_buffer);
148
+ uint64_t offset, size_t n,
149
+ Env::IOPriority rate_limiter_priority, Slice* result);
138
150
 
139
151
  // Tries returning the data for a file read from this buffer if that data is
140
152
  // in the buffer.
@@ -159,8 +171,7 @@ class FilePrefetchBuffer {
159
171
  bool TryReadFromCacheAsync(const IOOptions& opts,
160
172
  RandomAccessFileReader* reader, uint64_t offset,
161
173
  size_t n, Slice* result, Status* status,
162
- Env::IOPriority rate_limiter_priority,
163
- bool for_compaction /* = false */);
174
+ Env::IOPriority rate_limiter_priority);
164
175
 
165
176
  // The minimum `offset` ever passed to TryReadFromCache(). This will only be
166
177
  // tracked if track_min_offset = true.
@@ -207,22 +218,6 @@ class FilePrefetchBuffer {
207
218
  }
208
219
  }
209
220
 
210
- bool IsEligibleForPrefetch(uint64_t offset, size_t n) {
211
- // Prefetch only if this read is sequential otherwise reset readahead_size_
212
- // to initial value.
213
- if (!IsBlockSequential(offset)) {
214
- UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
215
- ResetValues();
216
- return false;
217
- }
218
- num_file_reads_++;
219
- if (num_file_reads_ <= kMinNumFileReadsToStartAutoReadahead) {
220
- UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
221
- return false;
222
- }
223
- return true;
224
- }
225
-
226
221
  // Callback function passed to underlying FS in case of asynchronous reads.
227
222
  void PrefetchAsyncCallback(const FSReadRequest& req, void* cb_arg);
228
223
 
@@ -234,6 +229,17 @@ class FilePrefetchBuffer {
234
229
  size_t roundup_len, size_t index, bool refit_tail,
235
230
  uint64_t& chunk_len);
236
231
 
232
+ // It calls Poll API if there is any pending asynchronous request. It then
233
+ // checks if data is in any buffer. It clears the outdated data and swaps the
234
+ // buffers if required.
235
+ void PollAndUpdateBuffersIfNeeded(uint64_t offset);
236
+
237
+ Status PrefetchAsyncInternal(const IOOptions& opts,
238
+ RandomAccessFileReader* reader, uint64_t offset,
239
+ size_t length, size_t readahead_size,
240
+ Env::IOPriority rate_limiter_priority,
241
+ bool& copy_to_third_buffer);
242
+
237
243
  Status Read(const IOOptions& opts, RandomAccessFileReader* reader,
238
244
  Env::IOPriority rate_limiter_priority, uint64_t read_len,
239
245
  uint64_t chunk_len, uint64_t rounddown_start, uint32_t index);
@@ -256,6 +262,22 @@ class FilePrefetchBuffer {
256
262
  readahead_size_ = initial_auto_readahead_size_;
257
263
  }
258
264
 
265
+ bool IsEligibleForPrefetch(uint64_t offset, size_t n) {
266
+ // Prefetch only if this read is sequential otherwise reset readahead_size_
267
+ // to initial value.
268
+ if (!IsBlockSequential(offset)) {
269
+ UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
270
+ ResetValues();
271
+ return false;
272
+ }
273
+ num_file_reads_++;
274
+ if (num_file_reads_ <= kMinNumFileReadsToStartAutoReadahead) {
275
+ UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
276
+ return false;
277
+ }
278
+ return true;
279
+ }
280
+
259
281
  std::vector<BufferInfo> bufs_;
260
282
  // curr_ represents the index for bufs_ indicating which buffer is being
261
283
  // consumed currently.
@@ -534,15 +534,24 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
534
534
  * initially (2 more data blocks).
535
535
  */
536
536
  iter->Seek(BuildKey(0));
537
+ ASSERT_TRUE(iter->Valid());
537
538
  iter->Seek(BuildKey(1000));
539
+ ASSERT_TRUE(iter->Valid());
538
540
  iter->Seek(BuildKey(1004)); // Prefetch Data
541
+ ASSERT_TRUE(iter->Valid());
539
542
  iter->Seek(BuildKey(1008));
543
+ ASSERT_TRUE(iter->Valid());
540
544
  iter->Seek(BuildKey(1011));
545
+ ASSERT_TRUE(iter->Valid());
541
546
  iter->Seek(BuildKey(1015)); // Prefetch Data
547
+ ASSERT_TRUE(iter->Valid());
542
548
  iter->Seek(BuildKey(1019));
549
+ ASSERT_TRUE(iter->Valid());
543
550
  // Missed 2 blocks but they are already in buffer so no reset.
544
551
  iter->Seek(BuildKey(103)); // Already in buffer.
552
+ ASSERT_TRUE(iter->Valid());
545
553
  iter->Seek(BuildKey(1033)); // Prefetch Data
554
+ ASSERT_TRUE(iter->Valid());
546
555
  if (support_prefetch && !use_direct_io) {
547
556
  ASSERT_EQ(fs->GetPrefetchCount(), 3);
548
557
  fs->ClearPrefetchCount();
@@ -558,10 +567,15 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
558
567
  */
559
568
  auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
560
569
  iter->Seek(BuildKey(0));
570
+ ASSERT_TRUE(iter->Valid());
561
571
  iter->Seek(BuildKey(1008));
572
+ ASSERT_TRUE(iter->Valid());
562
573
  iter->Seek(BuildKey(1019));
574
+ ASSERT_TRUE(iter->Valid());
563
575
  iter->Seek(BuildKey(1033));
576
+ ASSERT_TRUE(iter->Valid());
564
577
  iter->Seek(BuildKey(1048));
578
+ ASSERT_TRUE(iter->Valid());
565
579
  if (support_prefetch && !use_direct_io) {
566
580
  ASSERT_EQ(fs->GetPrefetchCount(), 0);
567
581
  fs->ClearPrefetchCount();
@@ -576,9 +590,13 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
576
590
  */
577
591
  auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
578
592
  iter->Seek(BuildKey(0));
593
+ ASSERT_TRUE(iter->Valid());
579
594
  iter->Seek(BuildKey(1));
595
+ ASSERT_TRUE(iter->Valid());
580
596
  iter->Seek(BuildKey(10));
597
+ ASSERT_TRUE(iter->Valid());
581
598
  iter->Seek(BuildKey(100));
599
+ ASSERT_TRUE(iter->Valid());
582
600
  if (support_prefetch && !use_direct_io) {
583
601
  ASSERT_EQ(fs->GetPrefetchCount(), 0);
584
602
  fs->ClearPrefetchCount();
@@ -596,14 +614,21 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
596
614
  */
597
615
  auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
598
616
  iter->Seek(BuildKey(0));
617
+ ASSERT_TRUE(iter->Valid());
599
618
  iter->Seek(BuildKey(1000));
619
+ ASSERT_TRUE(iter->Valid());
600
620
  iter->Seek(BuildKey(1004)); // This iteration will prefetch buffer
621
+ ASSERT_TRUE(iter->Valid());
601
622
  iter->Seek(BuildKey(1008));
623
+ ASSERT_TRUE(iter->Valid());
602
624
  iter->Seek(
603
625
  BuildKey(996)); // Reseek won't prefetch any data and
604
626
  // readahead_size will be initialized to 8*1024.
627
+ ASSERT_TRUE(iter->Valid());
605
628
  iter->Seek(BuildKey(992));
629
+ ASSERT_TRUE(iter->Valid());
606
630
  iter->Seek(BuildKey(989));
631
+ ASSERT_TRUE(iter->Valid());
607
632
  if (support_prefetch && !use_direct_io) {
608
633
  ASSERT_EQ(fs->GetPrefetchCount(), 1);
609
634
  fs->ClearPrefetchCount();
@@ -615,11 +640,17 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
615
640
  // Read sequentially to confirm readahead_size is reset to initial value (2
616
641
  // more data blocks)
617
642
  iter->Seek(BuildKey(1011));
643
+ ASSERT_TRUE(iter->Valid());
618
644
  iter->Seek(BuildKey(1015));
645
+ ASSERT_TRUE(iter->Valid());
619
646
  iter->Seek(BuildKey(1019)); // Prefetch Data
647
+ ASSERT_TRUE(iter->Valid());
620
648
  iter->Seek(BuildKey(1022));
649
+ ASSERT_TRUE(iter->Valid());
621
650
  iter->Seek(BuildKey(1026));
651
+ ASSERT_TRUE(iter->Valid());
622
652
  iter->Seek(BuildKey(103)); // Prefetch Data
653
+ ASSERT_TRUE(iter->Valid());
623
654
  if (support_prefetch && !use_direct_io) {
624
655
  ASSERT_EQ(fs->GetPrefetchCount(), 2);
625
656
  fs->ClearPrefetchCount();
@@ -634,12 +665,19 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
634
665
  */
635
666
  auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
636
667
  iter->Seek(BuildKey(0));
668
+ ASSERT_TRUE(iter->Valid());
637
669
  iter->Seek(BuildKey(1167));
670
+ ASSERT_TRUE(iter->Valid());
638
671
  iter->Seek(BuildKey(1334)); // This iteration will prefetch buffer
672
+ ASSERT_TRUE(iter->Valid());
639
673
  iter->Seek(BuildKey(1499));
674
+ ASSERT_TRUE(iter->Valid());
640
675
  iter->Seek(BuildKey(1667));
676
+ ASSERT_TRUE(iter->Valid());
641
677
  iter->Seek(BuildKey(1847));
678
+ ASSERT_TRUE(iter->Valid());
642
679
  iter->Seek(BuildKey(1999));
680
+ ASSERT_TRUE(iter->Valid());
643
681
  if (support_prefetch && !use_direct_io) {
644
682
  ASSERT_EQ(fs->GetPrefetchCount(), 1);
645
683
  fs->ClearPrefetchCount();
@@ -766,8 +804,11 @@ TEST_P(PrefetchTest, PrefetchWhenReseekwithCache) {
766
804
  auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
767
805
  // Warm up the cache
768
806
  iter->Seek(BuildKey(1011));
807
+ ASSERT_TRUE(iter->Valid());
769
808
  iter->Seek(BuildKey(1015));
809
+ ASSERT_TRUE(iter->Valid());
770
810
  iter->Seek(BuildKey(1019));
811
+ ASSERT_TRUE(iter->Valid());
771
812
  if (support_prefetch && !use_direct_io) {
772
813
  ASSERT_EQ(fs->GetPrefetchCount(), 1);
773
814
  fs->ClearPrefetchCount();
@@ -780,20 +821,31 @@ TEST_P(PrefetchTest, PrefetchWhenReseekwithCache) {
780
821
  // After caching, blocks will be read from cache (Sequential blocks)
781
822
  auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
782
823
  iter->Seek(BuildKey(0));
824
+ ASSERT_TRUE(iter->Valid());
783
825
  iter->Seek(BuildKey(1000));
826
+ ASSERT_TRUE(iter->Valid());
784
827
  iter->Seek(BuildKey(1004)); // Prefetch data (not in cache).
828
+ ASSERT_TRUE(iter->Valid());
785
829
  // Missed one sequential block but next is already in buffer so readahead
786
830
  // will not be reset.
787
831
  iter->Seek(BuildKey(1011));
832
+ ASSERT_TRUE(iter->Valid());
788
833
  // Prefetch data but blocks are in cache so no prefetch and reset.
789
834
  iter->Seek(BuildKey(1015));
835
+ ASSERT_TRUE(iter->Valid());
790
836
  iter->Seek(BuildKey(1019));
837
+ ASSERT_TRUE(iter->Valid());
791
838
  iter->Seek(BuildKey(1022));
839
+ ASSERT_TRUE(iter->Valid());
792
840
  // Prefetch data with readahead_size = 4 blocks.
793
841
  iter->Seek(BuildKey(1026));
842
+ ASSERT_TRUE(iter->Valid());
794
843
  iter->Seek(BuildKey(103));
844
+ ASSERT_TRUE(iter->Valid());
795
845
  iter->Seek(BuildKey(1033));
846
+ ASSERT_TRUE(iter->Valid());
796
847
  iter->Seek(BuildKey(1037));
848
+ ASSERT_TRUE(iter->Valid());
797
849
 
798
850
  if (support_prefetch && !use_direct_io) {
799
851
  ASSERT_EQ(fs->GetPrefetchCount(), 3);
@@ -881,7 +933,7 @@ TEST_P(PrefetchTest1, DBIterLevelReadAhead) {
881
933
  [&](void*) { buff_prefetch_count++; });
882
934
 
883
935
  SyncPoint::GetInstance()->SetCallBack(
884
- "FilePrefetchBuffer::PrefetchAsync:Start",
936
+ "FilePrefetchBuffer::PrefetchAsyncInternal:Start",
885
937
  [&](void*) { buff_async_prefetch_count++; });
886
938
 
887
939
  // The callback checks, since reads are sequential, readahead_size doesn't
@@ -955,7 +1007,7 @@ class PrefetchTest2 : public DBTestBase,
955
1007
  INSTANTIATE_TEST_CASE_P(PrefetchTest2, PrefetchTest2, ::testing::Bool());
956
1008
 
957
1009
  #ifndef ROCKSDB_LITE
958
- TEST_P(PrefetchTest2, NonSequentialReads) {
1010
+ TEST_P(PrefetchTest2, NonSequentialReadsWithAdaptiveReadahead) {
959
1011
  const int kNumKeys = 1000;
960
1012
  // Set options
961
1013
  std::shared_ptr<MockFS> fs =
@@ -1002,9 +1054,8 @@ TEST_P(PrefetchTest2, NonSequentialReads) {
1002
1054
  int set_readahead = 0;
1003
1055
  size_t readahead_size = 0;
1004
1056
 
1005
- SyncPoint::GetInstance()->SetCallBack(
1006
- "FilePrefetchBuffer::PrefetchAsync:Start",
1007
- [&](void*) { buff_prefetch_count++; });
1057
+ SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start",
1058
+ [&](void*) { buff_prefetch_count++; });
1008
1059
  SyncPoint::GetInstance()->SetCallBack(
1009
1060
  "BlockPrefetcher::SetReadaheadState",
1010
1061
  [&](void* /*arg*/) { set_readahead++; });
@@ -1018,13 +1069,15 @@ TEST_P(PrefetchTest2, NonSequentialReads) {
1018
1069
  // Iterate until prefetch is done.
1019
1070
  ReadOptions ro;
1020
1071
  ro.adaptive_readahead = true;
1021
- // TODO akanksha: Remove after adding new units.
1022
- ro.async_io = true;
1023
1072
  auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
1073
+
1024
1074
  iter->SeekToFirst();
1075
+ ASSERT_TRUE(iter->Valid());
1076
+
1025
1077
  while (iter->Valid() && buff_prefetch_count == 0) {
1026
1078
  iter->Next();
1027
1079
  }
1080
+
1028
1081
  ASSERT_EQ(readahead_size, 8 * 1024);
1029
1082
  ASSERT_EQ(buff_prefetch_count, 1);
1030
1083
  ASSERT_EQ(set_readahead, 0);
@@ -1033,9 +1086,12 @@ TEST_P(PrefetchTest2, NonSequentialReads) {
1033
1086
  // Move to last file and check readahead size falls back to 8KB. So next
1034
1087
  // readahead size after prefetch should be 8 * 1024;
1035
1088
  iter->Seek(BuildKey(4004));
1089
+ ASSERT_TRUE(iter->Valid());
1090
+
1036
1091
  while (iter->Valid() && buff_prefetch_count == 0) {
1037
1092
  iter->Next();
1038
1093
  }
1094
+
1039
1095
  ASSERT_EQ(readahead_size, 8 * 1024);
1040
1096
  ASSERT_EQ(set_readahead, 0);
1041
1097
  ASSERT_EQ(buff_prefetch_count, 1);
@@ -1099,7 +1155,7 @@ TEST_P(PrefetchTest2, DecreaseReadAheadIfInCache) {
1099
1155
  size_t decrease_readahead_size = 8 * 1024;
1100
1156
 
1101
1157
  SyncPoint::GetInstance()->SetCallBack(
1102
- "FilePrefetchBuffer::PrefetchAsync:Start",
1158
+ "FilePrefetchBuffer::PrefetchAsyncInternal:Start",
1103
1159
  [&](void*) { buff_prefetch_count++; });
1104
1160
  SyncPoint::GetInstance()->SetCallBack(
1105
1161
  "FilePrefetchBuffer::TryReadFromCache", [&](void* arg) {
@@ -1120,8 +1176,11 @@ TEST_P(PrefetchTest2, DecreaseReadAheadIfInCache) {
1120
1176
  auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
1121
1177
  // Warm up the cache
1122
1178
  iter->Seek(BuildKey(1011));
1179
+ ASSERT_TRUE(iter->Valid());
1123
1180
  iter->Seek(BuildKey(1015));
1181
+ ASSERT_TRUE(iter->Valid());
1124
1182
  iter->Seek(BuildKey(1019));
1183
+ ASSERT_TRUE(iter->Valid());
1125
1184
  buff_prefetch_count = 0;
1126
1185
  }
1127
1186
 
@@ -1129,26 +1188,39 @@ TEST_P(PrefetchTest2, DecreaseReadAheadIfInCache) {
1129
1188
  ASSERT_OK(options.statistics->Reset());
1130
1189
  // After caching, blocks will be read from cache (Sequential blocks)
1131
1190
  auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
1132
- iter->Seek(BuildKey(0));
1191
+ iter->Seek(
1192
+ BuildKey(0)); // In cache so it will decrease the readahead_size.
1133
1193
  ASSERT_TRUE(iter->Valid());
1134
- iter->Seek(BuildKey(1000));
1194
+ expected_current_readahead_size = std::max(
1195
+ decrease_readahead_size,
1196
+ (expected_current_readahead_size >= decrease_readahead_size
1197
+ ? (expected_current_readahead_size - decrease_readahead_size)
1198
+ : 0));
1199
+
1200
+ iter->Seek(BuildKey(1000)); // Prefetch the block.
1135
1201
  ASSERT_TRUE(iter->Valid());
1136
- iter->Seek(BuildKey(1004)); // Prefetch data (not in cache).
1202
+ ASSERT_EQ(current_readahead_size, expected_current_readahead_size);
1203
+ expected_current_readahead_size *= 2;
1204
+
1205
+ iter->Seek(BuildKey(1004)); // Prefetch the block.
1137
1206
  ASSERT_TRUE(iter->Valid());
1138
1207
  ASSERT_EQ(current_readahead_size, expected_current_readahead_size);
1208
+ expected_current_readahead_size *= 2;
1139
1209
 
1140
- // Missed one sequential block but 1011 is already in buffer so
1141
- // readahead will not be reset.
1210
+ // 1011 is already in cache but won't reset??
1142
1211
  iter->Seek(BuildKey(1011));
1143
1212
  ASSERT_TRUE(iter->Valid());
1144
- ASSERT_EQ(current_readahead_size, expected_current_readahead_size);
1145
1213
 
1146
1214
  // Eligible to Prefetch data (not in buffer) but block is in cache so no
1147
1215
  // prefetch will happen and will result in decrease in readahead_size.
1148
1216
  // readahead_size will be 8 * 1024
1149
1217
  iter->Seek(BuildKey(1015));
1150
1218
  ASSERT_TRUE(iter->Valid());
1151
- expected_current_readahead_size -= decrease_readahead_size;
1219
+ expected_current_readahead_size = std::max(
1220
+ decrease_readahead_size,
1221
+ (expected_current_readahead_size >= decrease_readahead_size
1222
+ ? (expected_current_readahead_size - decrease_readahead_size)
1223
+ : 0));
1152
1224
 
1153
1225
  // 1016 is the same block as 1015. So no change in readahead_size.
1154
1226
  iter->Seek(BuildKey(1016));
@@ -1169,7 +1241,7 @@ TEST_P(PrefetchTest2, DecreaseReadAheadIfInCache) {
1169
1241
  iter->Seek(BuildKey(1022));
1170
1242
  ASSERT_TRUE(iter->Valid());
1171
1243
  ASSERT_EQ(current_readahead_size, expected_current_readahead_size);
1172
- ASSERT_EQ(buff_prefetch_count, 2);
1244
+ ASSERT_EQ(buff_prefetch_count, 3);
1173
1245
 
1174
1246
  // Check stats to make sure async prefetch is done.
1175
1247
  {
@@ -1179,6 +1251,7 @@ TEST_P(PrefetchTest2, DecreaseReadAheadIfInCache) {
1179
1251
  ASSERT_EQ(async_read_bytes.count, 0);
1180
1252
  } else {
1181
1253
  ASSERT_GT(async_read_bytes.count, 0);
1254
+ ASSERT_GT(get_perf_context()->number_async_seek, 0);
1182
1255
  }
1183
1256
  }
1184
1257
 
@@ -1264,7 +1337,7 @@ TEST_P(PrefetchTestWithPosix, ReadAsyncWithPosixFS) {
1264
1337
  }
1265
1338
 
1266
1339
  SyncPoint::GetInstance()->SetCallBack(
1267
- "FilePrefetchBuffer::PrefetchAsync:Start",
1340
+ "FilePrefetchBuffer::PrefetchAsyncInternal:Start",
1268
1341
  [&](void*) { buff_prefetch_count++; });
1269
1342
 
1270
1343
  SyncPoint::GetInstance()->SetCallBack(
@@ -1275,12 +1348,15 @@ TEST_P(PrefetchTestWithPosix, ReadAsyncWithPosixFS) {
1275
1348
  // Read the keys.
1276
1349
  {
1277
1350
  ASSERT_OK(options.statistics->Reset());
1351
+ get_perf_context()->Reset();
1352
+
1278
1353
  auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
1279
1354
  int num_keys = 0;
1280
1355
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1281
1356
  ASSERT_OK(iter->status());
1282
1357
  num_keys++;
1283
1358
  }
1359
+
1284
1360
  ASSERT_EQ(num_keys, total_keys);
1285
1361
  ASSERT_GT(buff_prefetch_count, 0);
1286
1362
 
@@ -1301,6 +1377,55 @@ TEST_P(PrefetchTestWithPosix, ReadAsyncWithPosixFS) {
1301
1377
  }
1302
1378
  ASSERT_GT(prefetched_bytes_discarded.count, 0);
1303
1379
  }
1380
+ ASSERT_EQ(get_perf_context()->number_async_seek, 0);
1381
+ }
1382
+
1383
+ {
1384
+ // Read the keys using seek.
1385
+ {
1386
+ ASSERT_OK(options.statistics->Reset());
1387
+ get_perf_context()->Reset();
1388
+
1389
+ auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
1390
+ int num_keys = 0;
1391
+ iter->Seek(BuildKey(450));
1392
+ while (iter->Valid()) {
1393
+ ASSERT_OK(iter->status());
1394
+ num_keys++;
1395
+ iter->Next();
1396
+ }
1397
+ ASSERT_OK(iter->status());
1398
+
1399
+ iter->Seek(BuildKey(450));
1400
+ while (iter->Valid()) {
1401
+ ASSERT_OK(iter->status());
1402
+ num_keys++;
1403
+ iter->Prev();
1404
+ }
1405
+
1406
+ ASSERT_EQ(num_keys, total_keys + 1);
1407
+ ASSERT_GT(buff_prefetch_count, 0);
1408
+
1409
+ // Check stats to make sure async prefetch is done.
1410
+ {
1411
+ HistogramData async_read_bytes;
1412
+ options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes);
1413
+ HistogramData prefetched_bytes_discarded;
1414
+ options.statistics->histogramData(PREFETCHED_BYTES_DISCARDED,
1415
+ &prefetched_bytes_discarded);
1416
+
1417
+ // Not all platforms support iouring. In that case, ReadAsync in posix
1418
+ // won't submit async requests.
1419
+ if (read_async_called) {
1420
+ ASSERT_GT(async_read_bytes.count, 0);
1421
+ ASSERT_GT(get_perf_context()->number_async_seek, 0);
1422
+ } else {
1423
+ ASSERT_EQ(async_read_bytes.count, 0);
1424
+ ASSERT_EQ(get_perf_context()->number_async_seek, 0);
1425
+ }
1426
+ ASSERT_GT(prefetched_bytes_discarded.count, 0);
1427
+ }
1428
+ }
1304
1429
  }
1305
1430
 
1306
1431
  SyncPoint::GetInstance()->DisableProcessing();
@@ -55,9 +55,9 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
55
55
 
56
56
  {
57
57
  IOOptions io_options;
58
- WritableFileWriter::DecideRateLimiterPriority(
59
- writable_file_->GetIOPriority(), op_rate_limiter_priority,
60
- io_options.rate_limiter_priority);
58
+ io_options.rate_limiter_priority =
59
+ WritableFileWriter::DecideRateLimiterPriority(
60
+ writable_file_->GetIOPriority(), op_rate_limiter_priority);
61
61
  IOSTATS_TIMER_GUARD(prepare_write_nanos);
62
62
  TEST_SYNC_POINT("WritableFileWriter::Append:BeforePrepareWrite");
63
63
  writable_file_->PrepareWrite(static_cast<size_t>(GetFileSize()), left,
@@ -338,9 +338,9 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
338
338
  }
339
339
  #endif
340
340
  IOOptions io_options;
341
- WritableFileWriter::DecideRateLimiterPriority(
342
- writable_file_->GetIOPriority(), op_rate_limiter_priority,
343
- io_options.rate_limiter_priority);
341
+ io_options.rate_limiter_priority =
342
+ WritableFileWriter::DecideRateLimiterPriority(
343
+ writable_file_->GetIOPriority(), op_rate_limiter_priority);
344
344
  s = writable_file_->Flush(io_options, nullptr);
345
345
  #ifndef ROCKSDB_LITE
346
346
  if (ShouldNotifyListeners()) {
@@ -507,11 +507,11 @@ IOStatus WritableFileWriter::WriteBuffered(
507
507
  size_t left = size;
508
508
  DataVerificationInfo v_info;
509
509
  char checksum_buf[sizeof(uint32_t)];
510
- IOOptions io_options;
511
510
  Env::IOPriority rate_limiter_priority_used =
512
511
  WritableFileWriter::DecideRateLimiterPriority(
513
- writable_file_->GetIOPriority(), op_rate_limiter_priority,
514
- io_options.rate_limiter_priority);
512
+ writable_file_->GetIOPriority(), op_rate_limiter_priority);
513
+ IOOptions io_options;
514
+ io_options.rate_limiter_priority = rate_limiter_priority_used;
515
515
 
516
516
  while (left > 0) {
517
517
  size_t allowed = left;
@@ -596,11 +596,11 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum(
596
596
  size_t left = size;
597
597
  DataVerificationInfo v_info;
598
598
  char checksum_buf[sizeof(uint32_t)];
599
- IOOptions io_options;
600
599
  Env::IOPriority rate_limiter_priority_used =
601
600
  WritableFileWriter::DecideRateLimiterPriority(
602
- writable_file_->GetIOPriority(), op_rate_limiter_priority,
603
- io_options.rate_limiter_priority);
601
+ writable_file_->GetIOPriority(), op_rate_limiter_priority);
602
+ IOOptions io_options;
603
+ io_options.rate_limiter_priority = rate_limiter_priority_used;
604
604
  // Check how much is allowed. Here, we loop until the rate limiter allows to
605
605
  // write the entire buffer.
606
606
  // TODO: need to be improved since it sort of defeats the purpose of the rate
@@ -726,11 +726,11 @@ IOStatus WritableFileWriter::WriteDirect(
726
726
  size_t left = buf_.CurrentSize();
727
727
  DataVerificationInfo v_info;
728
728
  char checksum_buf[sizeof(uint32_t)];
729
- IOOptions io_options;
730
729
  Env::IOPriority rate_limiter_priority_used =
731
730
  WritableFileWriter::DecideRateLimiterPriority(
732
- writable_file_->GetIOPriority(), op_rate_limiter_priority,
733
- io_options.rate_limiter_priority);
731
+ writable_file_->GetIOPriority(), op_rate_limiter_priority);
732
+ IOOptions io_options;
733
+ io_options.rate_limiter_priority = rate_limiter_priority_used;
734
734
 
735
735
  while (left > 0) {
736
736
  // Check how much is allowed
@@ -827,11 +827,11 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
827
827
  DataVerificationInfo v_info;
828
828
  char checksum_buf[sizeof(uint32_t)];
829
829
 
830
- IOOptions io_options;
831
830
  Env::IOPriority rate_limiter_priority_used =
832
831
  WritableFileWriter::DecideRateLimiterPriority(
833
- writable_file_->GetIOPriority(), op_rate_limiter_priority,
834
- io_options.rate_limiter_priority);
832
+ writable_file_->GetIOPriority(), op_rate_limiter_priority);
833
+ IOOptions io_options;
834
+ io_options.rate_limiter_priority = rate_limiter_priority_used;
835
835
  // Check how much is allowed. Here, we loop until the rate limiter allows to
836
836
  // write the entire buffer.
837
837
  // TODO: need to be improved since it sort of defeats the purpose of the rate
@@ -901,21 +901,17 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
901
901
  #endif // !ROCKSDB_LITE
902
902
  Env::IOPriority WritableFileWriter::DecideRateLimiterPriority(
903
903
  Env::IOPriority writable_file_io_priority,
904
- Env::IOPriority op_rate_limiter_priority,
905
- Env::IOPriority& iooptions_io_priority) {
906
- Env::IOPriority rate_limiter_priority{Env::IO_TOTAL};
904
+ Env::IOPriority op_rate_limiter_priority) {
907
905
  if (writable_file_io_priority == Env::IO_TOTAL &&
908
906
  op_rate_limiter_priority == Env::IO_TOTAL) {
909
- rate_limiter_priority = Env::IO_TOTAL;
907
+ return Env::IO_TOTAL;
910
908
  } else if (writable_file_io_priority == Env::IO_TOTAL) {
911
- rate_limiter_priority = op_rate_limiter_priority;
909
+ return op_rate_limiter_priority;
912
910
  } else if (op_rate_limiter_priority == Env::IO_TOTAL) {
913
- rate_limiter_priority = writable_file_io_priority;
911
+ return writable_file_io_priority;
914
912
  } else {
915
- rate_limiter_priority = op_rate_limiter_priority;
913
+ return op_rate_limiter_priority;
916
914
  }
917
- iooptions_io_priority = rate_limiter_priority;
918
- return rate_limiter_priority;
919
915
  }
920
916
 
921
917
  } // namespace ROCKSDB_NAMESPACE
@@ -277,11 +277,10 @@ class WritableFileWriter {
277
277
  const char* GetFileChecksumFuncName() const;
278
278
 
279
279
  private:
280
- // Decide the Rate Limiter priority and update io_options.io_priority.
280
+ // Decide the Rate Limiter priority.
281
281
  static Env::IOPriority DecideRateLimiterPriority(
282
282
  Env::IOPriority writable_file_io_priority,
283
- Env::IOPriority op_rate_limiter_priority,
284
- Env::IOPriority& iooptions_io_priority);
283
+ Env::IOPriority op_rate_limiter_priority);
285
284
 
286
285
  // Used when os buffering is OFF and we are writing
287
286
  // DMA such as in Direct I/O mode
@@ -100,8 +100,9 @@ struct CompressionOptions {
100
100
  //
101
101
  // The dictionary is created by sampling the SST file data. If
102
102
  // `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's
103
- // dictionary generator. Otherwise, the random samples are used directly as
104
- // the dictionary.
103
+ // dictionary generator (see comments for option `use_zstd_dict_trainer` for
104
+ // detail on dictionary generator). If `zstd_max_train_bytes` is zero, the
105
+ // random samples are used directly as the dictionary.
105
106
  //
106
107
  // When compression dictionary is disabled, we compress and write each block
107
108
  // before buffering data for the next one. When compression dictionary is
@@ -173,6 +174,20 @@ struct CompressionOptions {
173
174
  // Default: 0 (unlimited)
174
175
  uint64_t max_dict_buffer_bytes;
175
176
 
177
+ // Use zstd trainer to generate dictionaries. When this option is set to true,
178
+ // zstd_max_train_bytes of training data sampled from max_dict_buffer_bytes
179
+ // buffered data will be passed to zstd dictionary trainer to generate a
180
+ // dictionary of size max_dict_bytes.
181
+ //
182
+ // When this option is false, zstd's API ZDICT_finalizeDictionary() will be
183
+ // called to generate dictionaries. zstd_max_train_bytes of training sampled
184
+ // data will be passed to this API. Using this API should save CPU time on
185
+ // dictionary training, but the compression ratio may not be as good as using
186
+ // a dictionary trainer.
187
+ //
188
+ // Default: true
189
+ bool use_zstd_dict_trainer;
190
+
176
191
  CompressionOptions()
177
192
  : window_bits(-14),
178
193
  level(kDefaultCompressionLevel),
@@ -181,11 +196,13 @@ struct CompressionOptions {
181
196
  zstd_max_train_bytes(0),
182
197
  parallel_threads(1),
183
198
  enabled(false),
184
- max_dict_buffer_bytes(0) {}
199
+ max_dict_buffer_bytes(0),
200
+ use_zstd_dict_trainer(true) {}
185
201
  CompressionOptions(int wbits, int _lev, int _strategy,
186
202
  uint32_t _max_dict_bytes, uint32_t _zstd_max_train_bytes,
187
203
  uint32_t _parallel_threads, bool _enabled,
188
- uint64_t _max_dict_buffer_bytes)
204
+ uint64_t _max_dict_buffer_bytes,
205
+ bool _use_zstd_dict_trainer)
189
206
  : window_bits(wbits),
190
207
  level(_lev),
191
208
  strategy(_strategy),
@@ -193,7 +210,8 @@ struct CompressionOptions {
193
210
  zstd_max_train_bytes(_zstd_max_train_bytes),
194
211
  parallel_threads(_parallel_threads),
195
212
  enabled(_enabled),
196
- max_dict_buffer_bytes(_max_dict_buffer_bytes) {}
213
+ max_dict_buffer_bytes(_max_dict_buffer_bytes),
214
+ use_zstd_dict_trainer(_use_zstd_dict_trainer) {}
197
215
  };
198
216
 
199
217
  // Temperature of a file. Used to pass to FileSystem for a different