@nxtedition/rocksdb 8.2.7 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (359)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -1
  2. package/deps/rocksdb/rocksdb/Makefile +22 -19
  3. package/deps/rocksdb/rocksdb/TARGETS +8 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +157 -61
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +43 -92
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +632 -455
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +244 -149
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +41 -13
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +11 -1
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +216 -17
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
  12. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +279 -199
  13. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +159 -8
  15. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +28 -2
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +1 -1
  17. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -0
  18. package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
  19. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -21
  25. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
  26. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
  29. package/deps/rocksdb/rocksdb/db/builder.cc +32 -7
  30. package/deps/rocksdb/rocksdb/db/c.cc +169 -6
  31. package/deps/rocksdb/rocksdb/db/c_test.c +104 -6
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +98 -47
  33. package/deps/rocksdb/rocksdb/db/column_family.h +25 -2
  34. package/deps/rocksdb/rocksdb/db/column_family_test.cc +213 -2
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +93 -23
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +33 -9
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -6
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +107 -43
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -4
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +25 -17
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -4
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +29 -4
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +24 -31
  50. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +19 -19
  52. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +2 -1
  53. package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
  54. package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
  55. package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
  56. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
  57. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -3
  58. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +15 -15
  60. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +666 -44
  61. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
  62. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +274 -1
  63. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +40 -19
  64. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +250 -116
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +51 -23
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +354 -96
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +6 -3
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -21
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +26 -13
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -5
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +61 -21
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -87
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +7 -1
  77. package/deps/rocksdb/rocksdb/db/db_iter.cc +2 -2
  78. package/deps/rocksdb/rocksdb/db/db_iter.h +1 -0
  79. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
  80. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +6 -6
  81. package/deps/rocksdb/rocksdb/db/db_options_test.cc +39 -29
  82. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
  83. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +106 -0
  84. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +12 -3
  85. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
  86. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
  87. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +279 -166
  88. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -21
  89. package/deps/rocksdb/rocksdb/db/db_test2.cc +81 -12
  90. package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
  91. package/deps/rocksdb/rocksdb/db/db_test_util.h +40 -0
  92. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +13 -1
  93. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +233 -0
  94. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +143 -0
  95. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
  96. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
  97. package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
  98. package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
  99. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
  100. package/deps/rocksdb/rocksdb/db/error_handler.cc +16 -0
  101. package/deps/rocksdb/rocksdb/db/error_handler.h +6 -3
  102. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  103. package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
  104. package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
  105. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -4
  108. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +101 -11
  111. package/deps/rocksdb/rocksdb/db/flush_job.h +24 -1
  112. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +88 -11
  113. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
  114. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
  115. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
  116. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
  117. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
  118. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  119. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
  120. package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
  121. package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
  122. package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
  123. package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
  124. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -3
  125. package/deps/rocksdb/rocksdb/db/memtable.cc +52 -13
  126. package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
  127. package/deps/rocksdb/rocksdb/db/memtable_list.cc +44 -10
  128. package/deps/rocksdb/rocksdb/db/memtable_list.h +32 -1
  129. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +90 -4
  130. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -2
  131. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
  132. package/deps/rocksdb/rocksdb/db/repair.cc +21 -4
  133. package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
  134. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -4
  135. package/deps/rocksdb/rocksdb/db/table_cache.cc +44 -35
  136. package/deps/rocksdb/rocksdb/db/table_cache.h +6 -6
  137. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  138. package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
  139. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
  140. package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
  141. package/deps/rocksdb/rocksdb/db/version_edit.h +48 -6
  142. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
  143. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
  144. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
  145. package/deps/rocksdb/rocksdb/db/version_set.cc +136 -41
  146. package/deps/rocksdb/rocksdb/db/version_set.h +28 -7
  147. package/deps/rocksdb/rocksdb/db/version_set_test.cc +25 -15
  148. package/deps/rocksdb/rocksdb/db/write_batch.cc +11 -0
  149. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  150. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
  151. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -3
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +2 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +32 -3
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -0
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -120
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +9 -4
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +13 -6
  159. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
  160. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +15 -27
  161. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +264 -69
  162. package/deps/rocksdb/rocksdb/env/env.cc +1 -2
  163. package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
  164. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
  165. package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
  166. package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
  167. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  168. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +78 -0
  169. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
  170. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
  171. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
  172. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +52 -43
  173. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +34 -18
  174. package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
  175. package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
  176. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +724 -79
  177. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +64 -33
  178. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
  179. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
  180. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +2 -1
  182. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +153 -88
  183. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +70 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +50 -11
  185. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
  186. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +16 -2
  187. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
  188. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +55 -8
  189. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +32 -4
  190. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
  191. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +90 -13
  192. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
  193. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +85 -17
  194. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +13 -1
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +21 -2
  198. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +6 -0
  200. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +5 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
  202. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
  203. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +33 -2
  204. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
  205. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  206. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -0
  207. package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
  208. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -1
  209. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
  210. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +16 -1
  211. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +10 -0
  212. package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
  213. package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
  214. package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -1
  215. package/deps/rocksdb/rocksdb/options/db_options.cc +7 -0
  216. package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
  217. package/deps/rocksdb/rocksdb/options/options.cc +15 -1
  218. package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
  219. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -3
  220. package/deps/rocksdb/rocksdb/options/options_test.cc +8 -0
  221. package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
  222. package/deps/rocksdb/rocksdb/port/stack_trace.cc +27 -12
  223. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
  224. package/deps/rocksdb/rocksdb/src.mk +3 -0
  225. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  226. package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
  227. package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
  228. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +115 -42
  229. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -5
  230. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +60 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +2 -0
  232. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +62 -44
  233. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +36 -14
  234. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -15
  235. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
  236. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
  237. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
  238. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -21
  239. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +11 -4
  240. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +195 -55
  241. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  242. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
  243. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
  244. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  245. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +27 -12
  247. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
  248. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
  249. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +1 -2
  250. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +9 -6
  251. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
  252. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
  253. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +11 -11
  254. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -0
  255. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
  256. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
  257. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
  258. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
  259. package/deps/rocksdb/rocksdb/table/format.cc +175 -33
  260. package/deps/rocksdb/rocksdb/table/format.h +63 -10
  261. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -2
  262. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
  263. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
  264. package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
  266. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
  267. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
  268. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
  269. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +12 -3
  270. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +26 -1
  271. package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
  272. package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
  275. package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
  277. package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
  278. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +65 -26
  279. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
  280. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
  281. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
  282. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +0 -1
  283. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
  284. package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
  285. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
  286. package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
  287. package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
  288. package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
  289. package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
  290. package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
  291. package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
  292. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
  293. package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
  294. package/deps/rocksdb/rocksdb/util/cast_util.h +14 -0
  295. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
  296. package/deps/rocksdb/rocksdb/util/comparator.cc +29 -7
  297. package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
  298. package/deps/rocksdb/rocksdb/util/compression.h +110 -32
  299. package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
  300. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
  301. package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
  302. package/deps/rocksdb/rocksdb/util/hash.h +7 -3
  303. package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
  304. package/deps/rocksdb/rocksdb/util/math.h +58 -6
  305. package/deps/rocksdb/rocksdb/util/math128.h +29 -7
  306. package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
  307. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
  308. package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
  309. package/deps/rocksdb/rocksdb/util/thread_operation.h +8 -1
  310. package/deps/rocksdb/rocksdb/util/udt_util.cc +343 -0
  311. package/deps/rocksdb/rocksdb/util/udt_util.h +173 -1
  312. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +447 -0
  313. package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
  314. package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
  315. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
  316. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +69 -25
  317. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
  318. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
  319. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
  320. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
  321. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
  322. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
  323. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +1 -1
  324. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
  325. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +2 -1
  326. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +3 -3
  327. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
  328. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
  329. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
  330. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  331. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
  332. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
  333. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
  334. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +231 -33
  335. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +0 -1
  336. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
  337. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
  338. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +40 -23
  339. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +13 -12
  340. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +7 -0
  341. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
  342. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +41 -11
  343. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
  344. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +71 -24
  345. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
  346. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
  347. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +39 -11
  348. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
  349. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +14 -8
  350. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
  351. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
  352. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  353. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
  354. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +2 -1
  355. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +6 -6
  356. package/deps/rocksdb/rocksdb.gyp +2 -0
  357. package/package.json +1 -1
  358. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  359. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -70,7 +70,8 @@ FilterBlockBuilder* CreateFilterBlockBuilder(
70
70
  const ImmutableCFOptions& /*opt*/, const MutableCFOptions& mopt,
71
71
  const FilterBuildingContext& context,
72
72
  const bool use_delta_encoding_for_index_values,
73
- PartitionedIndexBuilder* const p_index_builder) {
73
+ PartitionedIndexBuilder* const p_index_builder, size_t ts_sz,
74
+ const bool persist_user_defined_timestamps) {
74
75
  const BlockBasedTableOptions& table_opt = context.table_options;
75
76
  assert(table_opt.filter_policy); // precondition
76
77
 
@@ -95,7 +96,8 @@ FilterBlockBuilder* CreateFilterBlockBuilder(
95
96
  return new PartitionedFilterBlockBuilder(
96
97
  mopt.prefix_extractor.get(), table_opt.whole_key_filtering,
97
98
  filter_bits_builder, table_opt.index_block_restart_interval,
98
- use_delta_encoding_for_index_values, p_index_builder, partition_size);
99
+ use_delta_encoding_for_index_values, p_index_builder, partition_size,
100
+ ts_sz, persist_user_defined_timestamps);
99
101
  } else {
100
102
  return new FullFilterBlockBuilder(mopt.prefix_extractor.get(),
101
103
  table_opt.whole_key_filtering,
@@ -136,8 +138,8 @@ Slice CompressBlock(const Slice& uncompressed_data, const CompressionInfo& info,
136
138
  if (sampled_output_fast && (LZ4_Supported() || Snappy_Supported())) {
137
139
  CompressionType c =
138
140
  LZ4_Supported() ? kLZ4Compression : kSnappyCompression;
139
- CompressionContext context(c);
140
141
  CompressionOptions options;
142
+ CompressionContext context(c, options);
141
143
  CompressionInfo info_tmp(options, context,
142
144
  CompressionDict::GetEmptyDict(), c,
143
145
  info.SampleForCompression());
@@ -150,8 +152,8 @@ Slice CompressBlock(const Slice& uncompressed_data, const CompressionInfo& info,
150
152
  // Sampling with a slow but high-compression algorithm
151
153
  if (sampled_output_slow && (ZSTD_Supported() || Zlib_Supported())) {
152
154
  CompressionType c = ZSTD_Supported() ? kZSTD : kZlibCompression;
153
- CompressionContext context(c);
154
155
  CompressionOptions options;
156
+ CompressionContext context(c, options);
155
157
  CompressionInfo info_tmp(options, context,
156
158
  CompressionDict::GetEmptyDict(), c,
157
159
  info.SampleForCompression());
@@ -261,9 +263,25 @@ class BlockBasedTableBuilder::BlockBasedTablePropertiesCollector
261
263
 
262
264
  struct BlockBasedTableBuilder::Rep {
263
265
  const ImmutableOptions ioptions;
264
- const MutableCFOptions moptions;
266
+ // BEGIN from MutableCFOptions
267
+ std::shared_ptr<const SliceTransform> prefix_extractor;
268
+ // END from MutableCFOptions
265
269
  const BlockBasedTableOptions table_options;
266
270
  const InternalKeyComparator& internal_comparator;
271
+ // Size in bytes for the user-defined timestamps.
272
+ size_t ts_sz;
273
+ // When `ts_sz` > 0 and this flag is false, the user-defined timestamp in the
274
+ // user key will be stripped when creating the block based table. This
275
+ // stripping happens for all user keys, including the keys in data block,
276
+ // index block for data block, index block for index block (if index type is
277
+ // `kTwoLevelIndexSearch`), index for filter blocks (if using partitioned
278
+ // filters), the `first_internal_key` in `IndexValue`, the `end_key` for range
279
+ // deletion entries.
280
+ // As long as the user keys are sorted when added via `Add` API, their logic
281
+ // ordering won't change after timestamps are stripped. However, for each user
282
+ // key to be logically equivalent before and after timestamp is stripped, the
283
+ // user key should contain the minimum timestamp.
284
+ bool persist_user_defined_timestamps;
267
285
  WritableFileWriter* file;
268
286
  std::atomic<uint64_t> offset;
269
287
  size_t alignment;
@@ -345,6 +363,9 @@ struct BlockBasedTableBuilder::Rep {
345
363
  // all blocks after data blocks till the end of the SST file.
346
364
  uint64_t tail_size;
347
365
 
366
+ // See class Footer
367
+ uint32_t base_context_checksum;
368
+
348
369
  uint64_t get_offset() { return offset.load(std::memory_order_relaxed); }
349
370
  void set_offset(uint64_t o) { offset.store(o, std::memory_order_relaxed); }
350
371
 
@@ -373,6 +394,12 @@ struct BlockBasedTableBuilder::Rep {
373
394
  // to false, and this is ensured by io_status_mutex, so no special memory
374
395
  // order for io_status_ok is required.
375
396
  if (io_status_ok.load(std::memory_order_relaxed)) {
397
+ #ifdef ROCKSDB_ASSERT_STATUS_CHECKED // Avoid unnecessary lock acquisition
398
+ auto ios = CopyIOStatus();
399
+ ios.PermitUncheckedError();
400
+ // Assume no races in unit tests
401
+ assert(ios.ok());
402
+ #endif // ROCKSDB_ASSERT_STATUS_CHECKED
376
403
  return IOStatus::OK();
377
404
  } else {
378
405
  return CopyIOStatus();
@@ -413,9 +440,12 @@ struct BlockBasedTableBuilder::Rep {
413
440
  Rep(const BlockBasedTableOptions& table_opt, const TableBuilderOptions& tbo,
414
441
  WritableFileWriter* f)
415
442
  : ioptions(tbo.ioptions),
416
- moptions(tbo.moptions),
443
+ prefix_extractor(tbo.moptions.prefix_extractor),
417
444
  table_options(table_opt),
418
445
  internal_comparator(tbo.internal_comparator),
446
+ ts_sz(tbo.internal_comparator.user_comparator()->timestamp_size()),
447
+ persist_user_defined_timestamps(
448
+ tbo.ioptions.persist_user_defined_timestamps),
419
449
  file(f),
420
450
  offset(0),
421
451
  alignment(table_options.block_align
@@ -429,9 +459,15 @@ struct BlockBasedTableBuilder::Rep {
429
459
  ->CanKeysWithDifferentByteContentsBeEqual()
430
460
  ? BlockBasedTableOptions::kDataBlockBinarySearch
431
461
  : table_options.data_block_index_type,
432
- table_options.data_block_hash_table_util_ratio),
433
- range_del_block(1 /* block_restart_interval */),
434
- internal_prefix_transform(tbo.moptions.prefix_extractor.get()),
462
+ table_options.data_block_hash_table_util_ratio, ts_sz,
463
+ persist_user_defined_timestamps),
464
+ range_del_block(
465
+ 1 /* block_restart_interval */, true /* use_delta_encoding */,
466
+ false /* use_value_delta_encoding */,
467
+ BlockBasedTableOptions::kDataBlockBinarySearch /* index_type */,
468
+ 0.75 /* data_block_hash_table_util_ratio */, ts_sz,
469
+ persist_user_defined_timestamps),
470
+ internal_prefix_transform(prefix_extractor.get()),
435
471
  compression_type(tbo.compression_type),
436
472
  sample_for_compression(tbo.moptions.sample_for_compression),
437
473
  compressible_input_data_bytes(0),
@@ -489,20 +525,22 @@ struct BlockBasedTableBuilder::Rep {
489
525
  compression_dict_buffer_cache_res_mgr = nullptr;
490
526
  }
491
527
 
528
+ assert(compression_ctxs.size() >= compression_opts.parallel_threads);
492
529
  for (uint32_t i = 0; i < compression_opts.parallel_threads; i++) {
493
- compression_ctxs[i].reset(new CompressionContext(compression_type));
530
+ compression_ctxs[i].reset(
531
+ new CompressionContext(compression_type, compression_opts));
494
532
  }
495
533
  if (table_options.index_type ==
496
534
  BlockBasedTableOptions::kTwoLevelIndexSearch) {
497
535
  p_index_builder_ = PartitionedIndexBuilder::CreateIndexBuilder(
498
536
  &internal_comparator, use_delta_encoding_for_index_values,
499
- table_options);
537
+ table_options, ts_sz, persist_user_defined_timestamps);
500
538
  index_builder.reset(p_index_builder_);
501
539
  } else {
502
540
  index_builder.reset(IndexBuilder::CreateIndexBuilder(
503
541
  table_options.index_type, &internal_comparator,
504
542
  &this->internal_prefix_transform, use_delta_encoding_for_index_values,
505
- table_options));
543
+ table_options, ts_sz, persist_user_defined_timestamps));
506
544
  }
507
545
  if (ioptions.optimize_filters_for_hits && tbo.is_bottommost) {
508
546
  // Apply optimize_filters_for_hits setting here when applicable by
@@ -532,8 +570,9 @@ struct BlockBasedTableBuilder::Rep {
532
570
  }
533
571
 
534
572
  filter_builder.reset(CreateFilterBlockBuilder(
535
- ioptions, moptions, filter_context,
536
- use_delta_encoding_for_index_values, p_index_builder_));
573
+ ioptions, tbo.moptions, filter_context,
574
+ use_delta_encoding_for_index_values, p_index_builder_, ts_sz,
575
+ persist_user_defined_timestamps));
537
576
  }
538
577
 
539
578
  assert(tbo.int_tbl_prop_collector_factories);
@@ -547,12 +586,11 @@ struct BlockBasedTableBuilder::Rep {
547
586
  table_properties_collectors.emplace_back(
548
587
  new BlockBasedTablePropertiesCollector(
549
588
  table_options.index_type, table_options.whole_key_filtering,
550
- moptions.prefix_extractor != nullptr));
551
- const Comparator* ucmp = tbo.internal_comparator.user_comparator();
552
- assert(ucmp);
553
- if (ucmp->timestamp_size() > 0) {
589
+ prefix_extractor != nullptr));
590
+ if (ts_sz > 0 && persist_user_defined_timestamps) {
554
591
  table_properties_collectors.emplace_back(
555
- new TimestampTablePropertiesCollector(ucmp));
592
+ new TimestampTablePropertiesCollector(
593
+ tbo.internal_comparator.user_comparator()));
556
594
  }
557
595
  if (table_options.verify_compression) {
558
596
  for (uint32_t i = 0; i < compression_opts.parallel_threads; i++) {
@@ -572,6 +610,17 @@ struct BlockBasedTableBuilder::Rep {
572
610
  if (!ReifyDbHostIdProperty(ioptions.env, &props.db_host_id).ok()) {
573
611
  ROCKS_LOG_INFO(ioptions.logger, "db_host_id property will not be set");
574
612
  }
613
+
614
+ if (FormatVersionUsesContextChecksum(table_options.format_version)) {
615
+ // Must be non-zero and semi- or quasi-random
616
+ // TODO: ideally guaranteed different for related files (e.g. use file
617
+ // number and db_session, for benefit of SstFileWriter)
618
+ do {
619
+ base_context_checksum = Random::GetTLSInstance()->Next();
620
+ } while (UNLIKELY(base_context_checksum == 0));
621
+ } else {
622
+ base_context_checksum = 0;
623
+ }
575
624
  }
576
625
 
577
626
  Rep(const Rep&) = delete;
@@ -910,7 +959,9 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
910
959
  // behavior
911
960
  sanitized_table_options.format_version = 1;
912
961
  }
913
-
962
+ auto ucmp = tbo.internal_comparator.user_comparator();
963
+ assert(ucmp);
964
+ (void)ucmp; // avoids unused variable error.
914
965
  rep_ = new Rep(sanitized_table_options, tbo, file);
915
966
 
916
967
  TEST_SYNC_POINT_CALLBACK(
@@ -994,9 +1045,8 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
994
1045
  r->pc_rep->curr_block_keys->PushBack(key);
995
1046
  } else {
996
1047
  if (r->filter_builder != nullptr) {
997
- size_t ts_sz =
998
- r->internal_comparator.user_comparator()->timestamp_size();
999
- r->filter_builder->Add(ExtractUserKeyAndStripTimestamp(key, ts_sz));
1048
+ r->filter_builder->Add(
1049
+ ExtractUserKeyAndStripTimestamp(key, r->ts_sz));
1000
1050
  }
1001
1051
  }
1002
1052
  }
@@ -1017,6 +1067,7 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
1017
1067
  r->ioptions.logger);
1018
1068
 
1019
1069
  } else if (value_type == kTypeRangeDeletion) {
1070
+ // TODO(yuzhangyu): handle range deletion entries for UDT in memtable only.
1020
1071
  r->range_del_block.Add(key, value);
1021
1072
  // TODO offset passed in is not accurate for parallel compression case
1022
1073
  NotifyCollectTableCollectorsOnAdd(key, value, r->get_offset(),
@@ -1028,6 +1079,9 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
1028
1079
 
1029
1080
  r->props.num_entries++;
1030
1081
  r->props.raw_key_size += key.size();
1082
+ if (!r->persist_user_defined_timestamps) {
1083
+ r->props.raw_key_size -= r->ts_sz;
1084
+ }
1031
1085
  r->props.raw_value_size += value.size();
1032
1086
  if (value_type == kTypeDeletion || value_type == kTypeSingleDeletion ||
1033
1087
  value_type == kTypeDeletionWithTimestamp) {
@@ -1093,6 +1147,9 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& uncompressed_block_data,
1093
1147
  return;
1094
1148
  }
1095
1149
 
1150
+ TEST_SYNC_POINT_CALLBACK(
1151
+ "BlockBasedTableBuilder::WriteBlock:TamperWithCompressedData",
1152
+ &r->compressed_output);
1096
1153
  WriteMaybeCompressedBlock(block_contents, type, handle, block_type,
1097
1154
  &uncompressed_block_data);
1098
1155
  r->compressed_output.clear();
@@ -1255,7 +1312,8 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock(
1255
1312
  bool is_data_block = block_type == BlockType::kData;
1256
1313
  // Old, misleading name of this function: WriteRawBlock
1257
1314
  StopWatch sw(r->ioptions.clock, r->ioptions.stats, WRITE_RAW_BLOCK_MICROS);
1258
- handle->set_offset(r->get_offset());
1315
+ const uint64_t offset = r->get_offset();
1316
+ handle->set_offset(offset);
1259
1317
  handle->set_size(block_contents.size());
1260
1318
  assert(status().ok());
1261
1319
  assert(io_status().ok());
@@ -1277,6 +1335,7 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock(
1277
1335
  uint32_t checksum = ComputeBuiltinChecksumWithLastByte(
1278
1336
  r->table_options.checksum, block_contents.data(), block_contents.size(),
1279
1337
  /*last_byte*/ comp_type);
1338
+ checksum += ChecksumModifierForContext(r->base_context_checksum, offset);
1280
1339
 
1281
1340
  if (block_type == BlockType::kFilter) {
1282
1341
  Status s = r->filter_builder->MaybePostVerifyFilter(block_contents);
@@ -1367,9 +1426,7 @@ void BlockBasedTableBuilder::BGWorkWriteMaybeCompressedBlock() {
1367
1426
  for (size_t i = 0; i < block_rep->keys->Size(); i++) {
1368
1427
  auto& key = (*block_rep->keys)[i];
1369
1428
  if (r->filter_builder != nullptr) {
1370
- size_t ts_sz =
1371
- r->internal_comparator.user_comparator()->timestamp_size();
1372
- r->filter_builder->Add(ExtractUserKeyAndStripTimestamp(key, ts_sz));
1429
+ r->filter_builder->Add(ExtractUserKeyAndStripTimestamp(key, r->ts_sz));
1373
1430
  }
1374
1431
  r->index_builder->OnKeyAdded(key);
1375
1432
  }
@@ -1572,6 +1629,11 @@ void BlockBasedTableBuilder::WriteIndexBlock(
1572
1629
  // The last index_block_handle will be for the partition index block
1573
1630
  }
1574
1631
  }
1632
+ // If success and need to record in metaindex rather than footer...
1633
+ if (!FormatVersionUsesIndexHandleInFooter(
1634
+ rep_->table_options.format_version)) {
1635
+ meta_index_builder->Add(kIndexBlockName, *index_block_handle);
1636
+ }
1575
1637
  }
1576
1638
 
1577
1639
  void BlockBasedTableBuilder::WritePropertiesBlock(
@@ -1597,9 +1659,7 @@ void BlockBasedTableBuilder::WritePropertiesBlock(
1597
1659
  rep_->props.compression_options =
1598
1660
  CompressionOptionsToString(rep_->compression_opts);
1599
1661
  rep_->props.prefix_extractor_name =
1600
- rep_->moptions.prefix_extractor != nullptr
1601
- ? rep_->moptions.prefix_extractor->AsString()
1602
- : "nullptr";
1662
+ rep_->prefix_extractor ? rep_->prefix_extractor->AsString() : "nullptr";
1603
1663
  std::string property_collectors_names = "[";
1604
1664
  for (size_t i = 0;
1605
1665
  i < rep_->ioptions.table_properties_collector_factories.size(); ++i) {
@@ -1643,6 +1703,8 @@ void BlockBasedTableBuilder::WritePropertiesBlock(
1643
1703
  rep_->compressible_input_data_bytes +
1644
1704
  rep_->uncompressible_input_data_bytes;
1645
1705
  }
1706
+ rep_->props.user_defined_timestamps_persisted =
1707
+ rep_->persist_user_defined_timestamps;
1646
1708
 
1647
1709
  // Add basic properties
1648
1710
  property_block_builder.AddTableProperty(rep_->props);
@@ -1716,16 +1778,20 @@ void BlockBasedTableBuilder::WriteRangeDelBlock(
1716
1778
 
1717
1779
  void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle,
1718
1780
  BlockHandle& index_block_handle) {
1781
+ assert(ok());
1719
1782
  Rep* r = rep_;
1720
1783
  // this is guaranteed by BlockBasedTableBuilder's constructor
1721
1784
  assert(r->table_options.checksum == kCRC32c ||
1722
1785
  r->table_options.format_version != 0);
1723
- assert(ok());
1724
-
1725
1786
  FooterBuilder footer;
1726
- footer.Build(kBlockBasedTableMagicNumber, r->table_options.format_version,
1727
- r->get_offset(), r->table_options.checksum,
1728
- metaindex_block_handle, index_block_handle);
1787
+ Status s = footer.Build(kBlockBasedTableMagicNumber,
1788
+ r->table_options.format_version, r->get_offset(),
1789
+ r->table_options.checksum, metaindex_block_handle,
1790
+ index_block_handle, r->base_context_checksum);
1791
+ if (!s.ok()) {
1792
+ r->SetStatus(s);
1793
+ return;
1794
+ }
1729
1795
  IOStatus ios = r->file->Append(footer.GetSlice());
1730
1796
  if (ios.ok()) {
1731
1797
  r->set_offset(r->get_offset() + footer.GetSlice().size());
@@ -1811,7 +1877,9 @@ void BlockBasedTableBuilder::EnterUnbuffered() {
1811
1877
 
1812
1878
  Block reader{BlockContents{data_block}};
1813
1879
  DataBlockIter* iter = reader.NewDataIterator(
1814
- r->internal_comparator.user_comparator(), kDisableGlobalSequenceNumber);
1880
+ r->internal_comparator.user_comparator(), kDisableGlobalSequenceNumber,
1881
+ nullptr /* iter */, nullptr /* stats */,
1882
+ false /* block_contents_pinned */, r->persist_user_defined_timestamps);
1815
1883
 
1816
1884
  iter->SeekToFirst();
1817
1885
  assert(iter->Valid());
@@ -1857,9 +1925,8 @@ void BlockBasedTableBuilder::EnterUnbuffered() {
1857
1925
  for (; iter->Valid(); iter->Next()) {
1858
1926
  Slice key = iter->key();
1859
1927
  if (r->filter_builder != nullptr) {
1860
- size_t ts_sz =
1861
- r->internal_comparator.user_comparator()->timestamp_size();
1862
- r->filter_builder->Add(ExtractUserKeyAndStripTimestamp(key, ts_sz));
1928
+ r->filter_builder->Add(
1929
+ ExtractUserKeyAndStripTimestamp(key, r->ts_sz));
1863
1930
  }
1864
1931
  r->index_builder->OnKeyAdded(key);
1865
1932
  }
@@ -1939,10 +2006,14 @@ Status BlockBasedTableBuilder::Finish() {
1939
2006
  WriteFooter(metaindex_block_handle, index_block_handle);
1940
2007
  }
1941
2008
  r->state = Rep::State::kClosed;
1942
- r->SetStatus(r->CopyIOStatus());
1943
- Status ret_status = r->CopyStatus();
1944
- assert(!ret_status.ok() || io_status().ok());
1945
2009
  r->tail_size = r->offset - r->props.tail_start_offset;
2010
+
2011
+ Status ret_status = r->CopyStatus();
2012
+ IOStatus ios = r->GetIOStatus();
2013
+ if (!ios.ok() && ret_status.ok()) {
2014
+ // Let io_status supersede ok status (otherwise status takes precedence)
2015
+ ret_status = ios;
2016
+ }
1946
2017
  return ret_status;
1947
2018
  }
1948
2019
 
@@ -1952,8 +2023,10 @@ void BlockBasedTableBuilder::Abandon() {
1952
2023
  StopParallelCompression();
1953
2024
  }
1954
2025
  rep_->state = Rep::State::kClosed;
2026
+ #ifdef ROCKSDB_ASSERT_STATUS_CHECKED // Avoid unnecessary lock acquisition
1955
2027
  rep_->CopyStatus().PermitUncheckedError();
1956
2028
  rep_->CopyIOStatus().PermitUncheckedError();
2029
+ #endif // ROCKSDB_ASSERT_STATUS_CHECKED
1957
2030
  }
1958
2031
 
1959
2032
  uint64_t BlockBasedTableBuilder::NumEntries() const {
@@ -83,7 +83,7 @@ size_t TailPrefetchStats::GetSuggestedPrefetchSize() {
83
83
  //
84
84
  // and we use every of the value as a candidate, and estimate how much we
85
85
  // wasted, compared to read. For example, when we use the 3rd record
86
- // as candiate. This area is what we read:
86
+ // as candidate. This area is what we read:
87
87
  // +---+
88
88
  // +---+ | |
89
89
  // | | | |
@@ -123,7 +123,7 @@ size_t TailPrefetchStats::GetSuggestedPrefetchSize() {
123
123
  // +---+ +---+ +---+ +---+ +---+
124
124
  //
125
125
  // Which can be calculated iteratively.
126
- // The difference between wasted using 4st and 3rd record, will
126
+ // The difference between wasted using 4th and 3rd record, will
127
127
  // be following area:
128
128
  // +---+
129
129
  // +--+ +-+ ++ +-+ +-+ +---+ | |
@@ -143,8 +143,8 @@ size_t TailPrefetchStats::GetSuggestedPrefetchSize() {
143
143
  // | | | | | | | | | |
144
144
  // +---+ +---+ +---+ +---+ +---+
145
145
  //
146
- // which will be the size difference between 4st and 3rd record,
147
- // times 3, which is number of records before the 4st.
146
+ // which will be the size difference between 4th and 3rd record,
147
+ // times 3, which is number of records before the 4th.
148
148
  // Here we assume that all data within the prefetch range will be useful. In
149
149
  // reality, it may not be the case when a partial block is inside the range,
150
150
  // or there are data in the middle that is not read. We ignore those cases
@@ -576,7 +576,8 @@ Status BlockBasedTableFactory::NewTableReader(
576
576
  table_reader_options.block_cache_tracer,
577
577
  table_reader_options.max_file_size_for_l0_meta_pin,
578
578
  table_reader_options.cur_db_session_id, table_reader_options.cur_file_num,
579
- table_reader_options.unique_id);
579
+ table_reader_options.unique_id,
580
+ table_reader_options.user_defined_timestamps_persisted);
580
581
  }
581
582
 
582
583
  TableBuilder* BlockBasedTableFactory::NewTableBuilder(
@@ -79,6 +79,22 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target,
79
79
  }
80
80
  }
81
81
 
82
+ if (read_options_.auto_readahead_size && read_options_.iterate_upper_bound &&
83
+ is_first_pass) {
84
+ FindReadAheadSizeUpperBound();
85
+ if (target) {
86
+ index_iter_->Seek(*target);
87
+ } else {
88
+ index_iter_->SeekToFirst();
89
+ }
90
+
91
+ // Check for IO error.
92
+ if (!index_iter_->Valid()) {
93
+ ResetDataIter();
94
+ return;
95
+ }
96
+ }
97
+
82
98
  IndexValue v = index_iter_->value();
83
99
  const bool same_block = block_iter_points_to_real_block_ &&
84
100
  v.handle.offset() == prev_block_offset_;
@@ -281,7 +297,7 @@ void BlockBasedTableIterator::InitDataBlock() {
281
297
  // Enabled from the very first IO when ReadOptions.readahead_size is set.
282
298
  block_prefetcher_.PrefetchIfNeeded(
283
299
  rep, data_block_handle, read_options_.readahead_size, is_for_compaction,
284
- /*no_sequential_checking=*/false, read_options_.rate_limiter_priority);
300
+ /*no_sequential_checking=*/false, read_options_);
285
301
  Status s;
286
302
  table_->NewDataBlockIterator<DataBlockIter>(
287
303
  read_options_, data_block_handle, &block_iter_, BlockType::kData,
@@ -326,7 +342,7 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
326
342
  block_prefetcher_.PrefetchIfNeeded(
327
343
  rep, data_block_handle, read_options_.readahead_size,
328
344
  is_for_compaction, /*no_sequential_checking=*/read_options_.async_io,
329
- read_options_.rate_limiter_priority);
345
+ read_options_);
330
346
 
331
347
  Status s;
332
348
  table_->NewDataBlockIterator<DataBlockIter>(
@@ -497,4 +513,46 @@ void BlockBasedTableIterator::CheckDataBlockWithinUpperBound() {
497
513
  : BlockUpperBound::kUpperBoundInCurBlock;
498
514
  }
499
515
  }
516
+
517
+ void BlockBasedTableIterator::FindReadAheadSizeUpperBound() {
518
+ size_t total_bytes_till_upper_bound = 0;
519
+ size_t footer = table_->get_rep()->footer.GetBlockTrailerSize();
520
+ uint64_t start_offset = index_iter_->value().handle.offset();
521
+
522
+ do {
523
+ BlockHandle block_handle = index_iter_->value().handle;
524
+ total_bytes_till_upper_bound += block_handle.size();
525
+ total_bytes_till_upper_bound += footer;
526
+
527
+ // Can't figure out for current block if current block
528
+ // is out of bound. But for next block we can find that.
529
+ // If curr block's index key >= iterate_upper_bound, it
530
+ // means all the keys in next block or above are out of
531
+ // bound.
532
+ bool next_block_out_of_bound =
533
+ (user_comparator_.CompareWithoutTimestamp(
534
+ index_iter_->user_key(),
535
+ /*a_has_ts=*/true, *read_options_.iterate_upper_bound,
536
+ /*b_has_ts=*/false) >= 0
537
+ ? true
538
+ : false);
539
+
540
+ if (next_block_out_of_bound) {
541
+ break;
542
+ }
543
+
544
+ // Since next block is not out of bound, iterate to that
545
+ // index block and add its Data block size to
546
+ // readahead_size.
547
+ index_iter_->Next();
548
+
549
+ if (!index_iter_->Valid()) {
550
+ break;
551
+ }
552
+
553
+ } while (true);
554
+
555
+ block_prefetcher_.SetUpperBoundOffset(start_offset +
556
+ total_bytes_till_upper_bound);
557
+ }
500
558
  } // namespace ROCKSDB_NAMESPACE
@@ -306,5 +306,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
306
306
  }
307
307
  return true;
308
308
  }
309
+
310
+ void FindReadAheadSizeUpperBound();
309
311
  };
310
312
  } // namespace ROCKSDB_NAMESPACE