@nxtedition/rocksdb 7.1.20 → 7.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +13 -6
  2. package/deps/rocksdb/rocksdb/Makefile +1 -1
  3. package/deps/rocksdb/rocksdb/TARGETS +2 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +1 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +4 -4
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +139 -161
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +92 -82
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +16 -3
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -3
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +73 -30
  11. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +25 -67
  12. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +41 -40
  13. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +109 -155
  14. package/deps/rocksdb/rocksdb/cache/lru_cache.h +127 -149
  15. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +75 -80
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +22 -172
  17. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +272 -85
  18. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +12 -4
  19. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc +1 -0
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +1 -0
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -0
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +1 -0
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage_test.cc +1 -0
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc +1 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +13 -4
  27. package/deps/rocksdb/rocksdb/db/builder.cc +1 -1
  28. package/deps/rocksdb/rocksdb/db/column_family.cc +15 -1
  29. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +1 -0
  30. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc +1 -0
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +25 -7
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +10 -0
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +22 -8
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +14 -5
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +1 -0
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +38 -12
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -6
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +408 -6
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +244 -54
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +27 -6
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +25 -30
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +87 -26
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -4
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +61 -0
  45. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +294 -21
  46. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -0
  47. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
  48. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +28 -10
  49. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +4 -4
  50. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +272 -0
  51. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +38 -0
  52. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +69 -25
  53. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +7 -3
  54. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +29 -12
  55. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +0 -12
  56. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +10 -4
  57. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +35 -22
  58. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +5 -1
  59. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +40 -5
  60. package/deps/rocksdb/rocksdb/db/db_iter.cc +1 -0
  61. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +1 -0
  62. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +1 -0
  63. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +22 -0
  64. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +1 -0
  65. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +1 -0
  66. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +72 -5
  67. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +60 -21
  68. package/deps/rocksdb/rocksdb/db/db_test.cc +170 -1
  69. package/deps/rocksdb/rocksdb/db/db_test2.cc +9 -3
  70. package/deps/rocksdb/rocksdb/db/db_test_util.cc +19 -0
  71. package/deps/rocksdb/rocksdb/db/db_test_util.h +32 -0
  72. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +444 -3
  73. package/deps/rocksdb/rocksdb/db/db_write_test.cc +8 -8
  74. package/deps/rocksdb/rocksdb/db/dbformat.cc +13 -0
  75. package/deps/rocksdb/rocksdb/db/dbformat.h +59 -4
  76. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +1 -0
  77. package/deps/rocksdb/rocksdb/db/experimental.cc +3 -1
  78. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +24 -3
  79. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -0
  80. package/deps/rocksdb/rocksdb/db/filename_test.cc +1 -0
  81. package/deps/rocksdb/rocksdb/db/flush_job.cc +4 -3
  82. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +1 -0
  83. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +85 -43
  84. package/deps/rocksdb/rocksdb/db/forward_iterator.h +3 -1
  85. package/deps/rocksdb/rocksdb/db/internal_stats.cc +33 -6
  86. package/deps/rocksdb/rocksdb/db/internal_stats.h +6 -0
  87. package/deps/rocksdb/rocksdb/db/listener_test.cc +1 -0
  88. package/deps/rocksdb/rocksdb/db/log_test.cc +1 -0
  89. package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
  90. package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
  91. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +1 -0
  92. package/deps/rocksdb/rocksdb/db/memtable.cc +158 -56
  93. package/deps/rocksdb/rocksdb/db/memtable.h +2 -0
  94. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +1 -0
  95. package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +1 -0
  96. package/deps/rocksdb/rocksdb/db/options_file_test.cc +1 -0
  97. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +1 -0
  98. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +1 -0
  99. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
  100. package/deps/rocksdb/rocksdb/db/prefix_test.cc +1 -0
  101. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +52 -9
  102. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +31 -2
  103. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +1 -0
  104. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +81 -42
  105. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +78 -12
  106. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +1 -0
  107. package/deps/rocksdb/rocksdb/db/repair_test.cc +1 -0
  108. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +154 -27
  109. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +21 -4
  110. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +4 -1
  111. package/deps/rocksdb/rocksdb/db/table_cache.cc +18 -6
  112. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -0
  113. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +1 -0
  114. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +1 -0
  115. package/deps/rocksdb/rocksdb/db/version_set.cc +15 -7
  116. package/deps/rocksdb/rocksdb/db/version_set.h +2 -1
  117. package/deps/rocksdb/rocksdb/db/version_set_test.cc +1 -0
  118. package/deps/rocksdb/rocksdb/db/version_util.h +3 -1
  119. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +1 -0
  120. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +28 -9
  121. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +21 -0
  122. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +30 -0
  123. package/deps/rocksdb/rocksdb/db/wide/wide_columns.cc +4 -0
  124. package/deps/rocksdb/rocksdb/db/write_batch.cc +30 -7
  125. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +24 -13
  126. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +5 -4
  127. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +1 -0
  128. package/deps/rocksdb/rocksdb/db/write_controller_test.cc +1 -0
  129. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +104 -60
  130. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +199 -108
  131. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +39 -0
  132. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +8 -0
  133. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +3 -1
  134. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +19 -0
  135. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +26 -0
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -118
  137. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +24 -4
  138. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +18 -0
  139. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +129 -1
  140. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +22 -0
  141. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -0
  142. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +312 -117
  143. package/deps/rocksdb/rocksdb/env/env_basic_test.cc +1 -0
  144. package/deps/rocksdb/rocksdb/env/fs_posix.cc +10 -2
  145. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +1 -0
  146. package/deps/rocksdb/rocksdb/env/mock_env_test.cc +1 -0
  147. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +5 -1
  148. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +1 -0
  149. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1 -0
  150. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +1 -1
  151. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +49 -1
  152. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +44 -18
  153. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +8 -7
  154. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -1
  155. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
  156. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +17 -4
  157. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +4 -0
  158. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +7 -0
  159. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +1 -1
  160. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +9 -0
  161. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -6
  162. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -0
  163. package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +1 -0
  164. package/deps/rocksdb/rocksdb/logging/event_logger_test.cc +1 -0
  165. package/deps/rocksdb/rocksdb/memory/arena.cc +23 -88
  166. package/deps/rocksdb/rocksdb/memory/arena.h +25 -31
  167. package/deps/rocksdb/rocksdb/memory/arena_test.cc +61 -0
  168. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +1 -0
  169. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +1 -0
  170. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -0
  171. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +1 -0
  172. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +1 -0
  173. package/deps/rocksdb/rocksdb/monitoring/iostats_context_test.cc +1 -0
  174. package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
  175. package/deps/rocksdb/rocksdb/options/cf_options.h +8 -0
  176. package/deps/rocksdb/rocksdb/options/configurable_test.cc +1 -0
  177. package/deps/rocksdb/rocksdb/options/options.cc +7 -0
  178. package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
  179. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +6 -0
  180. package/deps/rocksdb/rocksdb/options/options_test.cc +63 -40
  181. package/deps/rocksdb/rocksdb/port/mmap.cc +98 -0
  182. package/deps/rocksdb/rocksdb/port/mmap.h +70 -0
  183. package/deps/rocksdb/rocksdb/port/stack_trace.cc +7 -0
  184. package/deps/rocksdb/rocksdb/port/stack_trace.h +4 -1
  185. package/deps/rocksdb/rocksdb/port/win/port_win.h +2 -7
  186. package/deps/rocksdb/rocksdb/src.mk +1 -0
  187. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +7 -7
  188. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +3 -3
  189. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +1 -0
  190. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +1 -0
  191. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +1 -0
  192. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -0
  193. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -0
  194. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +1 -0
  195. package/deps/rocksdb/rocksdb/table/get_context.cc +19 -1
  196. package/deps/rocksdb/rocksdb/table/get_context.h +9 -0
  197. package/deps/rocksdb/rocksdb/table/merger_test.cc +1 -0
  198. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -11
  199. package/deps/rocksdb/rocksdb/table/mock_table.cc +37 -19
  200. package/deps/rocksdb/rocksdb/table/mock_table.h +5 -1
  201. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +6 -0
  202. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +33 -0
  203. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +45 -6
  204. package/deps/rocksdb/rocksdb/test_util/testharness.h +2 -0
  205. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +1 -0
  206. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +5 -0
  207. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +1 -0
  208. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +1 -0
  209. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +36 -0
  210. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
  211. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +1 -0
  212. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +1 -0
  213. package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +1 -0
  214. package/deps/rocksdb/rocksdb/util/autovector_test.cc +1 -0
  215. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -0
  216. package/deps/rocksdb/rocksdb/util/coding_test.cc +1 -0
  217. package/deps/rocksdb/rocksdb/util/crc32c_test.cc +1 -0
  218. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +1 -0
  219. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +1 -0
  220. package/deps/rocksdb/rocksdb/util/filelock_test.cc +1 -0
  221. package/deps/rocksdb/rocksdb/util/gflags_compat.h +12 -7
  222. package/deps/rocksdb/rocksdb/util/hash_test.cc +1 -0
  223. package/deps/rocksdb/rocksdb/util/heap_test.cc +4 -2
  224. package/deps/rocksdb/rocksdb/util/random_test.cc +1 -0
  225. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +1 -0
  226. package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +1 -0
  227. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +1 -0
  228. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +1 -0
  229. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +1 -0
  230. package/deps/rocksdb/rocksdb/util/thread_local_test.cc +1 -0
  231. package/deps/rocksdb/rocksdb/util/timer_test.cc +1 -0
  232. package/deps/rocksdb/rocksdb/util/work_queue_test.cc +4 -0
  233. package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +1 -0
  234. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +13 -0
  235. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +9 -3
  236. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +1 -0
  237. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_format_test.cc +1 -0
  238. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +1 -0
  239. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_row_merge_test.cc +1 -0
  240. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_serialize_test.cc +1 -0
  241. package/deps/rocksdb/rocksdb/utilities/env_mirror_test.cc +1 -0
  242. package/deps/rocksdb/rocksdb/utilities/env_timed_test.cc +1 -0
  243. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +8 -0
  244. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -0
  245. package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +1 -0
  246. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +1 -0
  247. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +1 -0
  248. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -0
  249. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +1 -0
  250. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1 -0
  251. package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +1 -0
  252. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1 -0
  253. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +1 -0
  254. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +1 -0
  255. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -0
  256. package/deps/rocksdb/rocksdb/utilities/util_merge_operators_test.cc +1 -0
  257. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +7 -0
  258. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +20 -0
  259. package/index.js +12 -4
  260. package/package.json +1 -1
  261. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  262. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -76,14 +76,131 @@ IOStatus CompactionOutputs::WriterSyncClose(const Status& input_status,
76
76
  return io_s;
77
77
  }
78
78
 
79
+ size_t CompactionOutputs::UpdateGrandparentBoundaryInfo(
80
+ const Slice& internal_key) {
81
+ size_t curr_key_boundary_switched_num = 0;
82
+ const std::vector<FileMetaData*>& grandparents = compaction_->grandparents();
83
+
84
+ if (grandparents.empty()) {
85
+ return curr_key_boundary_switched_num;
86
+ }
87
+ assert(!internal_key.empty());
88
+ InternalKey ikey;
89
+ ikey.DecodeFrom(internal_key);
90
+ assert(ikey.Valid());
91
+
92
+ const Comparator* ucmp = compaction_->column_family_data()->user_comparator();
93
+
94
+ // Move the grandparent_index_ to the file containing the current user_key.
95
+ // If there are multiple files containing the same user_key, make sure the
96
+ // index points to the last file containing the key.
97
+ while (grandparent_index_ < grandparents.size()) {
98
+ if (being_grandparent_gap_) {
99
+ if (sstableKeyCompare(ucmp, ikey,
100
+ grandparents[grandparent_index_]->smallest) < 0) {
101
+ break;
102
+ }
103
+ if (seen_key_) {
104
+ curr_key_boundary_switched_num++;
105
+ grandparent_overlapped_bytes_ +=
106
+ grandparents[grandparent_index_]->fd.GetFileSize();
107
+ grandparent_boundary_switched_num_++;
108
+ }
109
+ being_grandparent_gap_ = false;
110
+ } else {
111
+ int cmp_result = sstableKeyCompare(
112
+ ucmp, ikey, grandparents[grandparent_index_]->largest);
113
+ // If it's same key, make sure grandparent_index_ is pointing to the last
114
+ // one.
115
+ if (cmp_result < 0 ||
116
+ (cmp_result == 0 &&
117
+ (grandparent_index_ == grandparents.size() - 1 ||
118
+ sstableKeyCompare(ucmp, ikey,
119
+ grandparents[grandparent_index_ + 1]->smallest) <
120
+ 0))) {
121
+ break;
122
+ }
123
+ if (seen_key_) {
124
+ curr_key_boundary_switched_num++;
125
+ grandparent_boundary_switched_num_++;
126
+ }
127
+ being_grandparent_gap_ = true;
128
+ grandparent_index_++;
129
+ }
130
+ }
131
+
132
+ // If the first key is in the middle of a grandparent file, adding it to the
133
+ // overlap
134
+ if (!seen_key_ && !being_grandparent_gap_) {
135
+ assert(grandparent_overlapped_bytes_ == 0);
136
+ grandparent_overlapped_bytes_ =
137
+ GetCurrentKeyGrandparentOverlappedBytes(internal_key);
138
+ }
139
+
140
+ seen_key_ = true;
141
+ return curr_key_boundary_switched_num;
142
+ }
143
+
144
+ uint64_t CompactionOutputs::GetCurrentKeyGrandparentOverlappedBytes(
145
+ const Slice& internal_key) const {
146
+ // no overlap with any grandparent file
147
+ if (being_grandparent_gap_) {
148
+ return 0;
149
+ }
150
+ uint64_t overlapped_bytes = 0;
151
+
152
+ const std::vector<FileMetaData*>& grandparents = compaction_->grandparents();
153
+ const Comparator* ucmp = compaction_->column_family_data()->user_comparator();
154
+ InternalKey ikey;
155
+ ikey.DecodeFrom(internal_key);
156
+ #ifndef NDEBUG
157
+ // make sure the grandparent_index_ is pointing to the last files containing
158
+ // the current key.
159
+ int cmp_result =
160
+ sstableKeyCompare(ucmp, ikey, grandparents[grandparent_index_]->largest);
161
+ assert(
162
+ cmp_result < 0 ||
163
+ (cmp_result == 0 &&
164
+ (grandparent_index_ == grandparents.size() - 1 ||
165
+ sstableKeyCompare(
166
+ ucmp, ikey, grandparents[grandparent_index_ + 1]->smallest) < 0)));
167
+ assert(sstableKeyCompare(ucmp, ikey,
168
+ grandparents[grandparent_index_]->smallest) >= 0);
169
+ #endif
170
+ overlapped_bytes += grandparents[grandparent_index_]->fd.GetFileSize();
171
+
172
+ // go backwards to find all overlapped files, one key can overlap multiple
173
+ // files. In the following example, if the current output key is `c`, and one
174
+ // compaction file was cut before `c`, current `c` can overlap with 3 files:
175
+ // [a b] [c...
176
+ // [b, b] [c, c] [c, c] [c, d]
177
+ for (int64_t i = static_cast<int64_t>(grandparent_index_) - 1;
178
+ i >= 0 && sstableKeyCompare(ucmp, ikey, grandparents[i]->largest) == 0;
179
+ i--) {
180
+ overlapped_bytes += grandparents[i]->fd.GetFileSize();
181
+ }
182
+
183
+ return overlapped_bytes;
184
+ }
185
+
79
186
  bool CompactionOutputs::ShouldStopBefore(const CompactionIterator& c_iter) {
80
187
  assert(c_iter.Valid());
81
188
 
189
+ // always update grandparent information like overlapped file number, size
190
+ // etc.
191
+ const Slice& internal_key = c_iter.key();
192
+ const uint64_t previous_overlapped_bytes = grandparent_overlapped_bytes_;
193
+ size_t num_grandparent_boundaries_crossed =
194
+ UpdateGrandparentBoundaryInfo(internal_key);
195
+
196
+ if (!HasBuilder()) {
197
+ return false;
198
+ }
199
+
82
200
  // If there's user defined partitioner, check that first
83
- if (HasBuilder() && partitioner_ &&
84
- partitioner_->ShouldPartition(
85
- PartitionerRequest(last_key_for_partitioner_, c_iter.user_key(),
86
- current_output_file_size_)) == kRequired) {
201
+ if (partitioner_ && partitioner_->ShouldPartition(PartitionerRequest(
202
+ last_key_for_partitioner_, c_iter.user_key(),
203
+ current_output_file_size_)) == kRequired) {
87
204
  return true;
88
205
  }
89
206
 
@@ -92,12 +209,11 @@ bool CompactionOutputs::ShouldStopBefore(const CompactionIterator& c_iter) {
92
209
  return false;
93
210
  }
94
211
 
95
- // reach the target file size
212
+ // reach the max file size
96
213
  if (current_output_file_size_ >= compaction_->max_output_file_size()) {
97
214
  return true;
98
215
  }
99
216
 
100
- const Slice& internal_key = c_iter.key();
101
217
  const InternalKeyComparator* icmp =
102
218
  &compaction_->column_family_data()->internal_comparator();
103
219
 
@@ -111,32 +227,67 @@ bool CompactionOutputs::ShouldStopBefore(const CompactionIterator& c_iter) {
111
227
  }
112
228
  }
113
229
 
114
- // Update grandparent information
115
- const std::vector<FileMetaData*>& grandparents = compaction_->grandparents();
116
- bool grandparant_file_switched = false;
117
- // Scan to find the earliest grandparent file that contains key.
118
- while (grandparent_index_ < grandparents.size() &&
119
- icmp->Compare(internal_key,
120
- grandparents[grandparent_index_]->largest.Encode()) >
121
- 0) {
122
- if (seen_key_) {
123
- overlapped_bytes_ += grandparents[grandparent_index_]->fd.GetFileSize();
124
- grandparant_file_switched = true;
230
+ // only check if the current key is going to cross the grandparents file
231
+ // boundary (either the file beginning or ending).
232
+ if (num_grandparent_boundaries_crossed > 0) {
233
+ // Cut the file before the current key if the size of the current output
234
+ // file + its overlapped grandparent files is bigger than
235
+ // max_compaction_bytes. Which is to prevent future bigger than
236
+ // max_compaction_bytes compaction from the current output level.
237
+ if (grandparent_overlapped_bytes_ + current_output_file_size_ >
238
+ compaction_->max_compaction_bytes()) {
239
+ return true;
125
240
  }
126
- assert(grandparent_index_ + 1 >= grandparents.size() ||
127
- icmp->Compare(
128
- grandparents[grandparent_index_]->largest.Encode(),
129
- grandparents[grandparent_index_ + 1]->smallest.Encode()) <= 0);
130
- grandparent_index_++;
131
- }
132
- seen_key_ = true;
133
241
 
134
- if (grandparant_file_switched &&
135
- overlapped_bytes_ + current_output_file_size_ >
136
- compaction_->max_compaction_bytes()) {
137
- // Too much overlap for current output; start new output
138
- overlapped_bytes_ = 0;
139
- return true;
242
+ // Cut the file if including the key is going to add a skippable file on
243
+ // the grandparent level AND its size is reasonably big (1/8 of target file
244
+ // size). For example, if it's compacting the files L0 + L1:
245
+ // L0: [1, 21]
246
+ // L1: [3, 23]
247
+ // L2: [2, 4] [11, 15] [22, 24]
248
+ // Without this break, it will output as:
249
+ // L1: [1,3, 21,23]
250
+ // With this break, it will output as (assuming [11, 15] at L2 is bigger
251
+ // than 1/8 of target size):
252
+ // L1: [1,3] [21,23]
253
+ // Then for the future compactions, [11,15] won't be included.
254
+ // For random datasets (either evenly distributed or skewed), it rarely
255
+ // triggers this condition, but if the user is adding 2 different datasets
256
+ // without any overlap, it may likely happen.
257
+ // More details, check PR #1963
258
+ const size_t num_skippable_boundaries_crossed =
259
+ being_grandparent_gap_ ? 2 : 3;
260
+ if (compaction_->immutable_options()->compaction_style ==
261
+ kCompactionStyleLevel &&
262
+ compaction_->immutable_options()->level_compaction_dynamic_file_size &&
263
+ num_grandparent_boundaries_crossed >=
264
+ num_skippable_boundaries_crossed &&
265
+ grandparent_overlapped_bytes_ - previous_overlapped_bytes >
266
+ compaction_->target_output_file_size() / 8) {
267
+ return true;
268
+ }
269
+
270
+ // Pre-cut the output file if it's reaching a certain size AND it's at the
271
+ // boundary of a grandparent file. It can reduce the future compaction size,
272
+ // the cost is having smaller files.
273
+ // The pre-cut size threshold is based on how many grandparent boundaries
274
+ // it has seen before. Basically, if it has seen no boundary at all, then it
275
+ // will pre-cut at 50% target file size. Every boundary it has seen
276
+ // increases the threshold by 5%, max at 90%, which it will always cut.
277
+ // The idea is based on if it has seen more boundaries before, it will more
278
+ // likely to see another boundary (file cutting opportunity) before the
279
+ // target file size. The test shows it can generate larger files than a
280
+ // static threshold like 75% and has a similar write amplification
281
+ // improvement.
282
+ if (compaction_->immutable_options()->compaction_style ==
283
+ kCompactionStyleLevel &&
284
+ compaction_->immutable_options()->level_compaction_dynamic_file_size &&
285
+ current_output_file_size_ >=
286
+ ((compaction_->target_output_file_size() + 99) / 100) *
287
+ (50 + std::min(grandparent_boundary_switched_num_ * 5,
288
+ size_t{40}))) {
289
+ return true;
290
+ }
140
291
  }
141
292
 
142
293
  // check ttl file boundaries if there's any
@@ -189,6 +340,10 @@ Status CompactionOutputs::AddToOutput(
189
340
  if (!s.ok()) {
190
341
  return s;
191
342
  }
343
+ // reset grandparent information
344
+ grandparent_boundary_switched_num_ = 0;
345
+ grandparent_overlapped_bytes_ =
346
+ GetCurrentKeyGrandparentOverlappedBytes(key);
192
347
  }
193
348
 
194
349
  // Open output file if necessary
@@ -199,10 +354,9 @@ Status CompactionOutputs::AddToOutput(
199
354
  }
200
355
  }
201
356
 
202
- Output& curr = current_output();
203
357
  assert(builder_ != nullptr);
204
358
  const Slice& value = c_iter.value();
205
- s = curr.validator.Add(key, value);
359
+ s = current_output().validator.Add(key, value);
206
360
  if (!s.ok()) {
207
361
  return s;
208
362
  }
@@ -232,10 +386,10 @@ Status CompactionOutputs::AddToOutput(
232
386
  }
233
387
 
234
388
  Status CompactionOutputs::AddRangeDels(
235
- const Slice* comp_start, const Slice* comp_end,
389
+ const Slice* comp_start_user_key, const Slice* comp_end_user_key,
236
390
  CompactionIterationStats& range_del_out_stats, bool bottommost_level,
237
391
  const InternalKeyComparator& icmp, SequenceNumber earliest_snapshot,
238
- const Slice& next_table_min_key) {
392
+ const Slice& next_table_min_key, const std::string& full_history_ts_low) {
239
393
  assert(HasRangeDel());
240
394
  FileMetaData& meta = current_output().meta;
241
395
  const Comparator* ucmp = icmp.user_comparator();
@@ -249,7 +403,7 @@ Status CompactionOutputs::AddRangeDels(
249
403
  if (output_size == 1) {
250
404
  // For the first output table, include range tombstones before the min
251
405
  // key but after the subcompaction boundary.
252
- lower_bound = comp_start;
406
+ lower_bound = comp_start_user_key;
253
407
  lower_bound_from_sub_compact = true;
254
408
  } else if (meta.smallest.size() > 0) {
255
409
  // For subsequent output tables, only include range tombstones from min
@@ -269,21 +423,22 @@ Status CompactionOutputs::AddRangeDels(
269
423
  // use the smaller key as the upper bound of the output file, to ensure
270
424
  // that there is no overlapping between different output files.
271
425
  upper_bound_guard = ExtractUserKey(next_table_min_key);
272
- if (comp_end != nullptr &&
273
- ucmp->Compare(upper_bound_guard, *comp_end) >= 0) {
274
- upper_bound = comp_end;
426
+ if (comp_end_user_key != nullptr &&
427
+ ucmp->CompareWithoutTimestamp(upper_bound_guard, *comp_end_user_key) >=
428
+ 0) {
429
+ upper_bound = comp_end_user_key;
275
430
  } else {
276
431
  upper_bound = &upper_bound_guard;
277
432
  }
278
433
  } else {
279
434
  // This is the last file in the subcompaction, so extend until the
280
435
  // subcompaction ends.
281
- upper_bound = comp_end;
436
+ upper_bound = comp_end_user_key;
282
437
  }
283
438
  bool has_overlapping_endpoints;
284
439
  if (upper_bound != nullptr && meta.largest.size() > 0) {
285
- has_overlapping_endpoints =
286
- ucmp->Compare(meta.largest.user_key(), *upper_bound) == 0;
440
+ has_overlapping_endpoints = ucmp->CompareWithoutTimestamp(
441
+ meta.largest.user_key(), *upper_bound) == 0;
287
442
  } else {
288
443
  has_overlapping_endpoints = false;
289
444
  }
@@ -292,8 +447,8 @@ Status CompactionOutputs::AddRangeDels(
292
447
  // bound. If the end of subcompaction is null or the upper bound is null,
293
448
  // it means that this file is the last file in the compaction. So there
294
449
  // will be no overlapping between this file and others.
295
- assert(comp_end == nullptr || upper_bound == nullptr ||
296
- ucmp->Compare(*upper_bound, *comp_end) <= 0);
450
+ assert(comp_end_user_key == nullptr || upper_bound == nullptr ||
451
+ ucmp->CompareWithoutTimestamp(*upper_bound, *comp_end_user_key) <= 0);
297
452
  auto it = range_del_agg_->NewIterator(lower_bound, upper_bound,
298
453
  has_overlapping_endpoints);
299
454
  // Position the range tombstone output iterator. There may be tombstone
@@ -307,7 +462,8 @@ Status CompactionOutputs::AddRangeDels(
307
462
  for (; it->Valid(); it->Next()) {
308
463
  auto tombstone = it->Tombstone();
309
464
  if (upper_bound != nullptr) {
310
- int cmp = ucmp->Compare(*upper_bound, tombstone.start_key_);
465
+ int cmp =
466
+ ucmp->CompareWithoutTimestamp(*upper_bound, tombstone.start_key_);
311
467
  if ((has_overlapping_endpoints && cmp < 0) ||
312
468
  (!has_overlapping_endpoints && cmp <= 0)) {
313
469
  // Tombstones starting after upper_bound only need to be included in
@@ -320,7 +476,17 @@ Status CompactionOutputs::AddRangeDels(
320
476
  }
321
477
  }
322
478
 
323
- if (bottommost_level && tombstone.seq_ <= earliest_snapshot) {
479
+ const size_t ts_sz = ucmp->timestamp_size();
480
+ // Garbage collection for range tombstones.
481
+ // If user-defined timestamp is enabled, range tombstones are dropped if
482
+ // they are at bottommost_level, below full_history_ts_low and not visible
483
+ // in any snapshot. trim_ts_ is passed to the constructor for
484
+ // range_del_agg_, and range_del_agg_ internally drops tombstones above
485
+ // trim_ts_.
486
+ if (bottommost_level && tombstone.seq_ <= earliest_snapshot &&
487
+ (ts_sz == 0 ||
488
+ (!full_history_ts_low.empty() &&
489
+ ucmp->CompareTimestamp(tombstone.ts_, full_history_ts_low) < 0))) {
324
490
  // TODO(andrewkr): tombstones that span multiple output files are
325
491
  // counted for each compaction output file, so lots of double
326
492
  // counting.
@@ -331,12 +497,13 @@ Status CompactionOutputs::AddRangeDels(
331
497
 
332
498
  auto kv = tombstone.Serialize();
333
499
  assert(lower_bound == nullptr ||
334
- ucmp->Compare(*lower_bound, kv.second) < 0);
500
+ ucmp->CompareWithoutTimestamp(*lower_bound, kv.second) < 0);
335
501
  // Range tombstone is not supported by output validator yet.
336
502
  builder_->Add(kv.first.Encode(), kv.second);
337
503
  InternalKey smallest_candidate = std::move(kv.first);
338
504
  if (lower_bound != nullptr &&
339
- ucmp->Compare(smallest_candidate.user_key(), *lower_bound) <= 0) {
505
+ ucmp->CompareWithoutTimestamp(smallest_candidate.user_key(),
506
+ *lower_bound) <= 0) {
340
507
  // Pretend the smallest key has the same user key as lower_bound
341
508
  // (the max key in the previous table or subcompaction) in order for
342
509
  // files to appear key-space partitioned.
@@ -356,13 +523,23 @@ Status CompactionOutputs::AddRangeDels(
356
523
  // choose lowest seqnum so this file's smallest internal key comes
357
524
  // after the previous file's largest. The fake seqnum is OK because
358
525
  // the read path's file-picking code only considers user key.
359
- smallest_candidate = InternalKey(
360
- *lower_bound, lower_bound_from_sub_compact ? tombstone.seq_ : 0,
361
- kTypeRangeDeletion);
526
+ if (lower_bound_from_sub_compact) {
527
+ if (ts_sz) {
528
+ assert(tombstone.ts_.size() == ts_sz);
529
+ smallest_candidate = InternalKey(*lower_bound, tombstone.seq_,
530
+ kTypeRangeDeletion, tombstone.ts_);
531
+ } else {
532
+ smallest_candidate =
533
+ InternalKey(*lower_bound, tombstone.seq_, kTypeRangeDeletion);
534
+ }
535
+ } else {
536
+ smallest_candidate = InternalKey(*lower_bound, 0, kTypeRangeDeletion);
537
+ }
362
538
  }
363
539
  InternalKey largest_candidate = tombstone.SerializeEndKey();
364
540
  if (upper_bound != nullptr &&
365
- ucmp->Compare(*upper_bound, largest_candidate.user_key()) <= 0) {
541
+ ucmp->CompareWithoutTimestamp(*upper_bound,
542
+ largest_candidate.user_key()) <= 0) {
366
543
  // Pretend the largest key has the same user key as upper_bound (the
367
544
  // min key in the following table or subcompaction) in order for files
368
545
  // to appear key-space partitioned.
@@ -376,9 +553,22 @@ Status CompactionOutputs::AddRangeDels(
376
553
  // kMaxSequenceNumber), but with kTypeDeletion (0x7) instead of
377
554
  // kTypeRangeDeletion (0xF), so the range tombstone comes before the
378
555
  // Seek() key in InternalKey's ordering. So Seek() will look in the
379
- // next file for the user key.
380
- largest_candidate =
381
- InternalKey(*upper_bound, kMaxSequenceNumber, kTypeRangeDeletion);
556
+ // next file for the user key
557
+ if (ts_sz) {
558
+ static constexpr char kTsMax[] = "\xff\xff\xff\xff\xff\xff\xff\xff\xff";
559
+ if (ts_sz <= strlen(kTsMax)) {
560
+ largest_candidate =
561
+ InternalKey(*upper_bound, kMaxSequenceNumber, kTypeRangeDeletion,
562
+ Slice(kTsMax, ts_sz));
563
+ } else {
564
+ largest_candidate =
565
+ InternalKey(*upper_bound, kMaxSequenceNumber, kTypeRangeDeletion,
566
+ std::string(ts_sz, '\xff'));
567
+ }
568
+ } else {
569
+ largest_candidate =
570
+ InternalKey(*upper_bound, kMaxSequenceNumber, kTypeRangeDeletion);
571
+ }
382
572
  }
383
573
  #ifndef NDEBUG
384
574
  SequenceNumber smallest_ikey_seqnum = kMaxSequenceNumber;
@@ -168,11 +168,16 @@ class CompactionOutputs {
168
168
  }
169
169
 
170
170
  // Add range-dels from the aggregator to the current output file
171
- Status AddRangeDels(const Slice* comp_start, const Slice* comp_end,
171
+ // @param comp_start_user_key and comp_end_user_key include timestamp if
172
+ // user-defined timestamp is enabled.
173
+ // @param full_history_ts_low used for range tombstone garbage collection.
174
+ Status AddRangeDels(const Slice* comp_start_user_key,
175
+ const Slice* comp_end_user_key,
172
176
  CompactionIterationStats& range_del_out_stats,
173
177
  bool bottommost_level, const InternalKeyComparator& icmp,
174
178
  SequenceNumber earliest_snapshot,
175
- const Slice& next_table_min_key);
179
+ const Slice& next_table_min_key,
180
+ const std::string& full_history_ts_low);
176
181
 
177
182
  // if the outputs have range delete, range delete is also data
178
183
  bool HasRangeDel() const {
@@ -216,9 +221,16 @@ class CompactionOutputs {
216
221
  }
217
222
  }
218
223
 
219
- uint64_t GetCurrentOutputFileSize() const {
220
- return current_output_file_size_;
221
- }
224
+ // update tracked grandparents information like grandparent index, if it's
225
+ // in the gap between 2 grandparent files, accumulated grandparent files size
226
+ // etc.
227
+ // It returns how many boundaries it crosses by including current key.
228
+ size_t UpdateGrandparentBoundaryInfo(const Slice& internal_key);
229
+
230
+ // helper function to get the overlapped grandparent files size, it's only
231
+ // used for calculating the first key's overlap.
232
+ uint64_t GetCurrentKeyGrandparentOverlappedBytes(
233
+ const Slice& internal_key) const;
222
234
 
223
235
  // Add current key from compaction_iterator to the output file. If needed
224
236
  // close and open new compaction output with the functions provided.
@@ -311,12 +323,21 @@ class CompactionOutputs {
311
323
  // An index that used to speed up ShouldStopBefore().
312
324
  size_t grandparent_index_ = 0;
313
325
 
326
+ // if the output key is being grandparent files gap, so:
327
+ // key > grandparents[grandparent_index_ - 1].largest &&
328
+ // key < grandparents[grandparent_index_].smallest
329
+ bool being_grandparent_gap_ = true;
330
+
314
331
  // The number of bytes overlapping between the current output and
315
332
  // grandparent files used in ShouldStopBefore().
316
- uint64_t overlapped_bytes_ = 0;
333
+ uint64_t grandparent_overlapped_bytes_ = 0;
317
334
 
318
335
  // A flag determines whether the key has been seen in ShouldStopBefore()
319
336
  bool seen_key_ = false;
337
+
338
+ // for the current output file, how many file boundaries has it crossed,
339
+ // basically number of files overlapped * 2
340
+ size_t grandparent_boundary_switched_num_ = 0;
320
341
  };
321
342
 
322
343
  // helper struct to concatenate the last level and penultimate level outputs
@@ -27,16 +27,6 @@
27
27
 
28
28
  namespace ROCKSDB_NAMESPACE {
29
29
 
30
- namespace {
31
- uint64_t TotalCompensatedFileSize(const std::vector<FileMetaData*>& files) {
32
- uint64_t sum = 0;
33
- for (size_t i = 0; i < files.size() && files[i]; i++) {
34
- sum += files[i]->compensated_file_size;
35
- }
36
- return sum;
37
- }
38
- } // anonymous namespace
39
-
40
30
  bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
41
31
  size_t min_files_to_compact,
42
32
  uint64_t max_compact_bytes_per_del_file,
@@ -63,8 +53,6 @@ bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
63
53
  return false;
64
54
  }
65
55
  size_t compact_bytes = static_cast<size_t>(level_files[start]->fd.file_size);
66
- uint64_t compensated_compact_bytes =
67
- level_files[start]->compensated_file_size;
68
56
  size_t compact_bytes_per_del_file = std::numeric_limits<size_t>::max();
69
57
  // Compaction range will be [start, limit).
70
58
  size_t limit;
@@ -73,11 +61,10 @@ bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
73
61
  size_t new_compact_bytes_per_del_file = 0;
74
62
  for (limit = start + 1; limit < level_files.size(); ++limit) {
75
63
  compact_bytes += static_cast<size_t>(level_files[limit]->fd.file_size);
76
- compensated_compact_bytes += level_files[limit]->compensated_file_size;
77
64
  new_compact_bytes_per_del_file = compact_bytes / (limit - start);
78
65
  if (level_files[limit]->being_compacted ||
79
66
  new_compact_bytes_per_del_file > compact_bytes_per_del_file ||
80
- compensated_compact_bytes > max_compaction_bytes) {
67
+ compact_bytes > max_compaction_bytes) {
81
68
  break;
82
69
  }
83
70
  compact_bytes_per_del_file = new_compact_bytes_per_del_file;
@@ -327,12 +314,19 @@ bool CompactionPicker::FilesRangeOverlapWithCompaction(
327
314
  int penultimate_level =
328
315
  Compaction::EvaluatePenultimateLevel(ioptions_, start_level, level);
329
316
  if (penultimate_level != Compaction::kInvalidLevel) {
330
- InternalKey penultimate_smallest, penultimate_largest;
331
- GetRange(inputs, &penultimate_smallest, &penultimate_largest, level);
332
- if (RangeOverlapWithCompaction(penultimate_smallest.user_key(),
333
- penultimate_largest.user_key(),
334
- penultimate_level)) {
335
- return true;
317
+ if (ioptions_.compaction_style == kCompactionStyleUniversal) {
318
+ if (RangeOverlapWithCompaction(smallest.user_key(), largest.user_key(),
319
+ penultimate_level)) {
320
+ return true;
321
+ }
322
+ } else {
323
+ InternalKey penultimate_smallest, penultimate_largest;
324
+ GetRange(inputs, &penultimate_smallest, &penultimate_largest, level);
325
+ if (RangeOverlapWithCompaction(penultimate_smallest.user_key(),
326
+ penultimate_largest.user_key(),
327
+ penultimate_level)) {
328
+ return true;
329
+ }
336
330
  }
337
331
  }
338
332
 
@@ -507,8 +501,8 @@ bool CompactionPicker::SetupOtherInputs(
507
501
  if (!output_level_inputs->empty()) {
508
502
  const uint64_t limit = mutable_cf_options.max_compaction_bytes;
509
503
  const uint64_t output_level_inputs_size =
510
- TotalCompensatedFileSize(output_level_inputs->files);
511
- const uint64_t inputs_size = TotalCompensatedFileSize(inputs->files);
504
+ TotalFileSize(output_level_inputs->files);
505
+ const uint64_t inputs_size = TotalFileSize(inputs->files);
512
506
  bool expand_inputs = false;
513
507
 
514
508
  CompactionInputFiles expanded_inputs;
@@ -527,13 +521,13 @@ bool CompactionPicker::SetupOtherInputs(
527
521
  &expanded_inputs.files, base_index,
528
522
  nullptr);
529
523
  }
530
- uint64_t expanded_inputs_size =
531
- TotalCompensatedFileSize(expanded_inputs.files);
524
+ uint64_t expanded_inputs_size = TotalFileSize(expanded_inputs.files);
532
525
  if (!ExpandInputsToCleanCut(cf_name, vstorage, &expanded_inputs)) {
533
526
  try_overlapping_inputs = false;
534
527
  }
535
528
  if (try_overlapping_inputs && expanded_inputs.size() > inputs->size() &&
536
- output_level_inputs_size + expanded_inputs_size < limit &&
529
+ (mutable_cf_options.ignore_max_compaction_bytes_for_input ||
530
+ output_level_inputs_size + expanded_inputs_size < limit) &&
537
531
  !AreFilesInCompaction(expanded_inputs.files)) {
538
532
  InternalKey new_start, new_limit;
539
533
  GetRange(expanded_inputs, &new_start, &new_limit);
@@ -554,9 +548,10 @@ bool CompactionPicker::SetupOtherInputs(
554
548
  vstorage->GetCleanInputsWithinInterval(input_level, &all_start,
555
549
  &all_limit, &expanded_inputs.files,
556
550
  base_index, nullptr);
557
- expanded_inputs_size = TotalCompensatedFileSize(expanded_inputs.files);
551
+ expanded_inputs_size = TotalFileSize(expanded_inputs.files);
558
552
  if (expanded_inputs.size() > inputs->size() &&
559
- output_level_inputs_size + expanded_inputs_size < limit &&
553
+ (mutable_cf_options.ignore_max_compaction_bytes_for_input ||
554
+ output_level_inputs_size + expanded_inputs_size < limit) &&
560
555
  !AreFilesInCompaction(expanded_inputs.files)) {
561
556
  expand_inputs = true;
562
557
  }
@@ -724,18 +719,18 @@ Compaction* CompactionPicker::CompactRange(
724
719
  }
725
720
  largest = &inputs[i]->largest;
726
721
 
727
- uint64_t s = inputs[i]->compensated_file_size;
722
+ uint64_t input_file_size = inputs[i]->fd.GetFileSize();
728
723
  uint64_t output_level_total = 0;
729
724
  if (output_level < vstorage->num_non_empty_levels()) {
730
725
  std::vector<FileMetaData*> files;
731
726
  vstorage->GetOverlappingInputsRangeBinarySearch(
732
727
  output_level, smallest, largest, &files, hint_index, &hint_index);
733
728
  for (const auto& file : files) {
734
- output_level_total += file->compensated_file_size;
729
+ output_level_total += file->fd.GetFileSize();
735
730
  }
736
731
  }
737
732
 
738
- input_level_total += s;
733
+ input_level_total += input_file_size;
739
734
 
740
735
  if (input_level_total + output_level_total >= limit) {
741
736
  covering_the_whole_range = false;