@nxtedition/rocksdb 7.1.14 → 7.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/binding.cc +1 -0
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +72 -18
  3. package/deps/rocksdb/rocksdb/Makefile +91 -11
  4. package/deps/rocksdb/rocksdb/TARGETS +8 -4
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +5 -0
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +13 -8
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
  8. package/deps/rocksdb/rocksdb/cache/cache_test.cc +116 -57
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +958 -459
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.h +407 -622
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +104 -40
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +23 -8
  13. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +350 -184
  14. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +12 -2
  15. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +2 -0
  16. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +130 -43
  17. package/deps/rocksdb/rocksdb/cache/lru_cache.h +24 -2
  18. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +423 -98
  19. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +19 -2
  20. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +10 -7
  21. package/deps/rocksdb/rocksdb/crash_test.mk +2 -2
  22. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +46 -26
  23. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +9 -3
  24. package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +90 -0
  25. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +56 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -10
  27. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +64 -59
  28. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +11 -8
  29. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +92 -62
  30. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +159 -136
  31. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -13
  32. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +129 -57
  33. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +81 -3
  34. package/deps/rocksdb/rocksdb/db/c.cc +29 -0
  35. package/deps/rocksdb/rocksdb/db/column_family.cc +10 -1
  36. package/deps/rocksdb/rocksdb/db/column_family_test.cc +21 -0
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +42 -36
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +344 -102
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +163 -28
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +52 -17
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +35 -30
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -3
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +167 -11
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +8 -8
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +10 -13
  46. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +0 -117
  47. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +6 -49
  48. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +29 -4
  49. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +18 -11
  50. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +4 -10
  51. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +1 -1
  52. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +12 -0
  53. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -93
  54. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +28 -32
  55. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +1 -1
  56. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -9
  57. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +2 -33
  58. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -5
  59. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +11 -0
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -2
  61. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
  62. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +2 -1
  63. package/deps/rocksdb/rocksdb/db/db_iter.cc +76 -138
  64. package/deps/rocksdb/rocksdb/db/db_iter.h +26 -23
  65. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +1 -1
  66. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +931 -0
  67. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +2 -2
  68. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -0
  69. package/deps/rocksdb/rocksdb/db/db_test2.cc +44 -22
  70. package/deps/rocksdb/rocksdb/db/db_test_util.cc +6 -14
  71. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +155 -0
  72. package/deps/rocksdb/rocksdb/db/db_write_test.cc +45 -0
  73. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -1
  74. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -0
  75. package/deps/rocksdb/rocksdb/db/experimental.cc +5 -1
  76. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +24 -12
  77. package/deps/rocksdb/rocksdb/db/internal_stats.cc +7 -1
  78. package/deps/rocksdb/rocksdb/db/internal_stats.h +3 -0
  79. package/deps/rocksdb/rocksdb/db/memtable.cc +79 -18
  80. package/deps/rocksdb/rocksdb/db/memtable.h +5 -0
  81. package/deps/rocksdb/rocksdb/db/memtable_list.cc +26 -4
  82. package/deps/rocksdb/rocksdb/db/memtable_list.h +2 -1
  83. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +113 -0
  84. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +110 -0
  85. package/deps/rocksdb/rocksdb/db/{periodic_work_scheduler_test.cc → periodic_task_scheduler_test.cc} +33 -39
  86. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +12 -20
  87. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +6 -5
  88. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +12 -8
  89. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +20 -5
  90. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +14 -0
  91. package/deps/rocksdb/rocksdb/db/repair.cc +17 -8
  92. package/deps/rocksdb/rocksdb/db/repair_test.cc +2 -1
  93. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +49 -66
  94. package/deps/rocksdb/rocksdb/db/table_cache.cc +92 -63
  95. package/deps/rocksdb/rocksdb/db/table_cache.h +16 -9
  96. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  97. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +2 -2
  98. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -3
  99. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
  100. package/deps/rocksdb/rocksdb/db/version_builder.cc +1 -1
  101. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -2
  102. package/deps/rocksdb/rocksdb/db/version_set.cc +379 -145
  103. package/deps/rocksdb/rocksdb/db/version_set.h +26 -24
  104. package/deps/rocksdb/rocksdb/db/version_set_test.cc +9 -9
  105. package/deps/rocksdb/rocksdb/db/version_util.h +3 -2
  106. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +10 -2
  107. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +2 -0
  108. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -8
  109. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +5 -8
  110. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +2 -0
  111. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +71 -0
  112. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +14 -0
  113. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +23 -0
  114. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +26 -1
  115. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +105 -34
  116. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +16 -8
  117. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -0
  118. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +4 -8
  119. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -8
  120. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +282 -25
  121. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  122. package/deps/rocksdb/rocksdb/env/io_posix.cc +3 -1
  123. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +367 -177
  124. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +144 -56
  125. package/deps/rocksdb/rocksdb/file/filename.cc +3 -3
  126. package/deps/rocksdb/rocksdb/file/filename.h +4 -2
  127. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +415 -0
  128. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +2 -0
  129. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +36 -45
  130. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +21 -3
  131. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +11 -11
  132. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +15 -1
  133. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +163 -68
  134. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +26 -12
  135. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +23 -5
  136. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +21 -17
  137. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +17 -0
  138. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +3 -3
  139. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +17 -6
  140. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +20 -0
  142. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +3 -3
  143. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -0
  144. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  145. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +3 -0
  146. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -1
  147. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +2 -1
  148. package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -2
  149. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +4 -2
  150. package/deps/rocksdb/rocksdb/monitoring/histogram.h +2 -0
  151. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +15 -1
  152. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +17 -0
  153. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +14 -3
  154. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +3 -0
  155. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +50 -0
  156. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -0
  157. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +31 -32
  158. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -1
  159. package/deps/rocksdb/rocksdb/options/options.cc +2 -2
  160. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -1
  161. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -0
  162. package/deps/rocksdb/rocksdb/src.mk +4 -2
  163. package/deps/rocksdb/rocksdb/table/block_based/block.h +9 -8
  164. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +110 -99
  165. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +12 -10
  166. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +11 -2
  167. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +138 -83
  168. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +25 -24
  169. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +31 -30
  170. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +16 -13
  171. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +4 -4
  172. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -3
  173. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +3 -3
  174. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +17 -19
  175. package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
  176. package/deps/rocksdb/rocksdb/table/format.cc +26 -29
  177. package/deps/rocksdb/rocksdb/table/format.h +44 -26
  178. package/deps/rocksdb/rocksdb/table/get_context.cc +17 -12
  179. package/deps/rocksdb/rocksdb/table/internal_iterator.h +7 -0
  180. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +4 -0
  181. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +950 -104
  182. package/deps/rocksdb/rocksdb/table/merging_iterator.h +28 -1
  183. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +3 -2
  184. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -1
  185. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +10 -9
  186. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +22 -20
  187. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +1 -1
  188. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +1 -1
  189. package/deps/rocksdb/rocksdb/table/table_builder.h +9 -21
  190. package/deps/rocksdb/rocksdb/table/table_test.cc +12 -12
  191. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +4 -4
  192. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +1 -0
  193. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +116 -34
  194. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +6 -1
  195. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +1 -1
  196. package/deps/rocksdb/rocksdb/util/autovector.h +12 -0
  197. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +3 -2
  198. package/deps/rocksdb/rocksdb/util/stderr_logger.cc +30 -0
  199. package/deps/rocksdb/rocksdb/util/stderr_logger.h +5 -18
  200. package/deps/rocksdb/rocksdb/util/timer.h +2 -3
  201. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +9 -2
  202. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +1 -1
  203. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +1 -1
  204. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +34 -53
  205. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +9 -14
  206. package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -4
  207. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +4 -0
  208. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +1 -1
  209. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +4 -3
  210. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +3 -1
  211. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +26 -8
  212. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +114 -16
  213. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
  214. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +59 -0
  215. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +3 -0
  216. package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +39 -0
  217. package/deps/rocksdb/rocksdb.gyp +0 -1
  218. package/index.js +6 -10
  219. package/package.json +1 -1
  220. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  221. package/prebuilds/linux-x64/node.napi.node +0 -0
  222. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +0 -168
  223. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +0 -90
@@ -116,14 +116,14 @@ void TableCache::ReleaseHandle(Cache::Handle* handle) {
116
116
 
117
117
  Status TableCache::GetTableReader(
118
118
  const ReadOptions& ro, const FileOptions& file_options,
119
- const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
120
- bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist,
121
- std::unique_ptr<TableReader>* table_reader,
119
+ const InternalKeyComparator& internal_comparator,
120
+ const FileMetaData& file_meta, bool sequential_mode, bool record_read_stats,
121
+ HistogramImpl* file_read_hist, std::unique_ptr<TableReader>* table_reader,
122
122
  const std::shared_ptr<const SliceTransform>& prefix_extractor,
123
123
  bool skip_filters, int level, bool prefetch_index_and_filter_in_cache,
124
124
  size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
125
- std::string fname =
126
- TableFileName(ioptions_.cf_paths, fd.GetNumber(), fd.GetPathId());
125
+ std::string fname = TableFileName(
126
+ ioptions_.cf_paths, file_meta.fd.GetNumber(), file_meta.fd.GetPathId());
127
127
  std::unique_ptr<FSRandomAccessFile> file;
128
128
  FileOptions fopts = file_options;
129
129
  fopts.temperature = file_temperature;
@@ -156,14 +156,21 @@ Status TableCache::GetTableReader(
156
156
  record_read_stats ? ioptions_.stats : nullptr, SST_READ_MICROS,
157
157
  file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners,
158
158
  file_temperature, level == ioptions_.num_levels - 1));
159
+ UniqueId64x2 expected_unique_id;
160
+ if (ioptions_.verify_sst_unique_id_in_manifest) {
161
+ expected_unique_id = file_meta.unique_id;
162
+ } else {
163
+ expected_unique_id = kNullUniqueId64x2; // null ID == no verification
164
+ }
159
165
  s = ioptions_.table_factory->NewTableReader(
160
166
  ro,
161
- TableReaderOptions(
162
- ioptions_, prefix_extractor, file_options, internal_comparator,
163
- skip_filters, immortal_tables_, false /* force_direct_prefetch */,
164
- level, fd.largest_seqno, block_cache_tracer_,
165
- max_file_size_for_l0_meta_pin, db_session_id_, fd.GetNumber()),
166
- std::move(file_reader), fd.GetFileSize(), table_reader,
167
+ TableReaderOptions(ioptions_, prefix_extractor, file_options,
168
+ internal_comparator, skip_filters, immortal_tables_,
169
+ false /* force_direct_prefetch */, level,
170
+ block_cache_tracer_, max_file_size_for_l0_meta_pin,
171
+ db_session_id_, file_meta.fd.GetNumber(),
172
+ expected_unique_id, file_meta.fd.largest_seqno),
173
+ std::move(file_reader), file_meta.fd.GetFileSize(), table_reader,
167
174
  prefetch_index_and_filter_in_cache);
168
175
  TEST_SYNC_POINT("TableCache::GetTableReader:0");
169
176
  }
@@ -179,14 +186,14 @@ void TableCache::EraseHandle(const FileDescriptor& fd, Cache::Handle* handle) {
179
186
 
180
187
  Status TableCache::FindTable(
181
188
  const ReadOptions& ro, const FileOptions& file_options,
182
- const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
183
- Cache::Handle** handle,
189
+ const InternalKeyComparator& internal_comparator,
190
+ const FileMetaData& file_meta, Cache::Handle** handle,
184
191
  const std::shared_ptr<const SliceTransform>& prefix_extractor,
185
192
  const bool no_io, bool record_read_stats, HistogramImpl* file_read_hist,
186
193
  bool skip_filters, int level, bool prefetch_index_and_filter_in_cache,
187
194
  size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
188
195
  PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos, ioptions_.clock);
189
- uint64_t number = fd.GetNumber();
196
+ uint64_t number = file_meta.fd.GetNumber();
190
197
  Slice key = GetSliceForFileNumber(&number);
191
198
  *handle = cache_->Lookup(key);
192
199
  TEST_SYNC_POINT_CALLBACK("TableCache::FindTable:0",
@@ -204,11 +211,12 @@ Status TableCache::FindTable(
204
211
  }
205
212
 
206
213
  std::unique_ptr<TableReader> table_reader;
207
- Status s = GetTableReader(
208
- ro, file_options, internal_comparator, fd, false /* sequential mode */,
209
- record_read_stats, file_read_hist, &table_reader, prefix_extractor,
210
- skip_filters, level, prefetch_index_and_filter_in_cache,
211
- max_file_size_for_l0_meta_pin, file_temperature);
214
+ Status s =
215
+ GetTableReader(ro, file_options, internal_comparator, file_meta,
216
+ false /* sequential mode */, record_read_stats,
217
+ file_read_hist, &table_reader, prefix_extractor,
218
+ skip_filters, level, prefetch_index_and_filter_in_cache,
219
+ max_file_size_for_l0_meta_pin, file_temperature);
212
220
  if (!s.ok()) {
213
221
  assert(table_reader == nullptr);
214
222
  RecordTick(ioptions_.stats, NO_FILE_ERRORS);
@@ -236,7 +244,8 @@ InternalIterator* TableCache::NewIterator(
236
244
  TableReaderCaller caller, Arena* arena, bool skip_filters, int level,
237
245
  size_t max_file_size_for_l0_meta_pin,
238
246
  const InternalKey* smallest_compaction_key,
239
- const InternalKey* largest_compaction_key, bool allow_unprepared_value) {
247
+ const InternalKey* largest_compaction_key, bool allow_unprepared_value,
248
+ TruncatedRangeDelIterator** range_del_iter) {
240
249
  PERF_TIMER_GUARD(new_table_iterator_nanos);
241
250
 
242
251
  Status s;
@@ -250,8 +259,8 @@ InternalIterator* TableCache::NewIterator(
250
259
  table_reader = fd.table_reader;
251
260
  if (table_reader == nullptr) {
252
261
  s = FindTable(
253
- options, file_options, icomparator, fd, &handle, prefix_extractor,
254
- options.read_tier == kBlockCacheTier /* no_io */,
262
+ options, file_options, icomparator, file_meta, &handle,
263
+ prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */,
255
264
  !for_compaction /* record_read_stats */, file_read_hist, skip_filters,
256
265
  level, true /* prefetch_index_and_filter_in_cache */,
257
266
  max_file_size_for_l0_meta_pin, file_meta.temperature);
@@ -281,25 +290,40 @@ InternalIterator* TableCache::NewIterator(
281
290
  *table_reader_ptr = table_reader;
282
291
  }
283
292
  }
284
- if (s.ok() && range_del_agg != nullptr && !options.ignore_range_deletions) {
285
- if (range_del_agg->AddFile(fd.GetNumber())) {
286
- std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter(
287
- static_cast<FragmentedRangeTombstoneIterator*>(
288
- table_reader->NewRangeTombstoneIterator(options)));
289
- if (range_del_iter != nullptr) {
290
- s = range_del_iter->status();
293
+ if (s.ok() && !options.ignore_range_deletions) {
294
+ if (range_del_iter != nullptr) {
295
+ auto new_range_del_iter =
296
+ table_reader->NewRangeTombstoneIterator(options);
297
+ if (new_range_del_iter == nullptr || new_range_del_iter->empty()) {
298
+ delete new_range_del_iter;
299
+ *range_del_iter = nullptr;
300
+ } else {
301
+ *range_del_iter = new TruncatedRangeDelIterator(
302
+ std::unique_ptr<FragmentedRangeTombstoneIterator>(
303
+ new_range_del_iter),
304
+ &icomparator, &file_meta.smallest, &file_meta.largest);
291
305
  }
292
- if (s.ok()) {
293
- const InternalKey* smallest = &file_meta.smallest;
294
- const InternalKey* largest = &file_meta.largest;
295
- if (smallest_compaction_key != nullptr) {
296
- smallest = smallest_compaction_key;
306
+ }
307
+ if (range_del_agg != nullptr) {
308
+ if (range_del_agg->AddFile(fd.GetNumber())) {
309
+ std::unique_ptr<FragmentedRangeTombstoneIterator> new_range_del_iter(
310
+ static_cast<FragmentedRangeTombstoneIterator*>(
311
+ table_reader->NewRangeTombstoneIterator(options)));
312
+ if (new_range_del_iter != nullptr) {
313
+ s = new_range_del_iter->status();
297
314
  }
298
- if (largest_compaction_key != nullptr) {
299
- largest = largest_compaction_key;
315
+ if (s.ok()) {
316
+ const InternalKey* smallest = &file_meta.smallest;
317
+ const InternalKey* largest = &file_meta.largest;
318
+ if (smallest_compaction_key != nullptr) {
319
+ smallest = smallest_compaction_key;
320
+ }
321
+ if (largest_compaction_key != nullptr) {
322
+ largest = largest_compaction_key;
323
+ }
324
+ range_del_agg->AddTombstones(std::move(new_range_del_iter), smallest,
325
+ largest);
300
326
  }
301
- range_del_agg->AddTombstones(std::move(range_del_iter), smallest,
302
- largest);
303
327
  }
304
328
  }
305
329
  }
@@ -325,7 +349,8 @@ Status TableCache::GetRangeTombstoneIterator(
325
349
  TableReader* t = fd.table_reader;
326
350
  Cache::Handle* handle = nullptr;
327
351
  if (t == nullptr) {
328
- s = FindTable(options, file_options_, internal_comparator, fd, &handle);
352
+ s = FindTable(options, file_options_, internal_comparator, file_meta,
353
+ &handle);
329
354
  if (s.ok()) {
330
355
  t = GetTableReaderFromHandle(handle);
331
356
  }
@@ -448,8 +473,8 @@ Status TableCache::Get(
448
473
  if (!done) {
449
474
  assert(s.ok());
450
475
  if (t == nullptr) {
451
- s = FindTable(options, file_options_, internal_comparator, fd, &handle,
452
- prefix_extractor,
476
+ s = FindTable(options, file_options_, internal_comparator, file_meta,
477
+ &handle, prefix_extractor,
453
478
  options.read_tier == kBlockCacheTier /* no_io */,
454
479
  true /* record_read_stats */, file_read_hist, skip_filters,
455
480
  level, true /* prefetch_index_and_filter_in_cache */,
@@ -544,7 +569,7 @@ Status TableCache::MultiGetFilter(
544
569
  mget_range->end());
545
570
  if (t == nullptr) {
546
571
  s = FindTable(
547
- options, file_options_, internal_comparator, fd, &handle,
572
+ options, file_options_, internal_comparator, file_meta, &handle,
548
573
  prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */,
549
574
  true /* record_read_stats */, file_read_hist, /*skip_filters=*/false,
550
575
  level, true /* prefetch_index_and_filter_in_cache */,
@@ -573,10 +598,11 @@ Status TableCache::MultiGetFilter(
573
598
 
574
599
  Status TableCache::GetTableProperties(
575
600
  const FileOptions& file_options,
576
- const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
601
+ const InternalKeyComparator& internal_comparator,
602
+ const FileMetaData& file_meta,
577
603
  std::shared_ptr<const TableProperties>* properties,
578
604
  const std::shared_ptr<const SliceTransform>& prefix_extractor, bool no_io) {
579
- auto table_reader = fd.table_reader;
605
+ auto table_reader = file_meta.fd.table_reader;
580
606
  // table already been pre-loaded?
581
607
  if (table_reader) {
582
608
  *properties = table_reader->GetTableProperties();
@@ -585,8 +611,8 @@ Status TableCache::GetTableProperties(
585
611
  }
586
612
 
587
613
  Cache::Handle* table_handle = nullptr;
588
- Status s = FindTable(ReadOptions(), file_options, internal_comparator, fd,
589
- &table_handle, prefix_extractor, no_io);
614
+ Status s = FindTable(ReadOptions(), file_options, internal_comparator,
615
+ file_meta, &table_handle, prefix_extractor, no_io);
590
616
  if (!s.ok()) {
591
617
  return s;
592
618
  }
@@ -599,12 +625,12 @@ Status TableCache::GetTableProperties(
599
625
 
600
626
  Status TableCache::ApproximateKeyAnchors(
601
627
  const ReadOptions& ro, const InternalKeyComparator& internal_comparator,
602
- const FileDescriptor& fd, std::vector<TableReader::Anchor>& anchors) {
628
+ const FileMetaData& file_meta, std::vector<TableReader::Anchor>& anchors) {
603
629
  Status s;
604
- TableReader* t = fd.table_reader;
630
+ TableReader* t = file_meta.fd.table_reader;
605
631
  Cache::Handle* handle = nullptr;
606
632
  if (t == nullptr) {
607
- s = FindTable(ro, file_options_, internal_comparator, fd, &handle);
633
+ s = FindTable(ro, file_options_, internal_comparator, file_meta, &handle);
608
634
  if (s.ok()) {
609
635
  t = GetTableReaderFromHandle(handle);
610
636
  }
@@ -620,17 +646,18 @@ Status TableCache::ApproximateKeyAnchors(
620
646
 
621
647
  size_t TableCache::GetMemoryUsageByTableReader(
622
648
  const FileOptions& file_options,
623
- const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
649
+ const InternalKeyComparator& internal_comparator,
650
+ const FileMetaData& file_meta,
624
651
  const std::shared_ptr<const SliceTransform>& prefix_extractor) {
625
- auto table_reader = fd.table_reader;
652
+ auto table_reader = file_meta.fd.table_reader;
626
653
  // table already been pre-loaded?
627
654
  if (table_reader) {
628
655
  return table_reader->ApproximateMemoryUsage();
629
656
  }
630
657
 
631
658
  Cache::Handle* table_handle = nullptr;
632
- Status s = FindTable(ReadOptions(), file_options, internal_comparator, fd,
633
- &table_handle, prefix_extractor, true);
659
+ Status s = FindTable(ReadOptions(), file_options, internal_comparator,
660
+ file_meta, &table_handle, prefix_extractor, true);
634
661
  if (!s.ok()) {
635
662
  return 0;
636
663
  }
@@ -656,17 +683,18 @@ void TableCache::Evict(Cache* cache, uint64_t file_number) {
656
683
  }
657
684
 
658
685
  uint64_t TableCache::ApproximateOffsetOf(
659
- const Slice& key, const FileDescriptor& fd, TableReaderCaller caller,
686
+ const Slice& key, const FileMetaData& file_meta, TableReaderCaller caller,
660
687
  const InternalKeyComparator& internal_comparator,
661
688
  const std::shared_ptr<const SliceTransform>& prefix_extractor) {
662
689
  uint64_t result = 0;
663
- TableReader* table_reader = fd.table_reader;
690
+ TableReader* table_reader = file_meta.fd.table_reader;
664
691
  Cache::Handle* table_handle = nullptr;
665
692
  if (table_reader == nullptr) {
666
693
  const bool for_compaction = (caller == TableReaderCaller::kCompaction);
667
- Status s = FindTable(ReadOptions(), file_options_, internal_comparator, fd,
668
- &table_handle, prefix_extractor, false /* no_io */,
669
- !for_compaction /* record_read_stats */);
694
+ Status s =
695
+ FindTable(ReadOptions(), file_options_, internal_comparator, file_meta,
696
+ &table_handle, prefix_extractor, false /* no_io */,
697
+ !for_compaction /* record_read_stats */);
670
698
  if (s.ok()) {
671
699
  table_reader = GetTableReaderFromHandle(table_handle);
672
700
  }
@@ -683,17 +711,18 @@ uint64_t TableCache::ApproximateOffsetOf(
683
711
  }
684
712
 
685
713
  uint64_t TableCache::ApproximateSize(
686
- const Slice& start, const Slice& end, const FileDescriptor& fd,
714
+ const Slice& start, const Slice& end, const FileMetaData& file_meta,
687
715
  TableReaderCaller caller, const InternalKeyComparator& internal_comparator,
688
716
  const std::shared_ptr<const SliceTransform>& prefix_extractor) {
689
717
  uint64_t result = 0;
690
- TableReader* table_reader = fd.table_reader;
718
+ TableReader* table_reader = file_meta.fd.table_reader;
691
719
  Cache::Handle* table_handle = nullptr;
692
720
  if (table_reader == nullptr) {
693
721
  const bool for_compaction = (caller == TableReaderCaller::kCompaction);
694
- Status s = FindTable(ReadOptions(), file_options_, internal_comparator, fd,
695
- &table_handle, prefix_extractor, false /* no_io */,
696
- !for_compaction /* record_read_stats */);
722
+ Status s =
723
+ FindTable(ReadOptions(), file_options_, internal_comparator, file_meta,
724
+ &table_handle, prefix_extractor, false /* no_io */,
725
+ !for_compaction /* record_read_stats */);
697
726
  if (s.ok()) {
698
727
  table_reader = GetTableReaderFromHandle(table_handle);
699
728
  }
@@ -63,6 +63,11 @@ class TableCache {
63
63
  // the returned iterator. The returned "*table_reader_ptr" object is owned
64
64
  // by the cache and should not be deleted, and is valid for as long as the
65
65
  // returned iterator is live.
66
+ // If !options.ignore_range_deletions, and range_del_iter is non-nullptr,
67
+ // then range_del_iter is set to a TruncatedRangeDelIterator for range
68
+ // tombstones in the SST file corresponding to the specified file number. The
69
+ // upper/lower bounds for the TruncatedRangeDelIterator are set to the SST
70
+ // file's boundary.
66
71
  // @param options Must outlive the returned iterator.
67
72
  // @param range_del_agg If non-nullptr, adds range deletions to the
68
73
  // aggregator. If an error occurs, returns it in a NewErrorInternalIterator
@@ -79,7 +84,8 @@ class TableCache {
79
84
  TableReaderCaller caller, Arena* arena, bool skip_filters, int level,
80
85
  size_t max_file_size_for_l0_meta_pin,
81
86
  const InternalKey* smallest_compaction_key,
82
- const InternalKey* largest_compaction_key, bool allow_unprepared_value);
87
+ const InternalKey* largest_compaction_key, bool allow_unprepared_value,
88
+ TruncatedRangeDelIterator** range_del_iter = nullptr);
83
89
 
84
90
  // If a seek to internal key "k" in specified file finds an entry,
85
91
  // call get_context->SaveValue() repeatedly until
@@ -154,7 +160,7 @@ class TableCache {
154
160
  Status FindTable(
155
161
  const ReadOptions& ro, const FileOptions& toptions,
156
162
  const InternalKeyComparator& internal_comparator,
157
- const FileDescriptor& file_fd, Cache::Handle**,
163
+ const FileMetaData& file_meta, Cache::Handle**,
158
164
  const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
159
165
  const bool no_io = false, bool record_read_stats = true,
160
166
  HistogramImpl* file_read_hist = nullptr, bool skip_filters = false,
@@ -174,14 +180,14 @@ class TableCache {
174
180
  Status GetTableProperties(
175
181
  const FileOptions& toptions,
176
182
  const InternalKeyComparator& internal_comparator,
177
- const FileDescriptor& file_meta,
183
+ const FileMetaData& file_meta,
178
184
  std::shared_ptr<const TableProperties>* properties,
179
185
  const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
180
186
  bool no_io = false);
181
187
 
182
188
  Status ApproximateKeyAnchors(const ReadOptions& ro,
183
189
  const InternalKeyComparator& internal_comparator,
184
- const FileDescriptor& file_meta,
190
+ const FileMetaData& file_meta,
185
191
  std::vector<TableReader::Anchor>& anchors);
186
192
 
187
193
  // Return total memory usage of the table reader of the file.
@@ -189,19 +195,19 @@ class TableCache {
189
195
  size_t GetMemoryUsageByTableReader(
190
196
  const FileOptions& toptions,
191
197
  const InternalKeyComparator& internal_comparator,
192
- const FileDescriptor& fd,
198
+ const FileMetaData& file_meta,
193
199
  const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr);
194
200
 
195
201
  // Returns approximated offset of a key in a file represented by fd.
196
202
  uint64_t ApproximateOffsetOf(
197
- const Slice& key, const FileDescriptor& fd, TableReaderCaller caller,
203
+ const Slice& key, const FileMetaData& file_meta, TableReaderCaller caller,
198
204
  const InternalKeyComparator& internal_comparator,
199
205
  const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr);
200
206
 
201
207
  // Returns approximated data size between start and end keys in a file
202
208
  // represented by fd (the start key must not be greater than the end key).
203
209
  uint64_t ApproximateSize(
204
- const Slice& start, const Slice& end, const FileDescriptor& fd,
210
+ const Slice& start, const Slice& end, const FileMetaData& file_meta,
205
211
  TableReaderCaller caller,
206
212
  const InternalKeyComparator& internal_comparator,
207
213
  const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr);
@@ -228,8 +234,9 @@ class TableCache {
228
234
  Status GetTableReader(
229
235
  const ReadOptions& ro, const FileOptions& file_options,
230
236
  const InternalKeyComparator& internal_comparator,
231
- const FileDescriptor& fd, bool sequential_mode, bool record_read_stats,
232
- HistogramImpl* file_read_hist, std::unique_ptr<TableReader>* table_reader,
237
+ const FileMetaData& file_meta, bool sequential_mode,
238
+ bool record_read_stats, HistogramImpl* file_read_hist,
239
+ std::unique_ptr<TableReader>* table_reader,
233
240
  const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
234
241
  bool skip_filters = false, int level = -1,
235
242
  bool prefetch_index_and_filter_in_cache = true,
@@ -67,8 +67,8 @@ DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet)
67
67
  if (s.ok() && !table_range.empty()) {
68
68
  if (t == nullptr) {
69
69
  assert(handle == nullptr);
70
- s = FindTable(options, file_options_, internal_comparator, fd, &handle,
71
- prefix_extractor,
70
+ s = FindTable(options, file_options_, internal_comparator, file_meta,
71
+ &handle, prefix_extractor,
72
72
  options.read_tier == kBlockCacheTier /* no_io */,
73
73
  true /* record_read_stats */, file_read_hist, skip_filters,
74
74
  level, true /* prefetch_index_and_filter_in_cache */,
@@ -43,9 +43,9 @@ Status UserKeyTablePropertiesCollector::InternalAdd(const Slice& key,
43
43
  }
44
44
 
45
45
  void UserKeyTablePropertiesCollector::BlockAdd(
46
- uint64_t block_raw_bytes, uint64_t block_compressed_bytes_fast,
46
+ uint64_t block_uncomp_bytes, uint64_t block_compressed_bytes_fast,
47
47
  uint64_t block_compressed_bytes_slow) {
48
- return collector_->BlockAdd(block_raw_bytes, block_compressed_bytes_fast,
48
+ return collector_->BlockAdd(block_uncomp_bytes, block_compressed_bytes_fast,
49
49
  block_compressed_bytes_slow);
50
50
  }
51
51
 
@@ -29,7 +29,7 @@ class IntTblPropCollector {
29
29
  virtual Status InternalAdd(const Slice& key, const Slice& value,
30
30
  uint64_t file_size) = 0;
31
31
 
32
- virtual void BlockAdd(uint64_t block_raw_bytes,
32
+ virtual void BlockAdd(uint64_t block_uncomp_bytes,
33
33
  uint64_t block_compressed_bytes_fast,
34
34
  uint64_t block_compressed_bytes_slow) = 0;
35
35
 
@@ -69,7 +69,7 @@ class UserKeyTablePropertiesCollector : public IntTblPropCollector {
69
69
  virtual Status InternalAdd(const Slice& key, const Slice& value,
70
70
  uint64_t file_size) override;
71
71
 
72
- virtual void BlockAdd(uint64_t block_raw_bytes,
72
+ virtual void BlockAdd(uint64_t block_uncomp_bytes,
73
73
  uint64_t block_compressed_bytes_fast,
74
74
  uint64_t block_compressed_bytes_slow) override;
75
75
 
@@ -143,7 +143,7 @@ class TimestampTablePropertiesCollector : public IntTblPropCollector {
143
143
  return Status::OK();
144
144
  }
145
145
 
146
- void BlockAdd(uint64_t /* block_raw_bytes */,
146
+ void BlockAdd(uint64_t /* block_uncomp_bytes */,
147
147
  uint64_t /* block_compressed_bytes_fast */,
148
148
  uint64_t /* block_compressed_bytes_slow */) override {
149
149
  return;
@@ -176,7 +176,7 @@ class RegularKeysStartWithAInternal : public IntTblPropCollector {
176
176
  return Status::OK();
177
177
  }
178
178
 
179
- void BlockAdd(uint64_t /* block_raw_bytes */,
179
+ void BlockAdd(uint64_t /* block_uncomp_bytes */,
180
180
  uint64_t /* block_compressed_bytes_fast */,
181
181
  uint64_t /* block_compressed_bytes_slow */) override {
182
182
  // Nothing to do.
@@ -1274,7 +1274,7 @@ class VersionBuilder::Rep {
1274
1274
  int level = files_meta[file_idx].second;
1275
1275
  statuses[file_idx] = table_cache_->FindTable(
1276
1276
  ReadOptions(), file_options_,
1277
- *(base_vstorage_->InternalComparator()), file_meta->fd,
1277
+ *(base_vstorage_->InternalComparator()), *file_meta,
1278
1278
  &file_meta->table_reader_handle, prefix_extractor, false /*no_io */,
1279
1279
  true /* record_read_stats */,
1280
1280
  internal_stats->GetFileReadHist(level), false, level,
@@ -23,6 +23,7 @@
23
23
  #include "rocksdb/advanced_options.h"
24
24
  #include "rocksdb/cache.h"
25
25
  #include "table/table_reader.h"
26
+ #include "table/unique_id_impl.h"
26
27
  #include "util/autovector.h"
27
28
 
28
29
  namespace ROCKSDB_NAMESPACE {
@@ -104,8 +105,6 @@ constexpr uint64_t kUnknownFileCreationTime = 0;
104
105
 
105
106
  extern uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id);
106
107
 
107
- using UniqueId64x2 = std::array<uint64_t, 2>;
108
-
109
108
  // A copyable structure contains information needed to read data from an SST
110
109
  // file. It can contain a pointer to a table reader opened for the file, or
111
110
  // file number and size, which can be used to create a new table reader for it.