@nxtedition/rocksdb 7.0.24 → 7.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146):
  1. package/binding.cc +3 -1
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +5 -0
  3. package/deps/rocksdb/rocksdb/Makefile +6 -2
  4. package/deps/rocksdb/rocksdb/TARGETS +14 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +4 -1
  6. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +20 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +2 -2
  8. package/deps/rocksdb/rocksdb/cache/cache_test.cc +44 -31
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +491 -722
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.h +468 -2
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -1
  12. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +51 -52
  13. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +28 -16
  14. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +12 -1
  15. package/deps/rocksdb/rocksdb/cache/lru_cache.h +1 -0
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +170 -36
  17. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +63 -36
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +4 -6
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +57 -38
  21. package/deps/rocksdb/rocksdb/db/blob/blob_read_request.h +58 -0
  22. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +164 -74
  23. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +42 -29
  24. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +419 -62
  25. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +208 -8
  26. package/deps/rocksdb/rocksdb/db/c.cc +68 -0
  27. package/deps/rocksdb/rocksdb/db/c_test.c +95 -2
  28. package/deps/rocksdb/rocksdb/db/column_family.cc +12 -3
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +92 -15
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +76 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +52 -1
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +30 -1
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +126 -0
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +203 -1584
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +93 -26
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +87 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +314 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +328 -0
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +32 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +4 -1
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +7 -3
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +174 -33
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +474 -7
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +825 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +46 -0
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_state.h +42 -0
  48. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +223 -0
  49. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +255 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +1253 -0
  51. package/deps/rocksdb/rocksdb/db/corruption_test.cc +32 -8
  52. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3 -1
  53. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -8
  54. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +376 -0
  55. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +103 -78
  56. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +4 -6
  57. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -8
  58. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -3
  59. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +21 -6
  60. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +19 -1
  61. package/deps/rocksdb/rocksdb/db/db_iter.cc +91 -14
  62. package/deps/rocksdb/rocksdb/db/db_iter.h +5 -0
  63. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +33 -0
  64. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +79 -0
  65. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +2 -0
  66. package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
  67. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +5 -2
  68. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +185 -0
  69. package/deps/rocksdb/rocksdb/db/dbformat.cc +1 -4
  70. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -8
  71. package/deps/rocksdb/rocksdb/db/internal_stats.cc +71 -29
  72. package/deps/rocksdb/rocksdb/db/internal_stats.h +160 -5
  73. package/deps/rocksdb/rocksdb/db/log_reader.cc +29 -3
  74. package/deps/rocksdb/rocksdb/db/log_reader.h +12 -3
  75. package/deps/rocksdb/rocksdb/db/repair_test.cc +1 -3
  76. package/deps/rocksdb/rocksdb/db/version_edit.cc +6 -0
  77. package/deps/rocksdb/rocksdb/db/version_set.cc +93 -129
  78. package/deps/rocksdb/rocksdb/db/version_set.h +4 -4
  79. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -2
  80. package/deps/rocksdb/rocksdb/db/version_set_test.cc +42 -35
  81. package/deps/rocksdb/rocksdb/db/write_batch.cc +10 -2
  82. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +10 -4
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -3
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +3 -2
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +5 -1
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +140 -8
  89. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +12 -0
  90. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +46 -7
  91. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +7 -0
  92. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +27 -7
  93. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +8 -0
  94. package/deps/rocksdb/rocksdb/env/env_posix.cc +14 -0
  95. package/deps/rocksdb/rocksdb/env/env_test.cc +130 -1
  96. package/deps/rocksdb/rocksdb/env/fs_posix.cc +7 -1
  97. package/deps/rocksdb/rocksdb/env/io_posix.cc +18 -50
  98. package/deps/rocksdb/rocksdb/env/io_posix.h +53 -6
  99. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +8 -10
  100. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +3 -7
  101. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +239 -259
  102. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +84 -19
  103. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +24 -4
  104. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +1 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +31 -1
  106. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +11 -7
  107. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +2 -0
  108. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +14 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +20 -0
  110. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +37 -13
  111. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +7 -0
  112. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +14 -0
  113. package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +9 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +13 -13
  115. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -2
  116. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +38 -0
  117. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +7 -1
  118. package/deps/rocksdb/rocksdb/port/win/env_win.cc +17 -0
  119. package/deps/rocksdb/rocksdb/port/win/env_win.h +8 -0
  120. package/deps/rocksdb/rocksdb/port/win/io_win.cc +6 -3
  121. package/{prebuilds → deps/rocksdb/rocksdb/prebuilds}/linux-x64/node.napi.node +0 -0
  122. package/deps/rocksdb/rocksdb/src.mk +5 -0
  123. package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -2
  124. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
  125. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +5 -2
  126. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
  127. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +15 -12
  128. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +5 -4
  129. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +2 -1
  130. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1 -1
  131. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
  132. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -2
  133. package/deps/rocksdb/rocksdb/table/get_context.cc +1 -0
  134. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -2
  135. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +24 -4
  136. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +1 -1
  137. package/deps/rocksdb/rocksdb/util/compression.h +2 -0
  138. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +18 -1
  139. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +67 -4
  140. package/deps/rocksdb/rocksdb/util/threadpool_imp.h +8 -0
  141. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +15 -12
  142. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -2
  143. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +1 -1
  144. package/deps/rocksdb/rocksdb.gyp +5 -1
  145. package/package.json +1 -1
  146. package/prebuilds/darwin-arm64/node.napi.node +0 -0
@@ -11,34 +11,24 @@
11
11
 
12
12
  #include <algorithm>
13
13
  #include <cinttypes>
14
- #include <functional>
15
- #include <list>
16
14
  #include <memory>
17
- #include <random>
18
15
  #include <set>
19
- #include <thread>
20
16
  #include <utility>
21
17
  #include <vector>
22
18
 
23
19
  #include "db/blob/blob_counting_iterator.h"
24
20
  #include "db/blob/blob_file_addition.h"
25
21
  #include "db/blob/blob_file_builder.h"
26
- #include "db/blob/blob_garbage_meter.h"
27
22
  #include "db/builder.h"
28
23
  #include "db/compaction/clipping_iterator.h"
24
+ #include "db/compaction/compaction_state.h"
29
25
  #include "db/db_impl/db_impl.h"
30
- #include "db/db_iter.h"
31
26
  #include "db/dbformat.h"
32
27
  #include "db/error_handler.h"
33
28
  #include "db/event_helpers.h"
34
29
  #include "db/history_trimming_iterator.h"
35
- #include "db/log_reader.h"
36
30
  #include "db/log_writer.h"
37
- #include "db/memtable.h"
38
- #include "db/memtable_list.h"
39
- #include "db/merge_context.h"
40
31
  #include "db/merge_helper.h"
41
- #include "db/output_validator.h"
42
32
  #include "db/range_del_aggregator.h"
43
33
  #include "db/version_set.h"
44
34
  #include "file/filename.h"
@@ -48,30 +38,21 @@
48
38
  #include "logging/log_buffer.h"
49
39
  #include "logging/logging.h"
50
40
  #include "monitoring/iostats_context_imp.h"
51
- #include "monitoring/perf_context_imp.h"
52
41
  #include "monitoring/thread_status_util.h"
53
42
  #include "options/configurable_helper.h"
54
43
  #include "options/options_helper.h"
55
44
  #include "port/port.h"
56
45
  #include "rocksdb/db.h"
57
46
  #include "rocksdb/env.h"
58
- #include "rocksdb/sst_partitioner.h"
59
47
  #include "rocksdb/statistics.h"
60
48
  #include "rocksdb/status.h"
61
49
  #include "rocksdb/table.h"
62
50
  #include "rocksdb/utilities/options_type.h"
63
- #include "table/block_based/block.h"
64
- #include "table/block_based/block_based_table_factory.h"
65
51
  #include "table/merging_iterator.h"
66
52
  #include "table/table_builder.h"
67
53
  #include "table/unique_id_impl.h"
68
54
  #include "test_util/sync_point.h"
69
- #include "util/coding.h"
70
- #include "util/hash.h"
71
- #include "util/mutexlock.h"
72
- #include "util/random.h"
73
55
  #include "util/stop_watch.h"
74
- #include "util/string_util.h"
75
56
 
76
57
  namespace ROCKSDB_NAMESPACE {
77
58
 
@@ -121,319 +102,6 @@ const char* GetCompactionReasonString(CompactionReason compaction_reason) {
121
102
  }
122
103
  }
123
104
 
124
- // Maintains state for each sub-compaction
125
- struct CompactionJob::SubcompactionState {
126
- const Compaction* compaction;
127
- std::unique_ptr<CompactionIterator> c_iter;
128
-
129
- // The boundaries of the key-range this compaction is interested in. No two
130
- // subcompactions may have overlapping key-ranges.
131
- // 'start' is inclusive, 'end' is exclusive, and nullptr means unbounded
132
- Slice *start, *end;
133
-
134
- // The return status of this subcompaction
135
- Status status;
136
-
137
- // The return IO Status of this subcompaction
138
- IOStatus io_status;
139
-
140
- // Files produced by this subcompaction
141
- struct Output {
142
- Output(FileMetaData&& _meta, const InternalKeyComparator& _icmp,
143
- bool _enable_order_check, bool _enable_hash, bool _finished = false,
144
- uint64_t precalculated_hash = 0)
145
- : meta(std::move(_meta)),
146
- validator(_icmp, _enable_order_check, _enable_hash,
147
- precalculated_hash),
148
- finished(_finished) {}
149
- FileMetaData meta;
150
- OutputValidator validator;
151
- bool finished;
152
- std::shared_ptr<const TableProperties> table_properties;
153
- };
154
-
155
- // State kept for output being generated
156
- std::vector<Output> outputs;
157
- std::vector<BlobFileAddition> blob_file_additions;
158
- std::unique_ptr<BlobGarbageMeter> blob_garbage_meter;
159
- std::unique_ptr<WritableFileWriter> outfile;
160
- std::unique_ptr<TableBuilder> builder;
161
-
162
- Output* current_output() {
163
- if (outputs.empty()) {
164
- // This subcompaction's output could be empty if compaction was aborted
165
- // before this subcompaction had a chance to generate any output files.
166
- // When subcompactions are executed sequentially this is more likely and
167
- // will be particularly likely for the later subcompactions to be empty.
168
- // Once they are run in parallel however it should be much rarer.
169
- return nullptr;
170
- } else {
171
- return &outputs.back();
172
- }
173
- }
174
-
175
- // Some identified files with old oldest ancester time and the range should be
176
- // isolated out so that the output file(s) in that range can be merged down
177
- // for TTL and clear the timestamps for the range.
178
- std::vector<FileMetaData*> files_to_cut_for_ttl;
179
- int cur_files_to_cut_for_ttl = -1;
180
- int next_files_to_cut_for_ttl = 0;
181
-
182
- uint64_t current_output_file_size = 0;
183
-
184
- // State during the subcompaction
185
- uint64_t total_bytes = 0;
186
- uint64_t num_output_records = 0;
187
- CompactionJobStats compaction_job_stats;
188
- uint64_t approx_size = 0;
189
- // An index that used to speed up ShouldStopBefore().
190
- size_t grandparent_index = 0;
191
- // The number of bytes overlapping between the current output and
192
- // grandparent files used in ShouldStopBefore().
193
- uint64_t overlapped_bytes = 0;
194
- // A flag determines whether the key has been seen in ShouldStopBefore()
195
- bool seen_key = false;
196
- // sub compaction job id, which is used to identify different sub-compaction
197
- // within the same compaction job.
198
- const uint32_t sub_job_id;
199
-
200
- // Notify on sub-compaction completion only if listener was notified on
201
- // sub-compaction begin.
202
- bool notify_on_subcompaction_completion = false;
203
-
204
- // A flag determines if this subcompaction has been split by the cursor
205
- bool is_split = false;
206
-
207
- SubcompactionState(Compaction* c, Slice* _start, Slice* _end, uint64_t size,
208
- uint32_t _sub_job_id)
209
- : compaction(c),
210
- start(_start),
211
- end(_end),
212
- approx_size(size),
213
- sub_job_id(_sub_job_id) {
214
- assert(compaction != nullptr);
215
- }
216
-
217
- // Adds the key and value to the builder
218
- // If paranoid is true, adds the key-value to the paranoid hash
219
- Status AddToBuilder(const Slice& key, const Slice& value) {
220
- auto curr = current_output();
221
- assert(builder != nullptr);
222
- assert(curr != nullptr);
223
- Status s = curr->validator.Add(key, value);
224
- if (!s.ok()) {
225
- return s;
226
- }
227
- builder->Add(key, value);
228
- return Status::OK();
229
- }
230
-
231
- void FillFilesToCutForTtl();
232
-
233
- // Returns true iff we should stop building the current output
234
- // before processing "internal_key".
235
- bool ShouldStopBefore(const Slice& internal_key, uint64_t curr_file_size) {
236
- const InternalKeyComparator* icmp =
237
- &compaction->column_family_data()->internal_comparator();
238
- const std::vector<FileMetaData*>& grandparents = compaction->grandparents();
239
-
240
- const InternalKey output_split_key = compaction->GetOutputSplitKey();
241
- if (output_split_key.Valid() && !is_split) {
242
- // Invalid output_split_key indicates that we do not need to split
243
- if ((end == nullptr || icmp->user_comparator()->Compare(
244
- ExtractUserKey(output_split_key.Encode()),
245
- ExtractUserKey(*end)) < 0) &&
246
- (start == nullptr || icmp->user_comparator()->Compare(
247
- ExtractUserKey(output_split_key.Encode()),
248
- ExtractUserKey(*start)) > 0)) {
249
- // We may only split the output when the cursor is in the range. Split
250
- // occurs when the next key is larger than/equal to the cursor
251
- if (icmp->Compare(internal_key, output_split_key.Encode()) >= 0) {
252
- is_split = true;
253
- return true;
254
- }
255
- }
256
- }
257
- bool grandparant_file_switched = false;
258
- // Scan to find earliest grandparent file that contains key.
259
- while (grandparent_index < grandparents.size() &&
260
- icmp->Compare(internal_key,
261
- grandparents[grandparent_index]->largest.Encode()) >
262
- 0) {
263
- if (seen_key) {
264
- overlapped_bytes += grandparents[grandparent_index]->fd.GetFileSize();
265
- grandparant_file_switched = true;
266
- }
267
- assert(grandparent_index + 1 >= grandparents.size() ||
268
- icmp->Compare(
269
- grandparents[grandparent_index]->largest.Encode(),
270
- grandparents[grandparent_index + 1]->smallest.Encode()) <= 0);
271
- grandparent_index++;
272
- }
273
- seen_key = true;
274
-
275
- if (grandparant_file_switched && overlapped_bytes + curr_file_size >
276
- compaction->max_compaction_bytes()) {
277
- // Too much overlap for current output; start new output
278
- overlapped_bytes = 0;
279
- return true;
280
- }
281
-
282
- if (!files_to_cut_for_ttl.empty()) {
283
- if (cur_files_to_cut_for_ttl != -1) {
284
- // Previous key is inside the range of a file
285
- if (icmp->Compare(internal_key,
286
- files_to_cut_for_ttl[cur_files_to_cut_for_ttl]
287
- ->largest.Encode()) > 0) {
288
- next_files_to_cut_for_ttl = cur_files_to_cut_for_ttl + 1;
289
- cur_files_to_cut_for_ttl = -1;
290
- return true;
291
- }
292
- } else {
293
- // Look for the key position
294
- while (next_files_to_cut_for_ttl <
295
- static_cast<int>(files_to_cut_for_ttl.size())) {
296
- if (icmp->Compare(internal_key,
297
- files_to_cut_for_ttl[next_files_to_cut_for_ttl]
298
- ->smallest.Encode()) >= 0) {
299
- if (icmp->Compare(internal_key,
300
- files_to_cut_for_ttl[next_files_to_cut_for_ttl]
301
- ->largest.Encode()) <= 0) {
302
- // With in the current file
303
- cur_files_to_cut_for_ttl = next_files_to_cut_for_ttl;
304
- return true;
305
- }
306
- // Beyond the current file
307
- next_files_to_cut_for_ttl++;
308
- } else {
309
- // Still fall into the gap
310
- break;
311
- }
312
- }
313
- }
314
- }
315
-
316
- return false;
317
- }
318
-
319
- Status ProcessOutFlowIfNeeded(const Slice& key, const Slice& value) {
320
- if (!blob_garbage_meter) {
321
- return Status::OK();
322
- }
323
-
324
- return blob_garbage_meter->ProcessOutFlow(key, value);
325
- }
326
- };
327
-
328
- void CompactionJob::SubcompactionState::FillFilesToCutForTtl() {
329
- if (compaction->immutable_options()->compaction_style !=
330
- CompactionStyle::kCompactionStyleLevel ||
331
- compaction->immutable_options()->compaction_pri !=
332
- CompactionPri::kMinOverlappingRatio ||
333
- compaction->mutable_cf_options()->ttl == 0 ||
334
- compaction->num_input_levels() < 2 || compaction->bottommost_level()) {
335
- return;
336
- }
337
-
338
- // We define new file with oldest ancestor time to be younger than 1/4 TTL,
339
- // and an old one to be older than 1/2 TTL time.
340
- int64_t temp_current_time;
341
- auto get_time_status = compaction->immutable_options()->clock->GetCurrentTime(
342
- &temp_current_time);
343
- if (!get_time_status.ok()) {
344
- return;
345
- }
346
- uint64_t current_time = static_cast<uint64_t>(temp_current_time);
347
- if (current_time < compaction->mutable_cf_options()->ttl) {
348
- return;
349
- }
350
- uint64_t old_age_thres =
351
- current_time - compaction->mutable_cf_options()->ttl / 2;
352
-
353
- const std::vector<FileMetaData*>& olevel =
354
- *(compaction->inputs(compaction->num_input_levels() - 1));
355
- for (FileMetaData* file : olevel) {
356
- // Worth filtering out by start and end?
357
- uint64_t oldest_ancester_time = file->TryGetOldestAncesterTime();
358
- // We put old files if they are not too small to prevent a flood
359
- // of small files.
360
- if (oldest_ancester_time < old_age_thres &&
361
- file->fd.GetFileSize() >
362
- compaction->mutable_cf_options()->target_file_size_base / 2) {
363
- files_to_cut_for_ttl.push_back(file);
364
- }
365
- }
366
- }
367
-
368
- // Maintains state for the entire compaction
369
- struct CompactionJob::CompactionState {
370
- Compaction* const compaction;
371
-
372
- // REQUIRED: subcompaction states are stored in order of increasing
373
- // key-range
374
- std::vector<CompactionJob::SubcompactionState> sub_compact_states;
375
- Status status;
376
-
377
- size_t num_output_files = 0;
378
- uint64_t total_bytes = 0;
379
- size_t num_blob_output_files = 0;
380
- uint64_t total_blob_bytes = 0;
381
- uint64_t num_output_records = 0;
382
-
383
- explicit CompactionState(Compaction* c) : compaction(c) {}
384
-
385
- Slice SmallestUserKey() {
386
- for (const auto& sub_compact_state : sub_compact_states) {
387
- if (!sub_compact_state.outputs.empty() &&
388
- sub_compact_state.outputs[0].finished) {
389
- return sub_compact_state.outputs[0].meta.smallest.user_key();
390
- }
391
- }
392
- // If there is no finished output, return an empty slice.
393
- return Slice(nullptr, 0);
394
- }
395
-
396
- Slice LargestUserKey() {
397
- for (auto it = sub_compact_states.rbegin(); it < sub_compact_states.rend();
398
- ++it) {
399
- if (!it->outputs.empty() && it->current_output()->finished) {
400
- assert(it->current_output() != nullptr);
401
- return it->current_output()->meta.largest.user_key();
402
- }
403
- }
404
- // If there is no finished output, return an empty slice.
405
- return Slice(nullptr, 0);
406
- }
407
- };
408
-
409
- void CompactionJob::AggregateStatistics() {
410
- assert(compact_);
411
-
412
- for (SubcompactionState& sc : compact_->sub_compact_states) {
413
- auto& outputs = sc.outputs;
414
-
415
- if (!outputs.empty() && !outputs.back().meta.fd.file_size) {
416
- // An error occurred, so ignore the last output.
417
- outputs.pop_back();
418
- }
419
-
420
- compact_->num_output_files += outputs.size();
421
- compact_->total_bytes += sc.total_bytes;
422
-
423
- const auto& blobs = sc.blob_file_additions;
424
-
425
- compact_->num_blob_output_files += blobs.size();
426
-
427
- for (const auto& blob : blobs) {
428
- compact_->total_blob_bytes += blob.GetTotalBlobBytes();
429
- }
430
-
431
- compact_->num_output_records += sc.num_output_records;
432
-
433
- compaction_job_stats_->Add(sc.compaction_job_stats);
434
- }
435
- }
436
-
437
105
  CompactionJob::CompactionJob(
438
106
  int job_id, Compaction* compaction, const ImmutableDBOptions& db_options,
439
107
  const MutableDBOptions& mutable_db_options, const FileOptions& file_options,
@@ -462,8 +130,8 @@ CompactionJob::CompactionJob(
462
130
  stats_(stats),
463
131
  bottommost_level_(false),
464
132
  write_hint_(Env::WLTH_NOT_SET),
465
- job_id_(job_id),
466
133
  compaction_job_stats_(compaction_job_stats),
134
+ job_id_(job_id),
467
135
  dbname_(dbname),
468
136
  db_id_(db_id),
469
137
  db_session_id_(db_session_id),
@@ -568,12 +236,11 @@ void CompactionJob::Prepare() {
568
236
  StopWatch sw(db_options_.clock, stats_, SUBCOMPACTION_SETUP_TIME);
569
237
  GenSubcompactionBoundaries();
570
238
  }
571
- assert(sizes_.size() == boundaries_.size() + 1);
572
239
 
573
240
  for (size_t i = 0; i <= boundaries_.size(); i++) {
574
241
  Slice* start = i == 0 ? nullptr : &boundaries_[i - 1];
575
242
  Slice* end = i == boundaries_.size() ? nullptr : &boundaries_[i];
576
- compact_->sub_compact_states.emplace_back(c, start, end, sizes_[i],
243
+ compact_->sub_compact_states.emplace_back(c, start, end,
577
244
  static_cast<uint32_t>(i));
578
245
  }
579
246
  RecordInHistogram(stats_, NUM_SUBCOMPACTIONS_SCHEDULED,
@@ -581,10 +248,8 @@ void CompactionJob::Prepare() {
581
248
  } else {
582
249
  constexpr Slice* start = nullptr;
583
250
  constexpr Slice* end = nullptr;
584
- constexpr uint64_t size = 0;
585
251
 
586
- compact_->sub_compact_states.emplace_back(c, start, end, size,
587
- /*sub_job_id*/ 0);
252
+ compact_->sub_compact_states.emplace_back(c, start, end, /*sub_job_id*/ 0);
588
253
  }
589
254
  }
590
255
 
@@ -641,16 +306,6 @@ void CompactionJob::GenSubcompactionBoundaries() {
641
306
  }
642
307
  }
643
308
 
644
- Slice output_split_user_key;
645
- const InternalKey output_split_key = c->GetOutputSplitKey();
646
- if (output_split_key.Valid()) {
647
- output_split_user_key = ExtractUserKey(output_split_key.Encode());
648
- bounds.emplace_back(output_split_key.Encode());
649
- } else {
650
- // Empty user key indicates that splitting is not required here
651
- output_split_user_key = Slice();
652
- }
653
-
654
309
  std::sort(bounds.begin(), bounds.end(),
655
310
  [cfd_comparator](const Slice& a, const Slice& b) -> bool {
656
311
  return cfd_comparator->Compare(ExtractUserKey(a),
@@ -721,20 +376,12 @@ void CompactionJob::GenSubcompactionBoundaries() {
721
376
  // need to put an end boundary
722
377
  continue;
723
378
  }
724
- if (sum >= mean ||
725
- (!output_split_user_key.empty() &&
726
- cfd_comparator->Compare(ExtractUserKey(ranges[i].range.limit),
727
- output_split_user_key) == 0)) {
379
+ if (sum >= mean) {
728
380
  boundaries_.emplace_back(ExtractUserKey(ranges[i].range.limit));
729
- sizes_.emplace_back(sum);
730
381
  subcompactions--;
731
382
  sum = 0;
732
383
  }
733
384
  }
734
- sizes_.emplace_back(sum + ranges.back().size);
735
- } else {
736
- // Only one range so its size is the total sum of sizes computed above
737
- sizes_.emplace_back(sum);
738
385
  }
739
386
  }
740
387
 
@@ -766,16 +413,17 @@ Status CompactionJob::Run() {
766
413
  thread.join();
767
414
  }
768
415
 
769
- compaction_stats_.micros = db_options_.clock->NowMicros() - start_micros;
770
- compaction_stats_.cpu_micros = 0;
771
- for (size_t i = 0; i < compact_->sub_compact_states.size(); i++) {
772
- compaction_stats_.cpu_micros +=
773
- compact_->sub_compact_states[i].compaction_job_stats.cpu_micros;
416
+ compaction_stats_.SetMicros(db_options_.clock->NowMicros() - start_micros);
417
+
418
+ for (auto& state : compact_->sub_compact_states) {
419
+ compaction_stats_.AddCpuMicros(state.compaction_job_stats.cpu_micros);
420
+ state.RemoveLastEmptyOutput();
774
421
  }
775
422
 
776
- RecordTimeToHistogram(stats_, COMPACTION_TIME, compaction_stats_.micros);
423
+ RecordTimeToHistogram(stats_, COMPACTION_TIME,
424
+ compaction_stats_.stats.micros);
777
425
  RecordTimeToHistogram(stats_, COMPACTION_CPU_TIME,
778
- compaction_stats_.cpu_micros);
426
+ compaction_stats_.stats.cpu_micros);
779
427
 
780
428
  TEST_SYNC_POINT("CompactionJob::Run:BeforeVerify");
781
429
 
@@ -791,7 +439,7 @@ Status CompactionJob::Run() {
791
439
  break;
792
440
  }
793
441
 
794
- if (!state.blob_file_additions.empty()) {
442
+ if (state.Current().HasBlobFileAdditions()) {
795
443
  wrote_new_blob_files = true;
796
444
  }
797
445
  }
@@ -823,9 +471,9 @@ Status CompactionJob::Run() {
823
471
  }
824
472
  if (status.ok()) {
825
473
  thread_pool.clear();
826
- std::vector<const CompactionJob::SubcompactionState::Output*> files_output;
474
+ std::vector<const CompactionOutputs::Output*> files_output;
827
475
  for (const auto& state : compact_->sub_compact_states) {
828
- for (const auto& output : state.outputs) {
476
+ for (const auto& output : state.GetOutputs()) {
829
477
  files_output.emplace_back(&output);
830
478
  }
831
479
  }
@@ -907,7 +555,7 @@ Status CompactionJob::Run() {
907
555
 
908
556
  TablePropertiesCollection tp;
909
557
  for (const auto& state : compact_->sub_compact_states) {
910
- for (const auto& output : state.outputs) {
558
+ for (const auto& output : state.GetOutputs()) {
911
559
  auto fn =
912
560
  TableFileName(state.compaction->immutable_options()->cf_paths,
913
561
  output.meta.fd.GetNumber(), output.meta.fd.GetPathId());
@@ -917,7 +565,7 @@ Status CompactionJob::Run() {
917
565
  compact_->compaction->SetOutputTableProperties(std::move(tp));
918
566
 
919
567
  // Finish up all book-keeping to unify the subcompaction results
920
- AggregateStatistics();
568
+ compact_->AggregateCompactionStats(compaction_stats_, *compaction_job_stats_);
921
569
  UpdateCompactionStats();
922
570
 
923
571
  RecordCompactionIOStats();
@@ -939,8 +587,9 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
939
587
  ColumnFamilyData* cfd = compact_->compaction->column_family_data();
940
588
  assert(cfd);
941
589
 
942
- cfd->internal_stats()->AddCompactionStats(
943
- compact_->compaction->output_level(), thread_pri_, compaction_stats_);
590
+ int output_level = compact_->compaction->output_level();
591
+ cfd->internal_stats()->AddCompactionStats(output_level, thread_pri_,
592
+ compaction_stats_);
944
593
 
945
594
  if (status.ok()) {
946
595
  status = InstallCompactionResults(mutable_cf_options);
@@ -951,7 +600,7 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
951
600
 
952
601
  VersionStorageInfo::LevelSummaryStorage tmp;
953
602
  auto vstorage = cfd->current()->storage_info();
954
- const auto& stats = compaction_stats_;
603
+ const auto& stats = compaction_stats_.stats;
955
604
 
956
605
  double read_write_amp = 0.0;
957
606
  double write_amp = 0.0;
@@ -1013,6 +662,18 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
1013
662
  blob_files.back()->GetBlobFileNumber());
1014
663
  }
1015
664
 
665
+ if (compaction_stats_.has_penultimate_level_output) {
666
+ ROCKS_LOG_BUFFER(
667
+ log_buffer_,
668
+ "[%s] has Penultimate Level output: %" PRIu64
669
+ ", level %d, number of files: %" PRIu64 ", number of records: %" PRIu64,
670
+ column_family_name.c_str(),
671
+ compaction_stats_.penultimate_level_stats.bytes_written,
672
+ compact_->compaction->GetPenultimateLevel(),
673
+ compaction_stats_.penultimate_level_stats.num_output_files,
674
+ compaction_stats_.penultimate_level_stats.num_output_records);
675
+ }
676
+
1016
677
  UpdateCompactionJobStats(stats);
1017
678
 
1018
679
  auto stream = event_logger_->LogToBuffer(log_buffer_, 8192);
@@ -1021,16 +682,16 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
1021
682
  << "compaction_time_micros" << stats.micros
1022
683
  << "compaction_time_cpu_micros" << stats.cpu_micros << "output_level"
1023
684
  << compact_->compaction->output_level() << "num_output_files"
1024
- << compact_->num_output_files << "total_output_size"
1025
- << compact_->total_bytes;
685
+ << stats.num_output_files << "total_output_size"
686
+ << stats.bytes_written;
1026
687
 
1027
- if (compact_->num_blob_output_files > 0) {
1028
- stream << "num_blob_output_files" << compact_->num_blob_output_files
1029
- << "total_blob_output_size" << compact_->total_blob_bytes;
688
+ if (stats.num_output_files_blob > 0) {
689
+ stream << "num_blob_output_files" << stats.num_output_files_blob
690
+ << "total_blob_output_size" << stats.bytes_written_blob;
1030
691
  }
1031
692
 
1032
693
  stream << "num_input_records" << stats.num_input_records
1033
- << "num_output_records" << compact_->num_output_records
694
+ << "num_output_records" << stats.num_output_records
1034
695
  << "num_subcompactions" << compact_->sub_compact_states.size()
1035
696
  << "output_compression"
1036
697
  << CompressionTypeToString(compact_->compaction->output_compression());
@@ -1064,217 +725,22 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
1064
725
  stream << "blob_file_tail" << blob_files.back()->GetBlobFileNumber();
1065
726
  }
1066
727
 
1067
- CleanupCompaction();
1068
- return status;
1069
- }
1070
-
1071
- #ifndef ROCKSDB_LITE
1072
- CompactionServiceJobStatus
1073
- CompactionJob::ProcessKeyValueCompactionWithCompactionService(
1074
- SubcompactionState* sub_compact) {
1075
- assert(sub_compact);
1076
- assert(sub_compact->compaction);
1077
- assert(db_options_.compaction_service);
1078
-
1079
- const Compaction* compaction = sub_compact->compaction;
1080
- CompactionServiceInput compaction_input;
1081
- compaction_input.output_level = compaction->output_level();
1082
- compaction_input.db_id = db_id_;
1083
-
1084
- const std::vector<CompactionInputFiles>& inputs =
1085
- *(compact_->compaction->inputs());
1086
- for (const auto& files_per_level : inputs) {
1087
- for (const auto& file : files_per_level.files) {
1088
- compaction_input.input_files.emplace_back(
1089
- MakeTableFileName(file->fd.GetNumber()));
1090
- }
1091
- }
1092
- compaction_input.column_family.name =
1093
- compaction->column_family_data()->GetName();
1094
- compaction_input.column_family.options =
1095
- compaction->column_family_data()->GetLatestCFOptions();
1096
- compaction_input.db_options =
1097
- BuildDBOptions(db_options_, mutable_db_options_copy_);
1098
- compaction_input.snapshots = existing_snapshots_;
1099
- compaction_input.has_begin = sub_compact->start;
1100
- compaction_input.begin =
1101
- compaction_input.has_begin ? sub_compact->start->ToString() : "";
1102
- compaction_input.has_end = sub_compact->end;
1103
- compaction_input.end =
1104
- compaction_input.has_end ? sub_compact->end->ToString() : "";
1105
- compaction_input.approx_size = sub_compact->approx_size;
1106
-
1107
- std::string compaction_input_binary;
1108
- Status s = compaction_input.Write(&compaction_input_binary);
1109
- if (!s.ok()) {
1110
- sub_compact->status = s;
1111
- return CompactionServiceJobStatus::kFailure;
1112
- }
1113
-
1114
- std::ostringstream input_files_oss;
1115
- bool is_first_one = true;
1116
- for (const auto& file : compaction_input.input_files) {
1117
- input_files_oss << (is_first_one ? "" : ", ") << file;
1118
- is_first_one = false;
1119
- }
1120
-
1121
- ROCKS_LOG_INFO(
1122
- db_options_.info_log,
1123
- "[%s] [JOB %d] Starting remote compaction (output level: %d): %s",
1124
- compaction_input.column_family.name.c_str(), job_id_,
1125
- compaction_input.output_level, input_files_oss.str().c_str());
1126
- CompactionServiceJobInfo info(dbname_, db_id_, db_session_id_,
1127
- GetCompactionId(sub_compact), thread_pri_);
1128
- CompactionServiceJobStatus compaction_status =
1129
- db_options_.compaction_service->StartV2(info, compaction_input_binary);
1130
- switch (compaction_status) {
1131
- case CompactionServiceJobStatus::kSuccess:
1132
- break;
1133
- case CompactionServiceJobStatus::kFailure:
1134
- sub_compact->status = Status::Incomplete(
1135
- "CompactionService failed to start compaction job.");
1136
- ROCKS_LOG_WARN(db_options_.info_log,
1137
- "[%s] [JOB %d] Remote compaction failed to start.",
1138
- compaction_input.column_family.name.c_str(), job_id_);
1139
- return compaction_status;
1140
- case CompactionServiceJobStatus::kUseLocal:
1141
- ROCKS_LOG_INFO(
1142
- db_options_.info_log,
1143
- "[%s] [JOB %d] Remote compaction fallback to local by API Start.",
1144
- compaction_input.column_family.name.c_str(), job_id_);
1145
- return compaction_status;
1146
- default:
1147
- assert(false); // unknown status
1148
- break;
1149
- }
1150
-
1151
- ROCKS_LOG_INFO(db_options_.info_log,
1152
- "[%s] [JOB %d] Waiting for remote compaction...",
1153
- compaction_input.column_family.name.c_str(), job_id_);
1154
- std::string compaction_result_binary;
1155
- compaction_status = db_options_.compaction_service->WaitForCompleteV2(
1156
- info, &compaction_result_binary);
1157
-
1158
- if (compaction_status == CompactionServiceJobStatus::kUseLocal) {
1159
- ROCKS_LOG_INFO(db_options_.info_log,
1160
- "[%s] [JOB %d] Remote compaction fallback to local by API "
1161
- "WaitForComplete.",
1162
- compaction_input.column_family.name.c_str(), job_id_);
1163
- return compaction_status;
1164
- }
1165
-
1166
- CompactionServiceResult compaction_result;
1167
- s = CompactionServiceResult::Read(compaction_result_binary,
1168
- &compaction_result);
1169
-
1170
- if (compaction_status == CompactionServiceJobStatus::kFailure) {
1171
- if (s.ok()) {
1172
- if (compaction_result.status.ok()) {
1173
- sub_compact->status = Status::Incomplete(
1174
- "CompactionService failed to run the compaction job (even though "
1175
- "the internal status is okay).");
1176
- } else {
1177
- // set the current sub compaction status with the status returned from
1178
- // remote
1179
- sub_compact->status = compaction_result.status;
1180
- }
1181
- } else {
1182
- sub_compact->status = Status::Incomplete(
1183
- "CompactionService failed to run the compaction job (and no valid "
1184
- "result is returned).");
1185
- compaction_result.status.PermitUncheckedError();
1186
- }
1187
- ROCKS_LOG_WARN(db_options_.info_log,
1188
- "[%s] [JOB %d] Remote compaction failed.",
1189
- compaction_input.column_family.name.c_str(), job_id_);
1190
- return compaction_status;
1191
- }
1192
-
1193
- if (!s.ok()) {
1194
- sub_compact->status = s;
1195
- compaction_result.status.PermitUncheckedError();
1196
- return CompactionServiceJobStatus::kFailure;
1197
- }
1198
- sub_compact->status = compaction_result.status;
1199
-
1200
- std::ostringstream output_files_oss;
1201
- is_first_one = true;
1202
- for (const auto& file : compaction_result.output_files) {
1203
- output_files_oss << (is_first_one ? "" : ", ") << file.file_name;
1204
- is_first_one = false;
1205
- }
1206
-
1207
- ROCKS_LOG_INFO(db_options_.info_log,
1208
- "[%s] [JOB %d] Receive remote compaction result, output path: "
1209
- "%s, files: %s",
1210
- compaction_input.column_family.name.c_str(), job_id_,
1211
- compaction_result.output_path.c_str(),
1212
- output_files_oss.str().c_str());
1213
-
1214
- if (!s.ok()) {
1215
- sub_compact->status = s;
1216
- return CompactionServiceJobStatus::kFailure;
728
+ if (compaction_stats_.has_penultimate_level_output) {
729
+ InternalStats::CompactionStats& pl_stats =
730
+ compaction_stats_.penultimate_level_stats;
731
+ stream << "penultimate_level_num_output_files" << pl_stats.num_output_files;
732
+ stream << "penultimate_level_bytes_written" << pl_stats.bytes_written;
733
+ stream << "penultimate_level_num_output_records"
734
+ << pl_stats.num_output_records;
735
+ stream << "penultimate_level_num_output_files_blob"
736
+ << pl_stats.num_output_files_blob;
737
+ stream << "penultimate_level_bytes_written_blob"
738
+ << pl_stats.bytes_written_blob;
1217
739
  }
1218
740
 
1219
- for (const auto& file : compaction_result.output_files) {
1220
- uint64_t file_num = versions_->NewFileNumber();
1221
- auto src_file = compaction_result.output_path + "/" + file.file_name;
1222
- auto tgt_file = TableFileName(compaction->immutable_options()->cf_paths,
1223
- file_num, compaction->output_path_id());
1224
- s = fs_->RenameFile(src_file, tgt_file, IOOptions(), nullptr);
1225
- if (!s.ok()) {
1226
- sub_compact->status = s;
1227
- return CompactionServiceJobStatus::kFailure;
1228
- }
1229
-
1230
- FileMetaData meta;
1231
- uint64_t file_size;
1232
- s = fs_->GetFileSize(tgt_file, IOOptions(), &file_size, nullptr);
1233
- if (!s.ok()) {
1234
- sub_compact->status = s;
1235
- return CompactionServiceJobStatus::kFailure;
1236
- }
1237
- meta.fd = FileDescriptor(file_num, compaction->output_path_id(), file_size,
1238
- file.smallest_seqno, file.largest_seqno);
1239
- meta.smallest.DecodeFrom(file.smallest_internal_key);
1240
- meta.largest.DecodeFrom(file.largest_internal_key);
1241
- meta.oldest_ancester_time = file.oldest_ancester_time;
1242
- meta.file_creation_time = file.file_creation_time;
1243
- meta.marked_for_compaction = file.marked_for_compaction;
1244
- meta.unique_id = file.unique_id;
1245
-
1246
- auto cfd = compaction->column_family_data();
1247
- sub_compact->outputs.emplace_back(std::move(meta),
1248
- cfd->internal_comparator(), false, false,
1249
- true, file.paranoid_hash);
1250
- }
1251
- sub_compact->compaction_job_stats = compaction_result.stats;
1252
- sub_compact->num_output_records = compaction_result.num_output_records;
1253
- sub_compact->approx_size = compaction_input.approx_size; // is this used?
1254
- sub_compact->total_bytes = compaction_result.total_bytes;
1255
- RecordTick(stats_, REMOTE_COMPACT_READ_BYTES, compaction_result.bytes_read);
1256
- RecordTick(stats_, REMOTE_COMPACT_WRITE_BYTES,
1257
- compaction_result.bytes_written);
1258
- return CompactionServiceJobStatus::kSuccess;
1259
- }
1260
-
1261
- void CompactionJob::BuildSubcompactionJobInfo(
1262
- SubcompactionState* sub_compact,
1263
- SubcompactionJobInfo* subcompaction_job_info) const {
1264
- Compaction* c = compact_->compaction;
1265
- ColumnFamilyData* cfd = c->column_family_data();
1266
-
1267
- subcompaction_job_info->cf_id = cfd->GetID();
1268
- subcompaction_job_info->cf_name = cfd->GetName();
1269
- subcompaction_job_info->status = sub_compact->status;
1270
- subcompaction_job_info->thread_id = env_->GetThreadID();
1271
- subcompaction_job_info->job_id = job_id_;
1272
- subcompaction_job_info->subcompaction_job_id = sub_compact->sub_job_id;
1273
- subcompaction_job_info->base_input_level = c->start_level();
1274
- subcompaction_job_info->output_level = c->output_level();
1275
- subcompaction_job_info->stats = sub_compact->compaction_job_stats;
741
+ CleanupCompaction();
742
+ return status;
1276
743
  }
1277
- #endif // !ROCKSDB_LITE
1278
744
 
1279
745
  void CompactionJob::NotifyOnSubcompactionBegin(
1280
746
  SubcompactionState* sub_compact) {
@@ -1295,9 +761,11 @@ void CompactionJob::NotifyOnSubcompactionBegin(
1295
761
  sub_compact->notify_on_subcompaction_completion = true;
1296
762
 
1297
763
  SubcompactionJobInfo info{};
1298
- BuildSubcompactionJobInfo(sub_compact, &info);
764
+ sub_compact->BuildSubcompactionJobInfo(info);
765
+ info.job_id = static_cast<int>(job_id_);
766
+ info.thread_id = env_->GetThreadID();
1299
767
 
1300
- for (auto listener : db_options_.listeners) {
768
+ for (const auto& listener : db_options_.listeners) {
1301
769
  listener->OnSubcompactionBegin(info);
1302
770
  }
1303
771
  info.status.PermitUncheckedError();
@@ -1323,9 +791,11 @@ void CompactionJob::NotifyOnSubcompactionCompleted(
1323
791
  }
1324
792
 
1325
793
  SubcompactionJobInfo info{};
1326
- BuildSubcompactionJobInfo(sub_compact, &info);
794
+ sub_compact->BuildSubcompactionJobInfo(info);
795
+ info.job_id = static_cast<int>(job_id_);
796
+ info.thread_id = env_->GetThreadID();
1327
797
 
1328
- for (auto listener : db_options_.listeners) {
798
+ for (const auto& listener : db_options_.listeners) {
1329
799
  listener->OnSubcompactionCompleted(info);
1330
800
  }
1331
801
  #else
@@ -1373,8 +843,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1373
843
 
1374
844
  NotifyOnSubcompactionBegin(sub_compact);
1375
845
 
1376
- CompactionRangeDelAggregator range_del_agg(&cfd->internal_comparator(),
1377
- existing_snapshots_);
846
+ auto range_del_agg = std::make_unique<CompactionRangeDelAggregator>(
847
+ &cfd->internal_comparator(), existing_snapshots_);
1378
848
 
1379
849
  // TODO: since we already use C++17, should use
1380
850
  // std::optional<const Slice> instead.
@@ -1400,7 +870,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1400
870
  // Although the v2 aggregator is what the level iterator(s) know about,
1401
871
  // the AddTombstones calls will be propagated down to the v1 aggregator.
1402
872
  std::unique_ptr<InternalIterator> raw_input(versions_->MakeInputIterator(
1403
- read_options, sub_compact->compaction, &range_del_agg,
873
+ read_options, sub_compact->compaction, range_del_agg.get(),
1404
874
  file_options_for_read_,
1405
875
  (start == nullptr) ? std::optional<const Slice>{}
1406
876
  : std::optional<const Slice>{*start},
@@ -1433,9 +903,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1433
903
  std::unique_ptr<InternalIterator> blob_counter;
1434
904
 
1435
905
  if (sub_compact->compaction->DoesInputReferenceBlobFiles()) {
1436
- sub_compact->blob_garbage_meter = std::make_unique<BlobGarbageMeter>();
1437
- blob_counter = std::make_unique<BlobCountingIterator>(
1438
- input, sub_compact->blob_garbage_meter.get());
906
+ BlobGarbageMeter* meter = sub_compact->Current().CreateBlobGarbageMeter();
907
+ blob_counter = std::make_unique<BlobCountingIterator>(input, meter);
1439
908
  input = blob_counter.get();
1440
909
  }
1441
910
 
@@ -1484,6 +953,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1484
953
 
1485
954
  std::vector<std::string> blob_file_paths;
1486
955
 
956
+ // TODO: BlobDB to support output_to_penultimate_level compaction, which needs
957
+ // 2 builders, so may need to move to `CompactionOutputs`
1487
958
  std::unique_ptr<BlobFileBuilder> blob_file_builder(
1488
959
  (mutable_cf_options->enable_blob_files &&
1489
960
  sub_compact->compaction->output_level() >=
@@ -1494,7 +965,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1494
965
  mutable_cf_options, &file_options_, job_id_, cfd->GetID(),
1495
966
  cfd->GetName(), Env::IOPriority::IO_LOW, write_hint_,
1496
967
  io_tracer_, blob_callback_, BlobFileCreationReason::kCompaction,
1497
- &blob_file_paths, &sub_compact->blob_file_additions)
968
+ &blob_file_paths,
969
+ sub_compact->Current().GetBlobFileAdditionsPtr())
1498
970
  : nullptr);
1499
971
 
1500
972
  TEST_SYNC_POINT("CompactionJob::Run():Inprogress");
@@ -1503,44 +975,53 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1503
975
  reinterpret_cast<void*>(
1504
976
  const_cast<std::atomic<bool>*>(&manual_compaction_canceled_)));
1505
977
 
1506
- Status status;
1507
978
  const std::string* const full_history_ts_low =
1508
979
  full_history_ts_low_.empty() ? nullptr : &full_history_ts_low_;
1509
980
  const SequenceNumber job_snapshot_seq =
1510
981
  job_context_ ? job_context_->GetJobSnapshotSequence()
1511
982
  : kMaxSequenceNumber;
1512
- sub_compact->c_iter.reset(new CompactionIterator(
983
+
984
+ auto c_iter = std::make_unique<CompactionIterator>(
1513
985
  input, cfd->user_comparator(), &merge, versions_->LastSequence(),
1514
986
  &existing_snapshots_, earliest_write_conflict_snapshot_, job_snapshot_seq,
1515
987
  snapshot_checker_, env_, ShouldReportDetailedTime(env_, stats_),
1516
- /*expect_valid_internal_key=*/true, &range_del_agg,
988
+ /*expect_valid_internal_key=*/true, range_del_agg.get(),
1517
989
  blob_file_builder.get(), db_options_.allow_data_in_errors,
1518
990
  db_options_.enforce_single_del_contracts, manual_compaction_canceled_,
1519
991
  sub_compact->compaction, compaction_filter, shutting_down_,
1520
- db_options_.info_log, full_history_ts_low));
1521
- auto c_iter = sub_compact->c_iter.get();
992
+ db_options_.info_log, full_history_ts_low);
1522
993
  c_iter->SeekToFirst();
994
+
995
+ // Assign range delete aggregator to the target output level, which makes sure
996
+ // it only output to single level
997
+ sub_compact->AssignRangeDelAggregator(std::move(range_del_agg));
998
+
1523
999
  if (c_iter->Valid() && sub_compact->compaction->output_level() != 0) {
1524
1000
  sub_compact->FillFilesToCutForTtl();
1525
1001
  // ShouldStopBefore() maintains state based on keys processed so far. The
1526
1002
  // compaction loop always calls it on the "next" key, thus won't tell it the
1527
1003
  // first key. So we do that here.
1528
- sub_compact->ShouldStopBefore(c_iter->key(),
1529
- sub_compact->current_output_file_size);
1004
+ sub_compact->ShouldStopBefore(c_iter->key());
1530
1005
  }
1531
1006
  const auto& c_iter_stats = c_iter->iter_stats();
1532
1007
 
1533
- std::unique_ptr<SstPartitioner> partitioner =
1534
- sub_compact->compaction->output_level() == 0
1535
- ? nullptr
1536
- : sub_compact->compaction->CreateSstPartitioner();
1537
- std::string last_key_for_partitioner;
1008
+ // define the open and close functions for the compaction files, which will be
1009
+ // used open/close output files when needed.
1010
+ const CompactionFileOpenFunc open_file_func =
1011
+ [this, sub_compact](CompactionOutputs& outputs) {
1012
+ return this->OpenCompactionOutputFile(sub_compact, outputs);
1013
+ };
1014
+ const CompactionFileCloseFunc close_file_func =
1015
+ [this, sub_compact](CompactionOutputs& outputs, const Status& status,
1016
+ const Slice& next_table_min_key) {
1017
+ return this->FinishCompactionOutputFile(status, sub_compact, outputs,
1018
+ next_table_min_key);
1019
+ };
1538
1020
 
1021
+ Status status;
1539
1022
  while (status.ok() && !cfd->IsDropped() && c_iter->Valid()) {
1540
1023
  // Invariant: c_iter.status() is guaranteed to be OK if c_iter->Valid()
1541
1024
  // returns true.
1542
- const Slice& key = c_iter->key();
1543
- const Slice& value = c_iter->value();
1544
1025
 
1545
1026
  assert(!end ||
1546
1027
  cfd->user_comparator()->Compare(c_iter->user_key(), *end) < 0);
@@ -1552,88 +1033,33 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1552
1033
  RecordCompactionIOStats();
1553
1034
  }
1554
1035
 
1555
- // Open output file if necessary
1556
- if (sub_compact->builder == nullptr) {
1557
- status = OpenCompactionOutputFile(sub_compact);
1558
- if (!status.ok()) {
1559
- break;
1560
- }
1561
- }
1562
- status = sub_compact->AddToBuilder(key, value);
1036
+ // Add current compaction_iterator key to target compaction output, if the
1037
+ // output file needs to be close or open, it will call the `open_file_func`
1038
+ // and `close_file_func`.
1039
+ // TODO: it would be better to have the compaction file open/close moved
1040
+ // into `CompactionOutputs` which has the output file information.
1041
+ status = sub_compact->AddToOutput(*c_iter, open_file_func, close_file_func);
1563
1042
  if (!status.ok()) {
1564
1043
  break;
1565
1044
  }
1566
1045
 
1567
- status = sub_compact->ProcessOutFlowIfNeeded(key, value);
1568
- if (!status.ok()) {
1569
- break;
1570
- }
1571
-
1572
- const ParsedInternalKey& ikey = c_iter->ikey();
1573
- status = sub_compact->current_output()->meta.UpdateBoundaries(
1574
- key, value, ikey.sequence, ikey.type);
1575
- if (!status.ok()) {
1576
- break;
1577
- }
1578
-
1579
- sub_compact->current_output_file_size =
1580
- sub_compact->builder->EstimatedFileSize();
1581
- sub_compact->num_output_records++;
1582
-
1583
- // Close output file if it is big enough. Two possibilities determine it's
1584
- // time to close it: (1) the current key should be this file's last key, (2)
1585
- // the next key should not be in this file.
1586
- //
1587
- // TODO(aekmekji): determine if file should be closed earlier than this
1588
- // during subcompactions (i.e. if output size, estimated by input size, is
1589
- // going to be 1.2MB and max_output_file_size = 1MB, prefer to have 0.6MB
1590
- // and 0.6MB instead of 1MB and 0.2MB)
1591
- bool output_file_ended = false;
1592
- if (sub_compact->compaction->output_level() != 0 &&
1593
- sub_compact->current_output_file_size >=
1594
- sub_compact->compaction->max_output_file_size()) {
1595
- // (1) this key terminates the file. For historical reasons, the iterator
1596
- // status before advancing will be given to FinishCompactionOutputFile().
1597
- output_file_ended = true;
1598
- }
1599
1046
  TEST_SYNC_POINT_CALLBACK(
1600
1047
  "CompactionJob::Run():PausingManualCompaction:2",
1601
1048
  reinterpret_cast<void*>(
1602
1049
  const_cast<std::atomic<bool>*>(&manual_compaction_canceled_)));
1603
- if (partitioner.get()) {
1604
- last_key_for_partitioner.assign(c_iter->user_key().data_,
1605
- c_iter->user_key().size_);
1606
- }
1607
1050
  c_iter->Next();
1608
1051
  if (c_iter->status().IsManualCompactionPaused()) {
1609
1052
  break;
1610
1053
  }
1611
- if (!output_file_ended && c_iter->Valid()) {
1612
- if (((partitioner.get() &&
1613
- partitioner->ShouldPartition(PartitionerRequest(
1614
- last_key_for_partitioner, c_iter->user_key(),
1615
- sub_compact->current_output_file_size)) == kRequired) ||
1616
- (sub_compact->compaction->output_level() != 0 &&
1617
- sub_compact->ShouldStopBefore(
1618
- c_iter->key(), sub_compact->current_output_file_size))) &&
1619
- sub_compact->builder != nullptr) {
1620
- // (2) this key belongs to the next file. For historical reasons, the
1621
- // iterator status after advancing will be given to
1622
- // FinishCompactionOutputFile().
1623
- output_file_ended = true;
1624
- }
1625
- }
1626
- if (output_file_ended) {
1627
- const Slice* next_key = nullptr;
1628
- if (c_iter->Valid()) {
1629
- next_key = &c_iter->key();
1630
- }
1631
- CompactionIterationStats range_del_out_stats;
1632
- status = FinishCompactionOutputFile(input->status(), sub_compact,
1633
- &range_del_agg, &range_del_out_stats,
1634
- next_key);
1635
- RecordDroppedKeys(range_del_out_stats,
1636
- &sub_compact->compaction_job_stats);
1054
+
1055
+ // TODO: Support earlier file cut for the penultimate level files. Maybe by
1056
+ // moving `ShouldStopBefore()` to `CompactionOutputs` class. Currently
1057
+ // the penultimate level output is only cut when it reaches the size limit.
1058
+ if (!sub_compact->Current().IsPendingClose() &&
1059
+ sub_compact->compaction->output_level() != 0 &&
1060
+ !sub_compact->compaction->SupportsPerKeyPlacement() &&
1061
+ sub_compact->ShouldStopBefore(c_iter->key())) {
1062
+ sub_compact->Current().SetPendingClose();
1637
1063
  }
1638
1064
  }
1639
1065
 
@@ -1688,23 +1114,12 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1688
1114
  status = c_iter->status();
1689
1115
  }
1690
1116
 
1691
- if (status.ok() && sub_compact->builder == nullptr &&
1692
- sub_compact->outputs.size() == 0 && !range_del_agg.IsEmpty()) {
1693
- // handle subcompaction containing only range deletions
1694
- status = OpenCompactionOutputFile(sub_compact);
1695
- }
1696
-
1697
1117
  // Call FinishCompactionOutputFile() even if status is not ok: it needs to
1698
- // close the output file.
1699
- if (sub_compact->builder != nullptr) {
1700
- CompactionIterationStats range_del_out_stats;
1701
- Status s = FinishCompactionOutputFile(status, sub_compact, &range_del_agg,
1702
- &range_del_out_stats);
1703
- if (!s.ok() && status.ok()) {
1704
- status = s;
1705
- }
1706
- RecordDroppedKeys(range_del_out_stats, &sub_compact->compaction_job_stats);
1707
- }
1118
+ // close the output files. Open file function is also passed, in case there's
1119
+ // only range-dels, no file was opened, to save the range-dels, it need to
1120
+ // create a new output file.
1121
+ status = sub_compact->CloseCompactionFiles(status, open_file_func,
1122
+ close_file_func);
1708
1123
 
1709
1124
  if (blob_file_builder) {
1710
1125
  if (status.ok()) {
@@ -1713,6 +1128,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1713
1128
  blob_file_builder->Abandon(status);
1714
1129
  }
1715
1130
  blob_file_builder.reset();
1131
+ sub_compact->Current().UpdateBlobStats();
1716
1132
  }
1717
1133
 
1718
1134
  sub_compact->compaction_job_stats.cpu_micros =
@@ -1737,8 +1153,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1737
1153
  }
1738
1154
  #ifdef ROCKSDB_ASSERT_STATUS_CHECKED
1739
1155
  if (!status.ok()) {
1740
- if (sub_compact->c_iter) {
1741
- sub_compact->c_iter->status().PermitUncheckedError();
1156
+ if (c_iter) {
1157
+ c_iter->status().PermitUncheckedError();
1742
1158
  }
1743
1159
  if (input) {
1744
1160
  input->status().PermitUncheckedError();
@@ -1746,7 +1162,6 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1746
1162
  }
1747
1163
  #endif // ROCKSDB_ASSERT_STATUS_CHECKED
1748
1164
 
1749
- sub_compact->c_iter.reset();
1750
1165
  blob_counter.reset();
1751
1166
  clip.reset();
1752
1167
  raw_input.reset();
@@ -1754,7 +1169,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1754
1169
  NotifyOnSubcompactionCompleted(sub_compact);
1755
1170
  }
1756
1171
 
1757
- uint64_t CompactionJob::GetCompactionId(SubcompactionState* sub_compact) {
1172
+ uint64_t CompactionJob::GetCompactionId(SubcompactionState* sub_compact) const {
1758
1173
  return (uint64_t)job_id_ << 32 | sub_compact->sub_job_id;
1759
1174
  }
1760
1175
 
@@ -1797,204 +1212,50 @@ void CompactionJob::RecordDroppedKeys(
1797
1212
 
1798
1213
  Status CompactionJob::FinishCompactionOutputFile(
1799
1214
  const Status& input_status, SubcompactionState* sub_compact,
1800
- CompactionRangeDelAggregator* range_del_agg,
1801
- CompactionIterationStats* range_del_out_stats,
1802
- const Slice* next_table_min_key /* = nullptr */) {
1215
+ CompactionOutputs& outputs, const Slice& next_table_min_key) {
1803
1216
  AutoThreadOperationStageUpdater stage_updater(
1804
1217
  ThreadStatus::STAGE_COMPACTION_SYNC_FILE);
1805
1218
  assert(sub_compact != nullptr);
1806
- assert(sub_compact->outfile);
1807
- assert(sub_compact->builder != nullptr);
1808
- assert(sub_compact->current_output() != nullptr);
1219
+ assert(outputs.HasBuilder());
1809
1220
 
1810
- uint64_t output_number = sub_compact->current_output()->meta.fd.GetNumber();
1221
+ FileMetaData* meta = outputs.GetMetaData();
1222
+ uint64_t output_number = meta->fd.GetNumber();
1811
1223
  assert(output_number != 0);
1812
1224
 
1813
1225
  ColumnFamilyData* cfd = sub_compact->compaction->column_family_data();
1814
- const Comparator* ucmp = cfd->user_comparator();
1815
1226
  std::string file_checksum = kUnknownFileChecksum;
1816
1227
  std::string file_checksum_func_name = kUnknownFileChecksumFuncName;
1817
1228
 
1818
1229
  // Check for iterator errors
1819
1230
  Status s = input_status;
1820
- auto meta = &sub_compact->current_output()->meta;
1821
- assert(meta != nullptr);
1822
- if (s.ok()) {
1823
- Slice lower_bound_guard, upper_bound_guard;
1824
- std::string smallest_user_key;
1825
- const Slice *lower_bound, *upper_bound;
1826
- bool lower_bound_from_sub_compact = false;
1827
- if (sub_compact->outputs.size() == 1) {
1828
- // For the first output table, include range tombstones before the min key
1829
- // but after the subcompaction boundary.
1830
- lower_bound = sub_compact->start;
1831
- lower_bound_from_sub_compact = true;
1832
- } else if (meta->smallest.size() > 0) {
1833
- // For subsequent output tables, only include range tombstones from min
1834
- // key onwards since the previous file was extended to contain range
1835
- // tombstones falling before min key.
1836
- smallest_user_key = meta->smallest.user_key().ToString(false /*hex*/);
1837
- lower_bound_guard = Slice(smallest_user_key);
1838
- lower_bound = &lower_bound_guard;
1839
- } else {
1840
- lower_bound = nullptr;
1841
- }
1842
- if (next_table_min_key != nullptr) {
1843
- // This may be the last file in the subcompaction in some cases, so we
1844
- // need to compare the end key of subcompaction with the next file start
1845
- // key. When the end key is chosen by the subcompaction, we know that
1846
- // it must be the biggest key in output file. Therefore, it is safe to
1847
- // use the smaller key as the upper bound of the output file, to ensure
1848
- // that there is no overlapping between different output files.
1849
- upper_bound_guard = ExtractUserKey(*next_table_min_key);
1850
- if (sub_compact->end != nullptr &&
1851
- ucmp->Compare(upper_bound_guard, *sub_compact->end) >= 0) {
1852
- upper_bound = sub_compact->end;
1853
- } else {
1854
- upper_bound = &upper_bound_guard;
1855
- }
1856
- } else {
1857
- // This is the last file in the subcompaction, so extend until the
1858
- // subcompaction ends.
1859
- upper_bound = sub_compact->end;
1860
- }
1861
- auto earliest_snapshot = kMaxSequenceNumber;
1862
- if (existing_snapshots_.size() > 0) {
1863
- earliest_snapshot = existing_snapshots_[0];
1864
- }
1865
- bool has_overlapping_endpoints;
1866
- if (upper_bound != nullptr && meta->largest.size() > 0) {
1867
- has_overlapping_endpoints =
1868
- ucmp->Compare(meta->largest.user_key(), *upper_bound) == 0;
1869
- } else {
1870
- has_overlapping_endpoints = false;
1871
- }
1872
1231
 
1873
- // The end key of the subcompaction must be bigger or equal to the upper
1874
- // bound. If the end of subcompaction is null or the upper bound is null,
1875
- // it means that this file is the last file in the compaction. So there
1876
- // will be no overlapping between this file and others.
1877
- assert(sub_compact->end == nullptr ||
1878
- upper_bound == nullptr ||
1879
- ucmp->Compare(*upper_bound , *sub_compact->end) <= 0);
1880
- auto it = range_del_agg->NewIterator(lower_bound, upper_bound,
1881
- has_overlapping_endpoints);
1882
- // Position the range tombstone output iterator. There may be tombstone
1883
- // fragments that are entirely out of range, so make sure that we do not
1884
- // include those.
1885
- if (lower_bound != nullptr) {
1886
- it->Seek(*lower_bound);
1887
- } else {
1888
- it->SeekToFirst();
1232
+ // Add range tombstones
1233
+ auto earliest_snapshot = kMaxSequenceNumber;
1234
+ if (existing_snapshots_.size() > 0) {
1235
+ earliest_snapshot = existing_snapshots_[0];
1236
+ }
1237
+ if (s.ok()) {
1238
+ CompactionIterationStats range_del_out_stats;
1239
+ // if the compaction supports per_key_placement, only output range dels to
1240
+ // the penultimate level.
1241
+ // Note: Use `bottommost_level_ = true` for both bottommost and
1242
+ // output_to_penultimate_level compaction here, as it's only used to decide
1243
+ // if range dels could be dropped.
1244
+ if (outputs.HasRangeDel()) {
1245
+ s = outputs.AddRangeDels(sub_compact->start, sub_compact->end,
1246
+ range_del_out_stats, bottommost_level_,
1247
+ cfd->internal_comparator(), earliest_snapshot,
1248
+ next_table_min_key);
1889
1249
  }
1250
+ RecordDroppedKeys(range_del_out_stats, &sub_compact->compaction_job_stats);
1890
1251
  TEST_SYNC_POINT("CompactionJob::FinishCompactionOutputFile1");
1891
- for (; it->Valid(); it->Next()) {
1892
- auto tombstone = it->Tombstone();
1893
- if (upper_bound != nullptr) {
1894
- int cmp = ucmp->Compare(*upper_bound, tombstone.start_key_);
1895
- if ((has_overlapping_endpoints && cmp < 0) ||
1896
- (!has_overlapping_endpoints && cmp <= 0)) {
1897
- // Tombstones starting after upper_bound only need to be included in
1898
- // the next table. If the current SST ends before upper_bound, i.e.,
1899
- // `has_overlapping_endpoints == false`, we can also skip over range
1900
- // tombstones that start exactly at upper_bound. Such range tombstones
1901
- // will be included in the next file and are not relevant to the point
1902
- // keys or endpoints of the current file.
1903
- break;
1904
- }
1905
- }
1252
+ }
1906
1253
 
1907
- if (bottommost_level_ && tombstone.seq_ <= earliest_snapshot) {
1908
- // TODO(andrewkr): tombstones that span multiple output files are
1909
- // counted for each compaction output file, so lots of double counting.
1910
- range_del_out_stats->num_range_del_drop_obsolete++;
1911
- range_del_out_stats->num_record_drop_obsolete++;
1912
- continue;
1913
- }
1254
+ const uint64_t current_entries = outputs.NumEntries();
1255
+
1256
+ s = outputs.Finish(s);
1914
1257
 
1915
- auto kv = tombstone.Serialize();
1916
- assert(lower_bound == nullptr ||
1917
- ucmp->Compare(*lower_bound, kv.second) < 0);
1918
- // Range tombstone is not supported by output validator yet.
1919
- sub_compact->builder->Add(kv.first.Encode(), kv.second);
1920
- InternalKey smallest_candidate = std::move(kv.first);
1921
- if (lower_bound != nullptr &&
1922
- ucmp->Compare(smallest_candidate.user_key(), *lower_bound) <= 0) {
1923
- // Pretend the smallest key has the same user key as lower_bound
1924
- // (the max key in the previous table or subcompaction) in order for
1925
- // files to appear key-space partitioned.
1926
- //
1927
- // When lower_bound is chosen by a subcompaction, we know that
1928
- // subcompactions over smaller keys cannot contain any keys at
1929
- // lower_bound. We also know that smaller subcompactions exist, because
1930
- // otherwise the subcompaction woud be unbounded on the left. As a
1931
- // result, we know that no other files on the output level will contain
1932
- // actual keys at lower_bound (an output file may have a largest key of
1933
- // lower_bound@kMaxSequenceNumber, but this only indicates a large range
1934
- // tombstone was truncated). Therefore, it is safe to use the
1935
- // tombstone's sequence number, to ensure that keys at lower_bound at
1936
- // lower levels are covered by truncated tombstones.
1937
- //
1938
- // If lower_bound was chosen by the smallest data key in the file,
1939
- // choose lowest seqnum so this file's smallest internal key comes after
1940
- // the previous file's largest. The fake seqnum is OK because the read
1941
- // path's file-picking code only considers user key.
1942
- smallest_candidate = InternalKey(
1943
- *lower_bound, lower_bound_from_sub_compact ? tombstone.seq_ : 0,
1944
- kTypeRangeDeletion);
1945
- }
1946
- InternalKey largest_candidate = tombstone.SerializeEndKey();
1947
- if (upper_bound != nullptr &&
1948
- ucmp->Compare(*upper_bound, largest_candidate.user_key()) <= 0) {
1949
- // Pretend the largest key has the same user key as upper_bound (the
1950
- // min key in the following table or subcompaction) in order for files
1951
- // to appear key-space partitioned.
1952
- //
1953
- // Choose highest seqnum so this file's largest internal key comes
1954
- // before the next file's/subcompaction's smallest. The fake seqnum is
1955
- // OK because the read path's file-picking code only considers the user
1956
- // key portion.
1957
- //
1958
- // Note Seek() also creates InternalKey with (user_key,
1959
- // kMaxSequenceNumber), but with kTypeDeletion (0x7) instead of
1960
- // kTypeRangeDeletion (0xF), so the range tombstone comes before the
1961
- // Seek() key in InternalKey's ordering. So Seek() will look in the
1962
- // next file for the user key.
1963
- largest_candidate =
1964
- InternalKey(*upper_bound, kMaxSequenceNumber, kTypeRangeDeletion);
1965
- }
1966
- #ifndef NDEBUG
1967
- SequenceNumber smallest_ikey_seqnum = kMaxSequenceNumber;
1968
- if (meta->smallest.size() > 0) {
1969
- smallest_ikey_seqnum = GetInternalKeySeqno(meta->smallest.Encode());
1970
- }
1971
- #endif
1972
- meta->UpdateBoundariesForRange(smallest_candidate, largest_candidate,
1973
- tombstone.seq_,
1974
- cfd->internal_comparator());
1975
- // The smallest key in a file is used for range tombstone truncation, so
1976
- // it cannot have a seqnum of 0 (unless the smallest data key in a file
1977
- // has a seqnum of 0). Otherwise, the truncated tombstone may expose
1978
- // deleted keys at lower levels.
1979
- assert(smallest_ikey_seqnum == 0 ||
1980
- ExtractInternalKeyFooter(meta->smallest.Encode()) !=
1981
- PackSequenceAndType(0, kTypeRangeDeletion));
1982
- }
1983
- }
1984
- const uint64_t current_entries = sub_compact->builder->NumEntries();
1985
- if (s.ok()) {
1986
- s = sub_compact->builder->Finish();
1987
- } else {
1988
- sub_compact->builder->Abandon();
1989
- }
1990
- IOStatus io_s = sub_compact->builder->io_status();
1991
- if (s.ok()) {
1992
- s = io_s;
1993
- }
1994
- const uint64_t current_bytes = sub_compact->builder->FileSize();
1995
1258
  if (s.ok()) {
1996
- meta->fd.file_size = current_bytes;
1997
- meta->marked_for_compaction = sub_compact->builder->NeedCompact();
1998
1259
  // With accurate smallest and largest key, we can get a slightly more
1999
1260
  // accurate oldest ancester time.
2000
1261
  // This makes oldest ancester time in manifest more accurate than in
@@ -2014,25 +1275,16 @@ Status CompactionJob::FinishCompactionOutputFile(
2014
1275
  }
2015
1276
  }
2016
1277
  }
2017
- sub_compact->current_output()->finished = true;
2018
- sub_compact->total_bytes += current_bytes;
2019
1278
 
2020
1279
  // Finish and check for file errors
2021
- if (s.ok()) {
2022
- StopWatch sw(db_options_.clock, stats_, COMPACTION_OUTFILE_SYNC_MICROS);
2023
- io_s = sub_compact->outfile->Sync(db_options_.use_fsync);
2024
- }
2025
- if (s.ok() && io_s.ok()) {
2026
- io_s = sub_compact->outfile->Close();
2027
- }
1280
+ IOStatus io_s = outputs.WriterSyncClose(s, db_options_.clock, stats_,
1281
+ db_options_.use_fsync);
1282
+
2028
1283
  if (s.ok() && io_s.ok()) {
2029
- // Add the checksum information to file metadata.
2030
- meta->file_checksum = sub_compact->outfile->GetFileChecksum();
2031
- meta->file_checksum_func_name =
2032
- sub_compact->outfile->GetFileChecksumFuncName();
2033
1284
  file_checksum = meta->file_checksum;
2034
1285
  file_checksum_func_name = meta->file_checksum_func_name;
2035
1286
  }
1287
+
2036
1288
  if (s.ok()) {
2037
1289
  s = io_s;
2038
1290
  }
@@ -2042,11 +1294,10 @@ Status CompactionJob::FinishCompactionOutputFile(
2042
1294
  // "normal" status, it does not also need to be checked
2043
1295
  sub_compact->io_status.PermitUncheckedError();
2044
1296
  }
2045
- sub_compact->outfile.reset();
2046
1297
 
2047
1298
  TableProperties tp;
2048
1299
  if (s.ok()) {
2049
- tp = sub_compact->builder->GetTableProperties();
1300
+ tp = outputs.GetTableProperties();
2050
1301
  }
2051
1302
 
2052
1303
  if (s.ok() && current_entries == 0 && tp.num_range_deletions == 0) {
@@ -2071,21 +1322,20 @@ Status CompactionJob::FinishCompactionOutputFile(
2071
1322
 
2072
1323
  // Also need to remove the file from outputs, or it will be added to the
2073
1324
  // VersionEdit.
2074
- assert(!sub_compact->outputs.empty());
2075
- sub_compact->outputs.pop_back();
1325
+ outputs.RemoveLastOutput();
2076
1326
  meta = nullptr;
2077
1327
  }
2078
1328
 
2079
1329
  if (s.ok() && (current_entries > 0 || tp.num_range_deletions > 0)) {
2080
1330
  // Output to event logger and fire events.
2081
- sub_compact->current_output()->table_properties =
2082
- std::make_shared<TableProperties>(tp);
1331
+ outputs.UpdateTableProperties();
2083
1332
  ROCKS_LOG_INFO(db_options_.info_log,
2084
1333
  "[%s] [JOB %d] Generated table #%" PRIu64 ": %" PRIu64
2085
- " keys, %" PRIu64 " bytes%s",
1334
+ " keys, %" PRIu64 " bytes%s, temperature: %s",
2086
1335
  cfd->GetName().c_str(), job_id_, output_number,
2087
- current_entries, current_bytes,
2088
- meta->marked_for_compaction ? " (need compaction)" : "");
1336
+ current_entries, meta->fd.file_size,
1337
+ meta->marked_for_compaction ? " (need compaction)" : "",
1338
+ temperature_to_string[meta->temperature].c_str());
2089
1339
  }
2090
1340
  std::string fname;
2091
1341
  FileDescriptor output_fd;
@@ -2121,16 +1371,14 @@ Status CompactionJob::FinishCompactionOutputFile(
2121
1371
  // compaction output file (similarly to how flush works when full)?
2122
1372
  s = Status::SpaceLimit("Max allowed space was reached");
2123
1373
  TEST_SYNC_POINT(
2124
- "CompactionJob::FinishCompactionOutputFile:"
2125
- "MaxAllowedSpaceReached");
1374
+ "CompactionJob::FinishCompactionOutputFile:MaxAllowedSpaceReached");
2126
1375
  InstrumentedMutexLock l(db_mutex_);
2127
1376
  db_error_handler_->SetBGError(s, BackgroundErrorReason::kCompaction);
2128
1377
  }
2129
1378
  }
2130
1379
  #endif
2131
1380
 
2132
- sub_compact->builder.reset();
2133
- sub_compact->current_output_file_size = 0;
1381
+ outputs.ResetBuilder();
2134
1382
  return s;
2135
1383
  }
2136
1384
 
@@ -2145,11 +1393,23 @@ Status CompactionJob::InstallCompactionResults(
2145
1393
 
2146
1394
  {
2147
1395
  Compaction::InputLevelSummaryBuffer inputs_summary;
2148
- ROCKS_LOG_BUFFER(log_buffer_,
2149
- "[%s] [JOB %d] Compacted %s => %" PRIu64 " bytes",
2150
- compaction->column_family_data()->GetName().c_str(),
2151
- job_id_, compaction->InputLevelSummary(&inputs_summary),
2152
- compact_->total_bytes + compact_->total_blob_bytes);
1396
+ if (compaction_stats_.has_penultimate_level_output) {
1397
+ ROCKS_LOG_BUFFER(
1398
+ log_buffer_,
1399
+ "[%s] [JOB %d] Compacted %s => output_to_penultimate_level: %" PRIu64
1400
+ " bytes + last: %" PRIu64 " bytes. Total: %" PRIu64 " bytes",
1401
+ compaction->column_family_data()->GetName().c_str(), job_id_,
1402
+ compaction->InputLevelSummary(&inputs_summary),
1403
+ compaction_stats_.penultimate_level_stats.bytes_written,
1404
+ compaction_stats_.stats.bytes_written,
1405
+ compaction_stats_.TotalBytesWritten());
1406
+ } else {
1407
+ ROCKS_LOG_BUFFER(log_buffer_,
1408
+ "[%s] [JOB %d] Compacted %s => %" PRIu64 " bytes",
1409
+ compaction->column_family_data()->GetName().c_str(),
1410
+ job_id_, compaction->InputLevelSummary(&inputs_summary),
1411
+ compaction_stats_.TotalBytesWritten());
1412
+ }
2153
1413
  }
2154
1414
 
2155
1415
  VersionEdit* const edit = compaction->edit();
@@ -2161,16 +1421,14 @@ Status CompactionJob::InstallCompactionResults(
2161
1421
  std::unordered_map<uint64_t, BlobGarbageMeter::BlobStats> blob_total_garbage;
2162
1422
 
2163
1423
  for (const auto& sub_compact : compact_->sub_compact_states) {
2164
- for (const auto& out : sub_compact.outputs) {
2165
- edit->AddFile(compaction->output_level(), out.meta);
2166
- }
1424
+ sub_compact.AddOutputsEdit(edit);
2167
1425
 
2168
- for (const auto& blob : sub_compact.blob_file_additions) {
1426
+ for (const auto& blob : sub_compact.Current().GetBlobFileAdditions()) {
2169
1427
  edit->AddBlobFile(blob);
2170
1428
  }
2171
1429
 
2172
- if (sub_compact.blob_garbage_meter) {
2173
- const auto& flows = sub_compact.blob_garbage_meter->flows();
1430
+ if (sub_compact.Current().GetBlobGarbageMeter()) {
1431
+ const auto& flows = sub_compact.Current().GetBlobGarbageMeter()->flows();
2174
1432
 
2175
1433
  for (const auto& pair : flows) {
2176
1434
  const uint64_t blob_file_number = pair.first;
@@ -2231,10 +1489,10 @@ void CompactionJob::RecordCompactionIOStats() {
2231
1489
  IOSTATS_RESET(bytes_written);
2232
1490
  }
2233
1491
 
2234
- Status CompactionJob::OpenCompactionOutputFile(
2235
- SubcompactionState* sub_compact) {
1492
+ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact,
1493
+ CompactionOutputs& outputs) {
2236
1494
  assert(sub_compact != nullptr);
2237
- assert(sub_compact->builder == nullptr);
1495
+
2238
1496
  // no need to lock because VersionSet::next_file_number_ is atomic
2239
1497
  uint64_t file_number = versions_->NewFileNumber();
2240
1498
  std::string fname = GetTableFileName(file_number);
@@ -2256,7 +1514,8 @@ Status CompactionJob::OpenCompactionOutputFile(
2256
1514
  // Pass temperature of botommost files to FileSystem.
2257
1515
  FileOptions fo_copy = file_options_;
2258
1516
  Temperature temperature = sub_compact->compaction->output_temperature();
2259
- if (temperature == Temperature::kUnknown && bottommost_level_) {
1517
+ if (temperature == Temperature::kUnknown && bottommost_level_ &&
1518
+ !sub_compact->IsCurrentPenultimateLevel()) {
2260
1519
  temperature =
2261
1520
  sub_compact->compaction->mutable_cf_options()->bottommost_temperature;
2262
1521
  }
@@ -2332,12 +1591,11 @@ Status CompactionJob::OpenCompactionOutputFile(
2332
1591
  s.ToString().c_str());
2333
1592
  return s;
2334
1593
  }
2335
- sub_compact->outputs.emplace_back(
2336
- std::move(meta), cfd->internal_comparator(),
2337
- /*enable_order_check=*/
2338
- sub_compact->compaction->mutable_cf_options()
2339
- ->check_flush_compaction_key_order,
2340
- /*enable_hash=*/paranoid_file_checks_);
1594
+
1595
+ outputs.AddOutput(std::move(meta), cfd->internal_comparator(),
1596
+ sub_compact->compaction->mutable_cf_options()
1597
+ ->check_flush_compaction_key_order,
1598
+ paranoid_file_checks_);
2341
1599
  }
2342
1600
 
2343
1601
  writable_file->SetIOPriority(GetRateLimiterPriority());
@@ -2347,7 +1605,7 @@ Status CompactionJob::OpenCompactionOutputFile(
2347
1605
  sub_compact->compaction->OutputFilePreallocationSize()));
2348
1606
  const auto& listeners =
2349
1607
  sub_compact->compaction->immutable_options()->listeners;
2350
- sub_compact->outfile.reset(new WritableFileWriter(
1608
+ outputs.AssignFileWriter(new WritableFileWriter(
2351
1609
  std::move(writable_file), fname, fo_copy, db_options_.clock, io_tracer_,
2352
1610
  db_options_.stats, listeners, db_options_.file_checksum_gen_factory.get(),
2353
1611
  tmp_set.Contains(FileType::kTableFile), false));
@@ -2362,33 +1620,16 @@ Status CompactionJob::OpenCompactionOutputFile(
2362
1620
  oldest_ancester_time, 0 /* oldest_key_time */, current_time, db_id_,
2363
1621
  db_session_id_, sub_compact->compaction->max_output_file_size(),
2364
1622
  file_number);
2365
- sub_compact->builder.reset(
2366
- NewTableBuilder(tboptions, sub_compact->outfile.get()));
1623
+
1624
+ outputs.NewBuilder(tboptions);
1625
+
2367
1626
  LogFlush(db_options_.info_log);
2368
1627
  return s;
2369
1628
  }
2370
1629
 
2371
1630
  void CompactionJob::CleanupCompaction() {
2372
1631
  for (SubcompactionState& sub_compact : compact_->sub_compact_states) {
2373
- const auto& sub_status = sub_compact.status;
2374
-
2375
- if (sub_compact.builder != nullptr) {
2376
- // May happen if we get a shutdown call in the middle of compaction
2377
- sub_compact.builder->Abandon();
2378
- sub_compact.builder.reset();
2379
- } else {
2380
- assert(!sub_status.ok() || sub_compact.outfile == nullptr);
2381
- }
2382
- for (const auto& out : sub_compact.outputs) {
2383
- // If this file was inserted into the table cache then remove
2384
- // them here because this compaction was not committed.
2385
- if (!sub_status.ok()) {
2386
- TableCache::Evict(table_cache_.get(), out.meta.fd.GetNumber());
2387
- }
2388
- }
2389
- // TODO: sub_compact.io_status is not checked like status. Not sure if thats
2390
- // intentional. So ignoring the io_status as of now.
2391
- sub_compact.io_status.PermitUncheckedError();
1632
+ sub_compact.Cleanup(table_cache_.get());
2392
1633
  }
2393
1634
  delete compact_;
2394
1635
  compact_ = nullptr;
@@ -2409,37 +1650,28 @@ void CompactionJob::UpdateCompactionStats() {
2409
1650
  assert(compact_);
2410
1651
 
2411
1652
  Compaction* compaction = compact_->compaction;
2412
- compaction_stats_.num_input_files_in_non_output_levels = 0;
2413
- compaction_stats_.num_input_files_in_output_level = 0;
1653
+ compaction_stats_.stats.num_input_files_in_non_output_levels = 0;
1654
+ compaction_stats_.stats.num_input_files_in_output_level = 0;
2414
1655
  for (int input_level = 0;
2415
1656
  input_level < static_cast<int>(compaction->num_input_levels());
2416
1657
  ++input_level) {
2417
1658
  if (compaction->level(input_level) != compaction->output_level()) {
2418
1659
  UpdateCompactionInputStatsHelper(
2419
- &compaction_stats_.num_input_files_in_non_output_levels,
2420
- &compaction_stats_.bytes_read_non_output_levels, input_level);
1660
+ &compaction_stats_.stats.num_input_files_in_non_output_levels,
1661
+ &compaction_stats_.stats.bytes_read_non_output_levels, input_level);
2421
1662
  } else {
2422
1663
  UpdateCompactionInputStatsHelper(
2423
- &compaction_stats_.num_input_files_in_output_level,
2424
- &compaction_stats_.bytes_read_output_level, input_level);
1664
+ &compaction_stats_.stats.num_input_files_in_output_level,
1665
+ &compaction_stats_.stats.bytes_read_output_level, input_level);
2425
1666
  }
2426
1667
  }
2427
1668
 
2428
1669
  assert(compaction_job_stats_);
2429
- compaction_stats_.bytes_read_blob =
1670
+ compaction_stats_.stats.bytes_read_blob =
2430
1671
  compaction_job_stats_->total_blob_bytes_read;
2431
1672
 
2432
- compaction_stats_.num_output_files =
2433
- static_cast<int>(compact_->num_output_files);
2434
- compaction_stats_.num_output_files_blob =
2435
- static_cast<int>(compact_->num_blob_output_files);
2436
- compaction_stats_.bytes_written = compact_->total_bytes;
2437
- compaction_stats_.bytes_written_blob = compact_->total_blob_bytes;
2438
-
2439
- if (compaction_stats_.num_input_records > compact_->num_output_records) {
2440
- compaction_stats_.num_dropped_records =
2441
- compaction_stats_.num_input_records - compact_->num_output_records;
2442
- }
1673
+ compaction_stats_.stats.num_dropped_records =
1674
+ compaction_stats_.DroppedRecords();
2443
1675
  }
2444
1676
 
2445
1677
  void CompactionJob::UpdateCompactionInputStatsHelper(int* num_files,
@@ -2452,7 +1684,7 @@ void CompactionJob::UpdateCompactionInputStatsHelper(int* num_files,
2452
1684
  for (size_t i = 0; i < num_input_files; ++i) {
2453
1685
  const auto* file_meta = compaction->input(input_level, i);
2454
1686
  *bytes_read += file_meta->fd.GetFileSize();
2455
- compaction_stats_.num_input_records +=
1687
+ compaction_stats_.stats.num_input_records +=
2456
1688
  static_cast<uint64_t>(file_meta->num_entries);
2457
1689
  }
2458
1690
  }
@@ -2475,7 +1707,7 @@ void CompactionJob::UpdateCompactionJobStats(
2475
1707
  // output information
2476
1708
  compaction_job_stats_->total_output_bytes = stats.bytes_written;
2477
1709
  compaction_job_stats_->total_output_bytes_blob = stats.bytes_written_blob;
2478
- compaction_job_stats_->num_output_records = compact_->num_output_records;
1710
+ compaction_job_stats_->num_output_records = stats.num_output_records;
2479
1711
  compaction_job_stats_->num_output_files = stats.num_output_files;
2480
1712
  compaction_job_stats_->num_output_files_blob = stats.num_output_files_blob;
2481
1713
 
@@ -2544,617 +1776,4 @@ Env::IOPriority CompactionJob::GetRateLimiterPriority() {
2544
1776
  return Env::IO_LOW;
2545
1777
  }
2546
1778
 
2547
- #ifndef ROCKSDB_LITE
2548
- std::string CompactionServiceCompactionJob::GetTableFileName(
2549
- uint64_t file_number) {
2550
- return MakeTableFileName(output_path_, file_number);
2551
- }
2552
-
2553
- void CompactionServiceCompactionJob::RecordCompactionIOStats() {
2554
- compaction_result_->bytes_read += IOSTATS(bytes_read);
2555
- compaction_result_->bytes_written += IOSTATS(bytes_written);
2556
- CompactionJob::RecordCompactionIOStats();
2557
- }
2558
-
2559
- CompactionServiceCompactionJob::CompactionServiceCompactionJob(
2560
- int job_id, Compaction* compaction, const ImmutableDBOptions& db_options,
2561
- const MutableDBOptions& mutable_db_options, const FileOptions& file_options,
2562
- VersionSet* versions, const std::atomic<bool>* shutting_down,
2563
- LogBuffer* log_buffer, FSDirectory* output_directory, Statistics* stats,
2564
- InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler,
2565
- std::vector<SequenceNumber> existing_snapshots,
2566
- std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
2567
- const std::string& dbname, const std::shared_ptr<IOTracer>& io_tracer,
2568
- const std::atomic<bool>& manual_compaction_canceled,
2569
- const std::string& db_id, const std::string& db_session_id,
2570
- const std::string& output_path,
2571
- const CompactionServiceInput& compaction_service_input,
2572
- CompactionServiceResult* compaction_service_result)
2573
- : CompactionJob(
2574
- job_id, compaction, db_options, mutable_db_options, file_options,
2575
- versions, shutting_down, log_buffer, nullptr, output_directory,
2576
- nullptr, stats, db_mutex, db_error_handler, existing_snapshots,
2577
- kMaxSequenceNumber, nullptr, nullptr, table_cache, event_logger,
2578
- compaction->mutable_cf_options()->paranoid_file_checks,
2579
- compaction->mutable_cf_options()->report_bg_io_stats, dbname,
2580
- &(compaction_service_result->stats), Env::Priority::USER, io_tracer,
2581
- manual_compaction_canceled, db_id, db_session_id,
2582
- compaction->column_family_data()->GetFullHistoryTsLow()),
2583
- output_path_(output_path),
2584
- compaction_input_(compaction_service_input),
2585
- compaction_result_(compaction_service_result) {}
2586
-
2587
- Status CompactionServiceCompactionJob::Run() {
2588
- AutoThreadOperationStageUpdater stage_updater(
2589
- ThreadStatus::STAGE_COMPACTION_RUN);
2590
-
2591
- auto* c = compact_->compaction;
2592
- assert(c->column_family_data() != nullptr);
2593
- assert(c->column_family_data()->current()->storage_info()->NumLevelFiles(
2594
- compact_->compaction->level()) > 0);
2595
-
2596
- write_hint_ =
2597
- c->column_family_data()->CalculateSSTWriteHint(c->output_level());
2598
- bottommost_level_ = c->bottommost_level();
2599
-
2600
- Slice begin = compaction_input_.begin;
2601
- Slice end = compaction_input_.end;
2602
- compact_->sub_compact_states.emplace_back(
2603
- c, compaction_input_.has_begin ? &begin : nullptr,
2604
- compaction_input_.has_end ? &end : nullptr, compaction_input_.approx_size,
2605
- /*sub_job_id*/ 0);
2606
-
2607
- log_buffer_->FlushBufferToLog();
2608
- LogCompaction();
2609
- const uint64_t start_micros = db_options_.clock->NowMicros();
2610
- // Pick the only sub-compaction we should have
2611
- assert(compact_->sub_compact_states.size() == 1);
2612
- SubcompactionState* sub_compact = compact_->sub_compact_states.data();
2613
-
2614
- ProcessKeyValueCompaction(sub_compact);
2615
-
2616
- compaction_stats_.micros = db_options_.clock->NowMicros() - start_micros;
2617
- compaction_stats_.cpu_micros = sub_compact->compaction_job_stats.cpu_micros;
2618
-
2619
- RecordTimeToHistogram(stats_, COMPACTION_TIME, compaction_stats_.micros);
2620
- RecordTimeToHistogram(stats_, COMPACTION_CPU_TIME,
2621
- compaction_stats_.cpu_micros);
2622
-
2623
- Status status = sub_compact->status;
2624
- IOStatus io_s = sub_compact->io_status;
2625
-
2626
- if (io_status_.ok()) {
2627
- io_status_ = io_s;
2628
- }
2629
-
2630
- if (status.ok()) {
2631
- constexpr IODebugContext* dbg = nullptr;
2632
-
2633
- if (output_directory_) {
2634
- io_s = output_directory_->FsyncWithDirOptions(IOOptions(), dbg,
2635
- DirFsyncOptions());
2636
- }
2637
- }
2638
- if (io_status_.ok()) {
2639
- io_status_ = io_s;
2640
- }
2641
- if (status.ok()) {
2642
- status = io_s;
2643
- }
2644
- if (status.ok()) {
2645
- // TODO: Add verify_table()
2646
- }
2647
-
2648
- // Finish up all book-keeping to unify the subcompaction results
2649
- AggregateStatistics();
2650
- UpdateCompactionStats();
2651
- RecordCompactionIOStats();
2652
-
2653
- LogFlush(db_options_.info_log);
2654
- compact_->status = status;
2655
- compact_->status.PermitUncheckedError();
2656
-
2657
- // Build compaction result
2658
- compaction_result_->output_level = compact_->compaction->output_level();
2659
- compaction_result_->output_path = output_path_;
2660
- for (const auto& output_file : sub_compact->outputs) {
2661
- auto& meta = output_file.meta;
2662
- compaction_result_->output_files.emplace_back(
2663
- MakeTableFileName(meta.fd.GetNumber()), meta.fd.smallest_seqno,
2664
- meta.fd.largest_seqno, meta.smallest.Encode().ToString(),
2665
- meta.largest.Encode().ToString(), meta.oldest_ancester_time,
2666
- meta.file_creation_time, output_file.validator.GetHash(),
2667
- meta.marked_for_compaction, meta.unique_id);
2668
- }
2669
- compaction_result_->num_output_records = sub_compact->num_output_records;
2670
- compaction_result_->total_bytes = sub_compact->total_bytes;
2671
-
2672
- return status;
2673
- }
2674
-
2675
- void CompactionServiceCompactionJob::CleanupCompaction() {
2676
- CompactionJob::CleanupCompaction();
2677
- }
2678
-
2679
- // Internal binary format for the input and result data
2680
- enum BinaryFormatVersion : uint32_t {
2681
- kOptionsString = 1, // Use string format similar to Option string format
2682
- };
2683
-
2684
- static std::unordered_map<std::string, OptionTypeInfo> cfd_type_info = {
2685
- {"name",
2686
- {offsetof(struct ColumnFamilyDescriptor, name), OptionType::kEncodedString,
2687
- OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
2688
- {"options",
2689
- {offsetof(struct ColumnFamilyDescriptor, options),
2690
- OptionType::kConfigurable, OptionVerificationType::kNormal,
2691
- OptionTypeFlags::kNone,
2692
- [](const ConfigOptions& opts, const std::string& /*name*/,
2693
- const std::string& value, void* addr) {
2694
- auto cf_options = static_cast<ColumnFamilyOptions*>(addr);
2695
- return GetColumnFamilyOptionsFromString(opts, ColumnFamilyOptions(),
2696
- value, cf_options);
2697
- },
2698
- [](const ConfigOptions& opts, const std::string& /*name*/,
2699
- const void* addr, std::string* value) {
2700
- const auto cf_options = static_cast<const ColumnFamilyOptions*>(addr);
2701
- std::string result;
2702
- auto status =
2703
- GetStringFromColumnFamilyOptions(opts, *cf_options, &result);
2704
- *value = "{" + result + "}";
2705
- return status;
2706
- },
2707
- [](const ConfigOptions& opts, const std::string& name, const void* addr1,
2708
- const void* addr2, std::string* mismatch) {
2709
- const auto this_one = static_cast<const ColumnFamilyOptions*>(addr1);
2710
- const auto that_one = static_cast<const ColumnFamilyOptions*>(addr2);
2711
- auto this_conf = CFOptionsAsConfigurable(*this_one);
2712
- auto that_conf = CFOptionsAsConfigurable(*that_one);
2713
- std::string mismatch_opt;
2714
- bool result =
2715
- this_conf->AreEquivalent(opts, that_conf.get(), &mismatch_opt);
2716
- if (!result) {
2717
- *mismatch = name + "." + mismatch_opt;
2718
- }
2719
- return result;
2720
- }}},
2721
- };
2722
-
2723
- static std::unordered_map<std::string, OptionTypeInfo> cs_input_type_info = {
2724
- {"column_family",
2725
- OptionTypeInfo::Struct(
2726
- "column_family", &cfd_type_info,
2727
- offsetof(struct CompactionServiceInput, column_family),
2728
- OptionVerificationType::kNormal, OptionTypeFlags::kNone)},
2729
- {"db_options",
2730
- {offsetof(struct CompactionServiceInput, db_options),
2731
- OptionType::kConfigurable, OptionVerificationType::kNormal,
2732
- OptionTypeFlags::kNone,
2733
- [](const ConfigOptions& opts, const std::string& /*name*/,
2734
- const std::string& value, void* addr) {
2735
- auto options = static_cast<DBOptions*>(addr);
2736
- return GetDBOptionsFromString(opts, DBOptions(), value, options);
2737
- },
2738
- [](const ConfigOptions& opts, const std::string& /*name*/,
2739
- const void* addr, std::string* value) {
2740
- const auto options = static_cast<const DBOptions*>(addr);
2741
- std::string result;
2742
- auto status = GetStringFromDBOptions(opts, *options, &result);
2743
- *value = "{" + result + "}";
2744
- return status;
2745
- },
2746
- [](const ConfigOptions& opts, const std::string& name, const void* addr1,
2747
- const void* addr2, std::string* mismatch) {
2748
- const auto this_one = static_cast<const DBOptions*>(addr1);
2749
- const auto that_one = static_cast<const DBOptions*>(addr2);
2750
- auto this_conf = DBOptionsAsConfigurable(*this_one);
2751
- auto that_conf = DBOptionsAsConfigurable(*that_one);
2752
- std::string mismatch_opt;
2753
- bool result =
2754
- this_conf->AreEquivalent(opts, that_conf.get(), &mismatch_opt);
2755
- if (!result) {
2756
- *mismatch = name + "." + mismatch_opt;
2757
- }
2758
- return result;
2759
- }}},
2760
- {"snapshots", OptionTypeInfo::Vector<uint64_t>(
2761
- offsetof(struct CompactionServiceInput, snapshots),
2762
- OptionVerificationType::kNormal, OptionTypeFlags::kNone,
2763
- {0, OptionType::kUInt64T})},
2764
- {"input_files", OptionTypeInfo::Vector<std::string>(
2765
- offsetof(struct CompactionServiceInput, input_files),
2766
- OptionVerificationType::kNormal, OptionTypeFlags::kNone,
2767
- {0, OptionType::kEncodedString})},
2768
- {"output_level",
2769
- {offsetof(struct CompactionServiceInput, output_level), OptionType::kInt,
2770
- OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
2771
- {"db_id",
2772
- {offsetof(struct CompactionServiceInput, db_id),
2773
- OptionType::kEncodedString}},
2774
- {"has_begin",
2775
- {offsetof(struct CompactionServiceInput, has_begin), OptionType::kBoolean,
2776
- OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
2777
- {"begin",
2778
- {offsetof(struct CompactionServiceInput, begin),
2779
- OptionType::kEncodedString, OptionVerificationType::kNormal,
2780
- OptionTypeFlags::kNone}},
2781
- {"has_end",
2782
- {offsetof(struct CompactionServiceInput, has_end), OptionType::kBoolean,
2783
- OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
2784
- {"end",
2785
- {offsetof(struct CompactionServiceInput, end), OptionType::kEncodedString,
2786
- OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
2787
- {"approx_size",
2788
- {offsetof(struct CompactionServiceInput, approx_size),
2789
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2790
- OptionTypeFlags::kNone}},
2791
- };
2792
-
2793
- static std::unordered_map<std::string, OptionTypeInfo>
2794
- cs_output_file_type_info = {
2795
- {"file_name",
2796
- {offsetof(struct CompactionServiceOutputFile, file_name),
2797
- OptionType::kEncodedString, OptionVerificationType::kNormal,
2798
- OptionTypeFlags::kNone}},
2799
- {"smallest_seqno",
2800
- {offsetof(struct CompactionServiceOutputFile, smallest_seqno),
2801
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2802
- OptionTypeFlags::kNone}},
2803
- {"largest_seqno",
2804
- {offsetof(struct CompactionServiceOutputFile, largest_seqno),
2805
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2806
- OptionTypeFlags::kNone}},
2807
- {"smallest_internal_key",
2808
- {offsetof(struct CompactionServiceOutputFile, smallest_internal_key),
2809
- OptionType::kEncodedString, OptionVerificationType::kNormal,
2810
- OptionTypeFlags::kNone}},
2811
- {"largest_internal_key",
2812
- {offsetof(struct CompactionServiceOutputFile, largest_internal_key),
2813
- OptionType::kEncodedString, OptionVerificationType::kNormal,
2814
- OptionTypeFlags::kNone}},
2815
- {"oldest_ancester_time",
2816
- {offsetof(struct CompactionServiceOutputFile, oldest_ancester_time),
2817
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2818
- OptionTypeFlags::kNone}},
2819
- {"file_creation_time",
2820
- {offsetof(struct CompactionServiceOutputFile, file_creation_time),
2821
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2822
- OptionTypeFlags::kNone}},
2823
- {"paranoid_hash",
2824
- {offsetof(struct CompactionServiceOutputFile, paranoid_hash),
2825
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2826
- OptionTypeFlags::kNone}},
2827
- {"marked_for_compaction",
2828
- {offsetof(struct CompactionServiceOutputFile, marked_for_compaction),
2829
- OptionType::kBoolean, OptionVerificationType::kNormal,
2830
- OptionTypeFlags::kNone}},
2831
- {"unique_id",
2832
- OptionTypeInfo::Array<uint64_t, 2>(
2833
- offsetof(struct CompactionServiceOutputFile, unique_id),
2834
- OptionVerificationType::kNormal, OptionTypeFlags::kNone,
2835
- {0, OptionType::kUInt64T})},
2836
- };
2837
-
2838
- static std::unordered_map<std::string, OptionTypeInfo>
2839
- compaction_job_stats_type_info = {
2840
- {"elapsed_micros",
2841
- {offsetof(struct CompactionJobStats, elapsed_micros),
2842
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2843
- OptionTypeFlags::kNone}},
2844
- {"cpu_micros",
2845
- {offsetof(struct CompactionJobStats, cpu_micros), OptionType::kUInt64T,
2846
- OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
2847
- {"num_input_records",
2848
- {offsetof(struct CompactionJobStats, num_input_records),
2849
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2850
- OptionTypeFlags::kNone}},
2851
- {"num_blobs_read",
2852
- {offsetof(struct CompactionJobStats, num_blobs_read),
2853
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2854
- OptionTypeFlags::kNone}},
2855
- {"num_input_files",
2856
- {offsetof(struct CompactionJobStats, num_input_files),
2857
- OptionType::kSizeT, OptionVerificationType::kNormal,
2858
- OptionTypeFlags::kNone}},
2859
- {"num_input_files_at_output_level",
2860
- {offsetof(struct CompactionJobStats, num_input_files_at_output_level),
2861
- OptionType::kSizeT, OptionVerificationType::kNormal,
2862
- OptionTypeFlags::kNone}},
2863
- {"num_output_records",
2864
- {offsetof(struct CompactionJobStats, num_output_records),
2865
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2866
- OptionTypeFlags::kNone}},
2867
- {"num_output_files",
2868
- {offsetof(struct CompactionJobStats, num_output_files),
2869
- OptionType::kSizeT, OptionVerificationType::kNormal,
2870
- OptionTypeFlags::kNone}},
2871
- {"num_output_files_blob",
2872
- {offsetof(struct CompactionJobStats, num_output_files_blob),
2873
- OptionType::kSizeT, OptionVerificationType::kNormal,
2874
- OptionTypeFlags::kNone}},
2875
- {"is_full_compaction",
2876
- {offsetof(struct CompactionJobStats, is_full_compaction),
2877
- OptionType::kBoolean, OptionVerificationType::kNormal,
2878
- OptionTypeFlags::kNone}},
2879
- {"is_manual_compaction",
2880
- {offsetof(struct CompactionJobStats, is_manual_compaction),
2881
- OptionType::kBoolean, OptionVerificationType::kNormal,
2882
- OptionTypeFlags::kNone}},
2883
- {"total_input_bytes",
2884
- {offsetof(struct CompactionJobStats, total_input_bytes),
2885
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2886
- OptionTypeFlags::kNone}},
2887
- {"total_blob_bytes_read",
2888
- {offsetof(struct CompactionJobStats, total_blob_bytes_read),
2889
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2890
- OptionTypeFlags::kNone}},
2891
- {"total_output_bytes",
2892
- {offsetof(struct CompactionJobStats, total_output_bytes),
2893
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2894
- OptionTypeFlags::kNone}},
2895
- {"total_output_bytes_blob",
2896
- {offsetof(struct CompactionJobStats, total_output_bytes_blob),
2897
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2898
- OptionTypeFlags::kNone}},
2899
- {"num_records_replaced",
2900
- {offsetof(struct CompactionJobStats, num_records_replaced),
2901
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2902
- OptionTypeFlags::kNone}},
2903
- {"total_input_raw_key_bytes",
2904
- {offsetof(struct CompactionJobStats, total_input_raw_key_bytes),
2905
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2906
- OptionTypeFlags::kNone}},
2907
- {"total_input_raw_value_bytes",
2908
- {offsetof(struct CompactionJobStats, total_input_raw_value_bytes),
2909
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2910
- OptionTypeFlags::kNone}},
2911
- {"num_input_deletion_records",
2912
- {offsetof(struct CompactionJobStats, num_input_deletion_records),
2913
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2914
- OptionTypeFlags::kNone}},
2915
- {"num_expired_deletion_records",
2916
- {offsetof(struct CompactionJobStats, num_expired_deletion_records),
2917
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2918
- OptionTypeFlags::kNone}},
2919
- {"num_corrupt_keys",
2920
- {offsetof(struct CompactionJobStats, num_corrupt_keys),
2921
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2922
- OptionTypeFlags::kNone}},
2923
- {"file_write_nanos",
2924
- {offsetof(struct CompactionJobStats, file_write_nanos),
2925
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2926
- OptionTypeFlags::kNone}},
2927
- {"file_range_sync_nanos",
2928
- {offsetof(struct CompactionJobStats, file_range_sync_nanos),
2929
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2930
- OptionTypeFlags::kNone}},
2931
- {"file_fsync_nanos",
2932
- {offsetof(struct CompactionJobStats, file_fsync_nanos),
2933
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2934
- OptionTypeFlags::kNone}},
2935
- {"file_prepare_write_nanos",
2936
- {offsetof(struct CompactionJobStats, file_prepare_write_nanos),
2937
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2938
- OptionTypeFlags::kNone}},
2939
- {"smallest_output_key_prefix",
2940
- {offsetof(struct CompactionJobStats, smallest_output_key_prefix),
2941
- OptionType::kEncodedString, OptionVerificationType::kNormal,
2942
- OptionTypeFlags::kNone}},
2943
- {"largest_output_key_prefix",
2944
- {offsetof(struct CompactionJobStats, largest_output_key_prefix),
2945
- OptionType::kEncodedString, OptionVerificationType::kNormal,
2946
- OptionTypeFlags::kNone}},
2947
- {"num_single_del_fallthru",
2948
- {offsetof(struct CompactionJobStats, num_single_del_fallthru),
2949
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2950
- OptionTypeFlags::kNone}},
2951
- {"num_single_del_mismatch",
2952
- {offsetof(struct CompactionJobStats, num_single_del_mismatch),
2953
- OptionType::kUInt64T, OptionVerificationType::kNormal,
2954
- OptionTypeFlags::kNone}},
2955
- };
2956
-
2957
- namespace {
2958
- // this is a helper struct to serialize and deserialize class Status, because
2959
- // Status's members are not public.
2960
- struct StatusSerializationAdapter {
2961
- uint8_t code;
2962
- uint8_t subcode;
2963
- uint8_t severity;
2964
- std::string message;
2965
-
2966
- StatusSerializationAdapter() {}
2967
- explicit StatusSerializationAdapter(const Status& s) {
2968
- code = s.code();
2969
- subcode = s.subcode();
2970
- severity = s.severity();
2971
- auto msg = s.getState();
2972
- message = msg ? msg : "";
2973
- }
2974
-
2975
- Status GetStatus() {
2976
- return Status(static_cast<Status::Code>(code),
2977
- static_cast<Status::SubCode>(subcode),
2978
- static_cast<Status::Severity>(severity), message);
2979
- }
2980
- };
2981
- } // namespace
2982
-
2983
- static std::unordered_map<std::string, OptionTypeInfo>
2984
- status_adapter_type_info = {
2985
- {"code",
2986
- {offsetof(struct StatusSerializationAdapter, code),
2987
- OptionType::kUInt8T, OptionVerificationType::kNormal,
2988
- OptionTypeFlags::kNone}},
2989
- {"subcode",
2990
- {offsetof(struct StatusSerializationAdapter, subcode),
2991
- OptionType::kUInt8T, OptionVerificationType::kNormal,
2992
- OptionTypeFlags::kNone}},
2993
- {"severity",
2994
- {offsetof(struct StatusSerializationAdapter, severity),
2995
- OptionType::kUInt8T, OptionVerificationType::kNormal,
2996
- OptionTypeFlags::kNone}},
2997
- {"message",
2998
- {offsetof(struct StatusSerializationAdapter, message),
2999
- OptionType::kEncodedString, OptionVerificationType::kNormal,
3000
- OptionTypeFlags::kNone}},
3001
- };
3002
-
3003
- static std::unordered_map<std::string, OptionTypeInfo> cs_result_type_info = {
3004
- {"status",
3005
- {offsetof(struct CompactionServiceResult, status),
3006
- OptionType::kCustomizable, OptionVerificationType::kNormal,
3007
- OptionTypeFlags::kNone,
3008
- [](const ConfigOptions& opts, const std::string& /*name*/,
3009
- const std::string& value, void* addr) {
3010
- auto status_obj = static_cast<Status*>(addr);
3011
- StatusSerializationAdapter adapter;
3012
- Status s = OptionTypeInfo::ParseType(
3013
- opts, value, status_adapter_type_info, &adapter);
3014
- *status_obj = adapter.GetStatus();
3015
- return s;
3016
- },
3017
- [](const ConfigOptions& opts, const std::string& /*name*/,
3018
- const void* addr, std::string* value) {
3019
- const auto status_obj = static_cast<const Status*>(addr);
3020
- StatusSerializationAdapter adapter(*status_obj);
3021
- std::string result;
3022
- Status s = OptionTypeInfo::SerializeType(opts, status_adapter_type_info,
3023
- &adapter, &result);
3024
- *value = "{" + result + "}";
3025
- return s;
3026
- },
3027
- [](const ConfigOptions& opts, const std::string& /*name*/,
3028
- const void* addr1, const void* addr2, std::string* mismatch) {
3029
- const auto status1 = static_cast<const Status*>(addr1);
3030
- const auto status2 = static_cast<const Status*>(addr2);
3031
-
3032
- StatusSerializationAdapter adatper1(*status1);
3033
- StatusSerializationAdapter adapter2(*status2);
3034
- return OptionTypeInfo::TypesAreEqual(opts, status_adapter_type_info,
3035
- &adatper1, &adapter2, mismatch);
3036
- }}},
3037
- {"output_files",
3038
- OptionTypeInfo::Vector<CompactionServiceOutputFile>(
3039
- offsetof(struct CompactionServiceResult, output_files),
3040
- OptionVerificationType::kNormal, OptionTypeFlags::kNone,
3041
- OptionTypeInfo::Struct("output_files", &cs_output_file_type_info, 0,
3042
- OptionVerificationType::kNormal,
3043
- OptionTypeFlags::kNone))},
3044
- {"output_level",
3045
- {offsetof(struct CompactionServiceResult, output_level), OptionType::kInt,
3046
- OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
3047
- {"output_path",
3048
- {offsetof(struct CompactionServiceResult, output_path),
3049
- OptionType::kEncodedString, OptionVerificationType::kNormal,
3050
- OptionTypeFlags::kNone}},
3051
- {"num_output_records",
3052
- {offsetof(struct CompactionServiceResult, num_output_records),
3053
- OptionType::kUInt64T, OptionVerificationType::kNormal,
3054
- OptionTypeFlags::kNone}},
3055
- {"total_bytes",
3056
- {offsetof(struct CompactionServiceResult, total_bytes),
3057
- OptionType::kUInt64T, OptionVerificationType::kNormal,
3058
- OptionTypeFlags::kNone}},
3059
- {"bytes_read",
3060
- {offsetof(struct CompactionServiceResult, bytes_read),
3061
- OptionType::kUInt64T, OptionVerificationType::kNormal,
3062
- OptionTypeFlags::kNone}},
3063
- {"bytes_written",
3064
- {offsetof(struct CompactionServiceResult, bytes_written),
3065
- OptionType::kUInt64T, OptionVerificationType::kNormal,
3066
- OptionTypeFlags::kNone}},
3067
- {"stats", OptionTypeInfo::Struct(
3068
- "stats", &compaction_job_stats_type_info,
3069
- offsetof(struct CompactionServiceResult, stats),
3070
- OptionVerificationType::kNormal, OptionTypeFlags::kNone)},
3071
- };
3072
-
3073
- Status CompactionServiceInput::Read(const std::string& data_str,
3074
- CompactionServiceInput* obj) {
3075
- if (data_str.size() <= sizeof(BinaryFormatVersion)) {
3076
- return Status::InvalidArgument("Invalid CompactionServiceInput string");
3077
- }
3078
- auto format_version = DecodeFixed32(data_str.data());
3079
- if (format_version == kOptionsString) {
3080
- ConfigOptions cf;
3081
- cf.invoke_prepare_options = false;
3082
- cf.ignore_unknown_options = true;
3083
- return OptionTypeInfo::ParseType(
3084
- cf, data_str.substr(sizeof(BinaryFormatVersion)), cs_input_type_info,
3085
- obj);
3086
- } else {
3087
- return Status::NotSupported(
3088
- "Compaction Service Input data version not supported: " +
3089
- std::to_string(format_version));
3090
- }
3091
- }
3092
-
3093
- Status CompactionServiceInput::Write(std::string* output) {
3094
- char buf[sizeof(BinaryFormatVersion)];
3095
- EncodeFixed32(buf, kOptionsString);
3096
- output->append(buf, sizeof(BinaryFormatVersion));
3097
- ConfigOptions cf;
3098
- cf.invoke_prepare_options = false;
3099
- return OptionTypeInfo::SerializeType(cf, cs_input_type_info, this, output);
3100
- }
3101
-
3102
- Status CompactionServiceResult::Read(const std::string& data_str,
3103
- CompactionServiceResult* obj) {
3104
- if (data_str.size() <= sizeof(BinaryFormatVersion)) {
3105
- return Status::InvalidArgument("Invalid CompactionServiceResult string");
3106
- }
3107
- auto format_version = DecodeFixed32(data_str.data());
3108
- if (format_version == kOptionsString) {
3109
- ConfigOptions cf;
3110
- cf.invoke_prepare_options = false;
3111
- cf.ignore_unknown_options = true;
3112
- return OptionTypeInfo::ParseType(
3113
- cf, data_str.substr(sizeof(BinaryFormatVersion)), cs_result_type_info,
3114
- obj);
3115
- } else {
3116
- return Status::NotSupported(
3117
- "Compaction Service Result data version not supported: " +
3118
- std::to_string(format_version));
3119
- }
3120
- }
3121
-
3122
- Status CompactionServiceResult::Write(std::string* output) {
3123
- char buf[sizeof(BinaryFormatVersion)];
3124
- EncodeFixed32(buf, kOptionsString);
3125
- output->append(buf, sizeof(BinaryFormatVersion));
3126
- ConfigOptions cf;
3127
- cf.invoke_prepare_options = false;
3128
- return OptionTypeInfo::SerializeType(cf, cs_result_type_info, this, output);
3129
- }
3130
-
3131
- #ifndef NDEBUG
3132
- bool CompactionServiceResult::TEST_Equals(CompactionServiceResult* other) {
3133
- std::string mismatch;
3134
- return TEST_Equals(other, &mismatch);
3135
- }
3136
-
3137
- bool CompactionServiceResult::TEST_Equals(CompactionServiceResult* other,
3138
- std::string* mismatch) {
3139
- ConfigOptions cf;
3140
- cf.invoke_prepare_options = false;
3141
- return OptionTypeInfo::TypesAreEqual(cf, cs_result_type_info, this, other,
3142
- mismatch);
3143
- }
3144
-
3145
- bool CompactionServiceInput::TEST_Equals(CompactionServiceInput* other) {
3146
- std::string mismatch;
3147
- return TEST_Equals(other, &mismatch);
3148
- }
3149
-
3150
- bool CompactionServiceInput::TEST_Equals(CompactionServiceInput* other,
3151
- std::string* mismatch) {
3152
- ConfigOptions cf;
3153
- cf.invoke_prepare_options = false;
3154
- return OptionTypeInfo::TypesAreEqual(cf, cs_input_type_info, this, other,
3155
- mismatch);
3156
- }
3157
- #endif // NDEBUG
3158
- #endif // !ROCKSDB_LITE
3159
-
3160
1779
  } // namespace ROCKSDB_NAMESPACE