@nxtedition/rocksdb 8.1.17 → 8.2.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +32 -2
- package/binding.gyp +8 -0
- package/deps/liburing/liburing.gyp +20 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +4 -0
- package/deps/rocksdb/rocksdb/TARGETS +7 -0
- package/deps/rocksdb/rocksdb/cache/cache.cc +43 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +8 -5
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +1 -1
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +12 -48
- package/deps/rocksdb/rocksdb/cache/charged_cache.cc +26 -18
- package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -62
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +119 -44
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +34 -29
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +3 -3
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -2
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +148 -209
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +118 -284
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +23 -71
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +351 -392
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +5 -2
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +296 -0
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +52 -0
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +22 -19
- package/deps/rocksdb/rocksdb/cache/typed_cache.h +56 -20
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +4 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +3 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +19 -25
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +216 -0
- package/deps/rocksdb/rocksdb/db/c.cc +90 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +8 -7
- package/deps/rocksdb/rocksdb/db/column_family.h +0 -6
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +24 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +18 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +3 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +245 -302
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +75 -15
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +1 -5
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +91 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +5 -12
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +16 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +47 -24
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +4 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -3
- package/deps/rocksdb/rocksdb/db/db_iter.cc +28 -29
- package/deps/rocksdb/rocksdb/db/db_iter.h +0 -3
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +176 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +391 -2
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +26 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +13 -5
- package/deps/rocksdb/rocksdb/db/dbformat.h +3 -1
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +0 -6
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +3 -0
- package/deps/rocksdb/rocksdb/db/forward_iterator.h +1 -1
- package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +4 -0
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +68 -40
- package/deps/rocksdb/rocksdb/db/import_column_family_job.h +3 -3
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +115 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +169 -72
- package/deps/rocksdb/rocksdb/db/internal_stats.h +36 -7
- package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +4 -0
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +151 -0
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +47 -16
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +10 -8
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +91 -93
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +1 -2
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_set.cc +30 -14
- package/deps/rocksdb/rocksdb/db/version_set.h +1 -0
- package/deps/rocksdb/rocksdb/db/write_stall_stats.cc +179 -0
- package/deps/rocksdb/rocksdb/db/write_stall_stats.h +47 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +109 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +147 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +31 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +42 -59
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +7 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +6 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +6 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +127 -36
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +8 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +35 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +29 -8
- package/deps/rocksdb/rocksdb/file/file_util.cc +14 -10
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +183 -63
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +159 -66
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +52 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +134 -73
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +46 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +0 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +6 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +18 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +28 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +39 -0
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +9 -1
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -2
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +17 -7
- package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -0
- package/deps/rocksdb/rocksdb/src.mk +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +38 -34
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +11 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +5 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +126 -132
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +16 -16
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +0 -16
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +370 -0
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +44 -0
- package/deps/rocksdb/rocksdb/table/get_context.cc +4 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +555 -267
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +10 -5
- package/deps/rocksdb/rocksdb/table/table_test.cc +113 -70
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +96 -0
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +117 -0
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +5 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +9 -2
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +11 -0
- package/deps/rocksdb/rocksdb.gyp +7 -1
- package/package.json +1 -1
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -101,12 +101,9 @@ class NonBatchedOpsStressTest : public StressTest {
|
|
|
101
101
|
if (diff > 0) {
|
|
102
102
|
s = Status::NotFound();
|
|
103
103
|
} else if (diff == 0) {
|
|
104
|
-
|
|
105
|
-
GetValueBase(iter->value()), iter->value());
|
|
106
|
-
if (iter->columns() != expected_columns) {
|
|
104
|
+
if (!VerifyWideColumns(iter->value(), iter->columns())) {
|
|
107
105
|
VerificationAbort(shared, static_cast<int>(cf), i,
|
|
108
|
-
iter->value(), iter->columns()
|
|
109
|
-
expected_columns);
|
|
106
|
+
iter->value(), iter->columns());
|
|
110
107
|
}
|
|
111
108
|
|
|
112
109
|
from_db = iter->value().ToString();
|
|
@@ -159,26 +156,24 @@ class NonBatchedOpsStressTest : public StressTest {
|
|
|
159
156
|
}
|
|
160
157
|
|
|
161
158
|
const std::string key = Key(i);
|
|
162
|
-
PinnableWideColumns
|
|
159
|
+
PinnableWideColumns result;
|
|
163
160
|
|
|
164
161
|
Status s =
|
|
165
|
-
db_->GetEntity(options, column_families_[cf], key, &
|
|
162
|
+
db_->GetEntity(options, column_families_[cf], key, &result);
|
|
166
163
|
|
|
167
164
|
std::string from_db;
|
|
168
165
|
|
|
169
166
|
if (s.ok()) {
|
|
170
|
-
const WideColumns&
|
|
167
|
+
const WideColumns& columns = result.columns();
|
|
171
168
|
|
|
172
|
-
if (!
|
|
173
|
-
|
|
174
|
-
from_db =
|
|
169
|
+
if (!columns.empty() &&
|
|
170
|
+
columns.front().name() == kDefaultWideColumnName) {
|
|
171
|
+
from_db = columns.front().value().ToString();
|
|
175
172
|
}
|
|
176
173
|
|
|
177
|
-
|
|
178
|
-
GenerateExpectedWideColumns(GetValueBase(from_db), from_db);
|
|
179
|
-
if (columns_from_db != expected_columns) {
|
|
174
|
+
if (!VerifyWideColumns(columns)) {
|
|
180
175
|
VerificationAbort(shared, static_cast<int>(cf), i, from_db,
|
|
181
|
-
|
|
176
|
+
columns);
|
|
182
177
|
}
|
|
183
178
|
}
|
|
184
179
|
|
|
@@ -256,18 +251,16 @@ class NonBatchedOpsStressTest : public StressTest {
|
|
|
256
251
|
std::string from_db;
|
|
257
252
|
|
|
258
253
|
if (statuses[j].ok()) {
|
|
259
|
-
const WideColumns&
|
|
254
|
+
const WideColumns& columns = results[j].columns();
|
|
260
255
|
|
|
261
|
-
if (!
|
|
262
|
-
|
|
263
|
-
from_db =
|
|
256
|
+
if (!columns.empty() &&
|
|
257
|
+
columns.front().name() == kDefaultWideColumnName) {
|
|
258
|
+
from_db = columns.front().value().ToString();
|
|
264
259
|
}
|
|
265
260
|
|
|
266
|
-
|
|
267
|
-
GenerateExpectedWideColumns(GetValueBase(from_db), from_db);
|
|
268
|
-
if (columns_from_db != expected_columns) {
|
|
261
|
+
if (!VerifyWideColumns(columns)) {
|
|
269
262
|
VerificationAbort(shared, static_cast<int>(cf), i, from_db,
|
|
270
|
-
|
|
263
|
+
columns);
|
|
271
264
|
}
|
|
272
265
|
}
|
|
273
266
|
|
|
@@ -492,6 +485,11 @@ class NonBatchedOpsStressTest : public StressTest {
|
|
|
492
485
|
ReadOptions read_opts_copy = read_opts;
|
|
493
486
|
std::string read_ts_str;
|
|
494
487
|
Slice read_ts_slice;
|
|
488
|
+
if (FLAGS_user_timestamp_size > 0) {
|
|
489
|
+
read_ts_str = GetNowNanos();
|
|
490
|
+
read_ts_slice = read_ts_str;
|
|
491
|
+
read_opts_copy.timestamp = &read_ts_slice;
|
|
492
|
+
}
|
|
495
493
|
bool read_older_ts = MaybeUseOlderTimestampForPointLookup(
|
|
496
494
|
thread, read_ts_str, read_ts_slice, read_opts_copy);
|
|
497
495
|
|
|
@@ -514,7 +512,7 @@ class NonBatchedOpsStressTest : public StressTest {
|
|
|
514
512
|
// found case
|
|
515
513
|
thread->stats.AddGets(1, 1);
|
|
516
514
|
// we only have the latest expected state
|
|
517
|
-
if (!FLAGS_skip_verifydb && !
|
|
515
|
+
if (!FLAGS_skip_verifydb && !read_older_ts &&
|
|
518
516
|
thread->shared->Get(rand_column_families[0], rand_keys[0]) ==
|
|
519
517
|
SharedState::DELETION_SENTINEL) {
|
|
520
518
|
thread->shared->SetVerificationFailure();
|
|
@@ -751,6 +749,104 @@ class NonBatchedOpsStressTest : public StressTest {
|
|
|
751
749
|
return statuses;
|
|
752
750
|
}
|
|
753
751
|
|
|
752
|
+
void TestGetEntity(ThreadState* thread, const ReadOptions& read_opts,
|
|
753
|
+
const std::vector<int>& rand_column_families,
|
|
754
|
+
const std::vector<int64_t>& rand_keys) override {
|
|
755
|
+
if (fault_fs_guard) {
|
|
756
|
+
fault_fs_guard->EnableErrorInjection();
|
|
757
|
+
SharedState::ignore_read_error = false;
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
assert(thread);
|
|
761
|
+
|
|
762
|
+
SharedState* const shared = thread->shared;
|
|
763
|
+
assert(shared);
|
|
764
|
+
|
|
765
|
+
assert(!rand_column_families.empty());
|
|
766
|
+
assert(!rand_keys.empty());
|
|
767
|
+
|
|
768
|
+
std::unique_ptr<MutexLock> lock(new MutexLock(
|
|
769
|
+
shared->GetMutexForKey(rand_column_families[0], rand_keys[0])));
|
|
770
|
+
|
|
771
|
+
assert(rand_column_families[0] >= 0);
|
|
772
|
+
assert(rand_column_families[0] < static_cast<int>(column_families_.size()));
|
|
773
|
+
|
|
774
|
+
ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]];
|
|
775
|
+
assert(cfh);
|
|
776
|
+
|
|
777
|
+
const std::string key = Key(rand_keys[0]);
|
|
778
|
+
|
|
779
|
+
PinnableWideColumns from_db;
|
|
780
|
+
|
|
781
|
+
const Status s = db_->GetEntity(read_opts, cfh, key, &from_db);
|
|
782
|
+
|
|
783
|
+
int error_count = 0;
|
|
784
|
+
|
|
785
|
+
if (fault_fs_guard) {
|
|
786
|
+
error_count = fault_fs_guard->GetAndResetErrorCount();
|
|
787
|
+
}
|
|
788
|
+
|
|
789
|
+
if (s.ok()) {
|
|
790
|
+
if (fault_fs_guard) {
|
|
791
|
+
if (error_count && !SharedState::ignore_read_error) {
|
|
792
|
+
// Grab mutex so multiple threads don't try to print the
|
|
793
|
+
// stack trace at the same time
|
|
794
|
+
MutexLock l(shared->GetMutex());
|
|
795
|
+
fprintf(stderr, "Didn't get expected error from GetEntity\n");
|
|
796
|
+
fprintf(stderr, "Call stack that injected the fault\n");
|
|
797
|
+
fault_fs_guard->PrintFaultBacktrace();
|
|
798
|
+
std::terminate();
|
|
799
|
+
}
|
|
800
|
+
}
|
|
801
|
+
|
|
802
|
+
thread->stats.AddGets(1, 1);
|
|
803
|
+
|
|
804
|
+
if (!FLAGS_skip_verifydb) {
|
|
805
|
+
const WideColumns& columns = from_db.columns();
|
|
806
|
+
|
|
807
|
+
if (!VerifyWideColumns(columns)) {
|
|
808
|
+
shared->SetVerificationFailure();
|
|
809
|
+
fprintf(stderr,
|
|
810
|
+
"error : inconsistent columns returned by GetEntity for key "
|
|
811
|
+
"%s: %s\n",
|
|
812
|
+
StringToHex(key).c_str(), WideColumnsToHex(columns).c_str());
|
|
813
|
+
} else if (shared->Get(rand_column_families[0], rand_keys[0]) ==
|
|
814
|
+
SharedState::DELETION_SENTINEL) {
|
|
815
|
+
shared->SetVerificationFailure();
|
|
816
|
+
fprintf(
|
|
817
|
+
stderr,
|
|
818
|
+
"error : inconsistent values for key %s: GetEntity returns %s, "
|
|
819
|
+
"expected state does not have the key.\n",
|
|
820
|
+
StringToHex(key).c_str(), WideColumnsToHex(columns).c_str());
|
|
821
|
+
}
|
|
822
|
+
}
|
|
823
|
+
} else if (s.IsNotFound()) {
|
|
824
|
+
thread->stats.AddGets(1, 0);
|
|
825
|
+
|
|
826
|
+
if (!FLAGS_skip_verifydb) {
|
|
827
|
+
auto expected = shared->Get(rand_column_families[0], rand_keys[0]);
|
|
828
|
+
if (expected != SharedState::DELETION_SENTINEL &&
|
|
829
|
+
expected != SharedState::UNKNOWN_SENTINEL) {
|
|
830
|
+
shared->SetVerificationFailure();
|
|
831
|
+
fprintf(stderr,
|
|
832
|
+
"error : inconsistent values for key %s: expected state has "
|
|
833
|
+
"the key, GetEntity returns NotFound.\n",
|
|
834
|
+
StringToHex(key).c_str());
|
|
835
|
+
}
|
|
836
|
+
}
|
|
837
|
+
} else {
|
|
838
|
+
if (error_count == 0) {
|
|
839
|
+
thread->stats.AddErrors(1);
|
|
840
|
+
} else {
|
|
841
|
+
thread->stats.AddVerifiedErrors(1);
|
|
842
|
+
}
|
|
843
|
+
}
|
|
844
|
+
|
|
845
|
+
if (fault_fs_guard) {
|
|
846
|
+
fault_fs_guard->DisableErrorInjection();
|
|
847
|
+
}
|
|
848
|
+
}
|
|
849
|
+
|
|
754
850
|
Status TestPrefixScan(ThreadState* thread, const ReadOptions& read_opts,
|
|
755
851
|
const std::vector<int>& rand_column_families,
|
|
756
852
|
const std::vector<int64_t>& rand_keys) override {
|
|
@@ -805,12 +901,9 @@ class NonBatchedOpsStressTest : public StressTest {
|
|
|
805
901
|
}
|
|
806
902
|
}
|
|
807
903
|
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
s = Status::Corruption(
|
|
812
|
-
"Value and columns inconsistent",
|
|
813
|
-
DebugString(iter->value(), iter->columns(), expected_columns));
|
|
904
|
+
if (!VerifyWideColumns(iter->value(), iter->columns())) {
|
|
905
|
+
s = Status::Corruption("Value and columns inconsistent",
|
|
906
|
+
DebugString(iter->value(), iter->columns()));
|
|
814
907
|
break;
|
|
815
908
|
}
|
|
816
909
|
}
|
|
@@ -1263,17 +1356,15 @@ class NonBatchedOpsStressTest : public StressTest {
|
|
|
1263
1356
|
assert(iter);
|
|
1264
1357
|
assert(iter->Valid());
|
|
1265
1358
|
|
|
1266
|
-
|
|
1267
|
-
GetValueBase(iter->value()), iter->value());
|
|
1268
|
-
if (iter->columns() != expected_columns) {
|
|
1359
|
+
if (!VerifyWideColumns(iter->value(), iter->columns())) {
|
|
1269
1360
|
shared->SetVerificationFailure();
|
|
1270
1361
|
|
|
1271
1362
|
fprintf(stderr,
|
|
1272
1363
|
"Verification failed for key %s: "
|
|
1273
|
-
"Value and columns inconsistent: %s\n",
|
|
1364
|
+
"Value and columns inconsistent: value: %s, columns: %s\n",
|
|
1274
1365
|
Slice(iter->key()).ToString(/* hex */ true).c_str(),
|
|
1275
|
-
|
|
1276
|
-
|
|
1366
|
+
iter->value().ToString(/* hex */ true).c_str(),
|
|
1367
|
+
WideColumnsToHex(iter->columns()).c_str());
|
|
1277
1368
|
fprintf(stderr, "Column family: %s, op_logs: %s\n",
|
|
1278
1369
|
cfh->GetName().c_str(), op_logs.c_str());
|
|
1279
1370
|
|
|
@@ -1183,6 +1183,14 @@ class PosixFileSystem : public FileSystem {
|
|
|
1183
1183
|
#endif
|
|
1184
1184
|
}
|
|
1185
1185
|
|
|
1186
|
+
bool use_async_io() override {
|
|
1187
|
+
#if defined(ROCKSDB_IOURING_PRESENT)
|
|
1188
|
+
return IsIOUringEnabled();
|
|
1189
|
+
#else
|
|
1190
|
+
return false;
|
|
1191
|
+
#endif
|
|
1192
|
+
}
|
|
1193
|
+
|
|
1186
1194
|
#if defined(ROCKSDB_IOURING_PRESENT)
|
|
1187
1195
|
// io_uring instance
|
|
1188
1196
|
std::unique_ptr<ThreadLocalPtr> thread_local_io_urings_;
|
|
@@ -162,6 +162,9 @@ Status FilePrefetchBuffer::Prefetch(const IOOptions& opts,
|
|
|
162
162
|
|
|
163
163
|
Status s = Read(opts, reader, rate_limiter_priority, read_len, chunk_len,
|
|
164
164
|
rounddown_offset, curr_);
|
|
165
|
+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && s.ok()) {
|
|
166
|
+
RecordInHistogram(stats_, TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, read_len);
|
|
167
|
+
}
|
|
165
168
|
return s;
|
|
166
169
|
}
|
|
167
170
|
|
|
@@ -609,6 +612,22 @@ bool FilePrefetchBuffer::TryReadFromCache(const IOOptions& opts,
|
|
|
609
612
|
Slice* result, Status* status,
|
|
610
613
|
Env::IOPriority rate_limiter_priority,
|
|
611
614
|
bool for_compaction /* = false */) {
|
|
615
|
+
bool ret = TryReadFromCacheUntracked(opts, reader, offset, n, result, status,
|
|
616
|
+
rate_limiter_priority, for_compaction);
|
|
617
|
+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && enable_) {
|
|
618
|
+
if (ret) {
|
|
619
|
+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_HIT);
|
|
620
|
+
} else {
|
|
621
|
+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
return ret;
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
bool FilePrefetchBuffer::TryReadFromCacheUntracked(
|
|
628
|
+
const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset,
|
|
629
|
+
size_t n, Slice* result, Status* status,
|
|
630
|
+
Env::IOPriority rate_limiter_priority, bool for_compaction /* = false */) {
|
|
612
631
|
if (track_min_offset_ && offset < min_offset_read_) {
|
|
613
632
|
min_offset_read_ = static_cast<size_t>(offset);
|
|
614
633
|
}
|
|
@@ -666,6 +685,22 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
|
|
|
666
685
|
const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset,
|
|
667
686
|
size_t n, Slice* result, Status* status,
|
|
668
687
|
Env::IOPriority rate_limiter_priority) {
|
|
688
|
+
bool ret = TryReadFromCacheAsyncUntracked(opts, reader, offset, n, result,
|
|
689
|
+
status, rate_limiter_priority);
|
|
690
|
+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && enable_) {
|
|
691
|
+
if (ret) {
|
|
692
|
+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_HIT);
|
|
693
|
+
} else {
|
|
694
|
+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
|
|
695
|
+
}
|
|
696
|
+
}
|
|
697
|
+
return ret;
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
bool FilePrefetchBuffer::TryReadFromCacheAsyncUntracked(
|
|
701
|
+
const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset,
|
|
702
|
+
size_t n, Slice* result, Status* status,
|
|
703
|
+
Env::IOPriority rate_limiter_priority) {
|
|
669
704
|
if (track_min_offset_ && offset < min_offset_read_) {
|
|
670
705
|
min_offset_read_ = static_cast<size_t>(offset);
|
|
671
706
|
}
|
|
@@ -54,6 +54,11 @@ struct BufferInfo {
|
|
|
54
54
|
uint32_t pos_ = 0;
|
|
55
55
|
};
|
|
56
56
|
|
|
57
|
+
enum class FilePrefetchBufferUsage {
|
|
58
|
+
kTableOpenPrefetchTail,
|
|
59
|
+
kUnknown,
|
|
60
|
+
};
|
|
61
|
+
|
|
57
62
|
// FilePrefetchBuffer is a smart buffer to store and read data from a file.
|
|
58
63
|
class FilePrefetchBuffer {
|
|
59
64
|
public:
|
|
@@ -78,13 +83,13 @@ class FilePrefetchBuffer {
|
|
|
78
83
|
// and max_readahead_size are passed in.
|
|
79
84
|
// A user can construct a FilePrefetchBuffer without any arguments, but use
|
|
80
85
|
// `Prefetch` to load data into the buffer.
|
|
81
|
-
FilePrefetchBuffer(
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
86
|
+
FilePrefetchBuffer(
|
|
87
|
+
size_t readahead_size = 0, size_t max_readahead_size = 0,
|
|
88
|
+
bool enable = true, bool track_min_offset = false,
|
|
89
|
+
bool implicit_auto_readahead = false, uint64_t num_file_reads = 0,
|
|
90
|
+
uint64_t num_file_reads_for_auto_readahead = 0, FileSystem* fs = nullptr,
|
|
91
|
+
SystemClock* clock = nullptr, Statistics* stats = nullptr,
|
|
92
|
+
FilePrefetchBufferUsage usage = FilePrefetchBufferUsage::kUnknown)
|
|
88
93
|
: curr_(0),
|
|
89
94
|
readahead_size_(readahead_size),
|
|
90
95
|
initial_auto_readahead_size_(readahead_size),
|
|
@@ -100,7 +105,8 @@ class FilePrefetchBuffer {
|
|
|
100
105
|
explicit_prefetch_submitted_(false),
|
|
101
106
|
fs_(fs),
|
|
102
107
|
clock_(clock),
|
|
103
|
-
stats_(stats)
|
|
108
|
+
stats_(stats),
|
|
109
|
+
usage_(usage) {
|
|
104
110
|
assert((num_file_reads_ >= num_file_reads_for_auto_readahead_ + 1) ||
|
|
105
111
|
(num_file_reads_ == 0));
|
|
106
112
|
// If ReadOptions.async_io is enabled, data is asynchronously filled in
|
|
@@ -403,6 +409,19 @@ class FilePrefetchBuffer {
|
|
|
403
409
|
bool& copy_to_third_buffer, uint64_t& tmp_offset,
|
|
404
410
|
size_t& tmp_length);
|
|
405
411
|
|
|
412
|
+
bool TryReadFromCacheUntracked(const IOOptions& opts,
|
|
413
|
+
RandomAccessFileReader* reader,
|
|
414
|
+
uint64_t offset, size_t n, Slice* result,
|
|
415
|
+
Status* s,
|
|
416
|
+
Env::IOPriority rate_limiter_priority,
|
|
417
|
+
bool for_compaction = false);
|
|
418
|
+
|
|
419
|
+
bool TryReadFromCacheAsyncUntracked(const IOOptions& opts,
|
|
420
|
+
RandomAccessFileReader* reader,
|
|
421
|
+
uint64_t offset, size_t n, Slice* result,
|
|
422
|
+
Status* status,
|
|
423
|
+
Env::IOPriority rate_limiter_priority);
|
|
424
|
+
|
|
406
425
|
std::vector<BufferInfo> bufs_;
|
|
407
426
|
// curr_ represents the index for bufs_ indicating which buffer is being
|
|
408
427
|
// consumed currently.
|
|
@@ -442,5 +461,7 @@ class FilePrefetchBuffer {
|
|
|
442
461
|
FileSystem* fs_;
|
|
443
462
|
SystemClock* clock_;
|
|
444
463
|
Statistics* stats_;
|
|
464
|
+
|
|
465
|
+
FilePrefetchBufferUsage usage_;
|
|
445
466
|
};
|
|
446
467
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -135,7 +135,7 @@ IOStatus GenerateOneFileChecksum(
|
|
|
135
135
|
FileChecksumGenFactory* checksum_factory,
|
|
136
136
|
const std::string& requested_checksum_func_name, std::string* file_checksum,
|
|
137
137
|
std::string* file_checksum_func_name,
|
|
138
|
-
size_t verify_checksums_readahead_size, bool allow_mmap_reads
|
|
138
|
+
size_t verify_checksums_readahead_size, bool /*allow_mmap_reads*/,
|
|
139
139
|
std::shared_ptr<IOTracer>& io_tracer, RateLimiter* rate_limiter,
|
|
140
140
|
Env::IOPriority rate_limiter_priority) {
|
|
141
141
|
if (checksum_factory == nullptr) {
|
|
@@ -196,10 +196,12 @@ IOStatus GenerateOneFileChecksum(
|
|
|
196
196
|
size_t readahead_size = (verify_checksums_readahead_size != 0)
|
|
197
197
|
? verify_checksums_readahead_size
|
|
198
198
|
: default_max_read_ahead_size;
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
199
|
+
std::unique_ptr<char[]> buf;
|
|
200
|
+
if (reader->use_direct_io()) {
|
|
201
|
+
size_t alignment = reader->file()->GetRequiredBufferAlignment();
|
|
202
|
+
readahead_size = (readahead_size + alignment - 1) & ~(alignment - 1);
|
|
203
|
+
}
|
|
204
|
+
buf.reset(new char[readahead_size]);
|
|
203
205
|
|
|
204
206
|
Slice slice;
|
|
205
207
|
uint64_t offset = 0;
|
|
@@ -207,11 +209,11 @@ IOStatus GenerateOneFileChecksum(
|
|
|
207
209
|
while (size > 0) {
|
|
208
210
|
size_t bytes_to_read =
|
|
209
211
|
static_cast<size_t>(std::min(uint64_t{readahead_size}, size));
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
212
|
+
io_s = reader->Read(opts, offset, bytes_to_read, &slice, buf.get(), nullptr,
|
|
213
|
+
rate_limiter_priority);
|
|
214
|
+
if (!io_s.ok()) {
|
|
215
|
+
return IOStatus::Corruption("file read failed with error: " +
|
|
216
|
+
io_s.ToString());
|
|
215
217
|
}
|
|
216
218
|
if (slice.size() == 0) {
|
|
217
219
|
return IOStatus::Corruption("file too small");
|
|
@@ -219,6 +221,8 @@ IOStatus GenerateOneFileChecksum(
|
|
|
219
221
|
checksum_generator->Update(slice.data(), slice.size());
|
|
220
222
|
size -= slice.size();
|
|
221
223
|
offset += slice.size();
|
|
224
|
+
|
|
225
|
+
TEST_SYNC_POINT("GenerateOneFileChecksum::Chunk:0");
|
|
222
226
|
}
|
|
223
227
|
checksum_generator->Finalize();
|
|
224
228
|
*file_checksum = checksum_generator->GetChecksum();
|