@nxtedition/rocksdb 7.0.12 → 7.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/Makefile +3 -0
- package/deps/rocksdb/rocksdb/TARGETS +6 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +12 -7
- package/deps/rocksdb/rocksdb/cache/cache_key.h +2 -0
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +18 -6
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +13 -5
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +89 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -28
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +147 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +30 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +403 -30
- package/deps/rocksdb/rocksdb/db/c.cc +159 -5
- package/deps/rocksdb/rocksdb/db/c_test.c +108 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +2 -1
- package/deps/rocksdb/rocksdb/db/column_family.h +7 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +22 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +6 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +15 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +35 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +55 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +21 -19
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +60 -1
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +247 -6
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +10 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +10 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -15
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +17 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +9 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -0
- package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +54 -0
- package/deps/rocksdb/rocksdb/db/db_iter.cc +50 -2
- package/deps/rocksdb/rocksdb/db/db_iter.h +2 -0
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +56 -25
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_test.cc +9 -0
- package/deps/rocksdb/rocksdb/db/flush_job.cc +17 -8
- package/deps/rocksdb/rocksdb/db/flush_job.h +1 -1
- package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
- package/deps/rocksdb/rocksdb/db/memtable.cc +103 -93
- package/deps/rocksdb/rocksdb/db/memtable.h +3 -3
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +7 -2
- package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
- package/deps/rocksdb/rocksdb/db/version_set.cc +13 -5
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +213 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -7
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db/write_batch.cc +154 -2
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +8 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +21 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +31 -4
- package/deps/rocksdb/rocksdb/env/env_test.cc +2 -2
- package/deps/rocksdb/rocksdb/env/fs_remap.cc +4 -0
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +17 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +136 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +23 -23
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +5 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +7 -0
- package/deps/rocksdb/rocksdb/options/cf_options.h +19 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +1 -6
- package/deps/rocksdb/rocksdb/options/db_options.h +0 -1
- package/deps/rocksdb/rocksdb/options/options.cc +4 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +2 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +1 -0
- package/deps/rocksdb/rocksdb/options/options_test.cc +4 -4
- package/deps/rocksdb/rocksdb/port/win/env_win.cc +1 -1
- package/deps/rocksdb/rocksdb/src.mk +1 -0
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +5 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +16 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +13 -7
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +7 -3
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -17
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +15 -9
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +32 -16
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +28 -18
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +15 -6
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +16 -7
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -1
- package/deps/rocksdb/rocksdb/table/get_context.cc +27 -6
- package/deps/rocksdb/rocksdb/table/get_context.h +2 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +5 -5
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +46 -0
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +3 -1
- package/deps/rocksdb/rocksdb/util/mutexlock.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
- package/package.json +1 -1
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -334,14 +334,15 @@ class MemTableIterator : public InternalIterator {
|
|
|
334
334
|
// iterator should only use prefix bloom filter
|
|
335
335
|
auto ts_sz = comparator_.comparator.user_comparator()->timestamp_size();
|
|
336
336
|
Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz));
|
|
337
|
-
if (prefix_extractor_->InDomain(user_k_without_ts)
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
337
|
+
if (prefix_extractor_->InDomain(user_k_without_ts)) {
|
|
338
|
+
if (!bloom_->MayContain(
|
|
339
|
+
prefix_extractor_->Transform(user_k_without_ts))) {
|
|
340
|
+
PERF_COUNTER_ADD(bloom_memtable_miss_count, 1);
|
|
341
|
+
valid_ = false;
|
|
342
|
+
return;
|
|
343
|
+
} else {
|
|
344
|
+
PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
|
|
345
|
+
}
|
|
345
346
|
}
|
|
346
347
|
}
|
|
347
348
|
iter_->Seek(k, nullptr);
|
|
@@ -353,14 +354,15 @@ class MemTableIterator : public InternalIterator {
|
|
|
353
354
|
if (bloom_) {
|
|
354
355
|
auto ts_sz = comparator_.comparator.user_comparator()->timestamp_size();
|
|
355
356
|
Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz));
|
|
356
|
-
if (prefix_extractor_->InDomain(user_k_without_ts)
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
357
|
+
if (prefix_extractor_->InDomain(user_k_without_ts)) {
|
|
358
|
+
if (!bloom_->MayContain(
|
|
359
|
+
prefix_extractor_->Transform(user_k_without_ts))) {
|
|
360
|
+
PERF_COUNTER_ADD(bloom_memtable_miss_count, 1);
|
|
361
|
+
valid_ = false;
|
|
362
|
+
return;
|
|
363
|
+
} else {
|
|
364
|
+
PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
|
|
365
|
+
}
|
|
364
366
|
}
|
|
365
367
|
}
|
|
366
368
|
iter_->Seek(k, nullptr);
|
|
@@ -738,21 +740,33 @@ static bool SaveValue(void* arg, const char* entry) {
|
|
|
738
740
|
|
|
739
741
|
s->seq = seq;
|
|
740
742
|
|
|
741
|
-
if ((type == kTypeValue || type == kTypeMerge || type == kTypeBlobIndex
|
|
743
|
+
if ((type == kTypeValue || type == kTypeMerge || type == kTypeBlobIndex ||
|
|
744
|
+
type == kTypeWideColumnEntity) &&
|
|
742
745
|
max_covering_tombstone_seq > seq) {
|
|
743
746
|
type = kTypeRangeDeletion;
|
|
744
747
|
}
|
|
745
748
|
switch (type) {
|
|
746
749
|
case kTypeBlobIndex:
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
*(s->status) = Status::NotSupported(
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
} else if (
|
|
750
|
+
case kTypeWideColumnEntity:
|
|
751
|
+
if (*(s->merge_in_progress)) {
|
|
752
|
+
*(s->status) = Status::NotSupported("Merge operator not supported");
|
|
753
|
+
} else if (!s->do_merge) {
|
|
754
|
+
*(s->status) = Status::NotSupported("GetMergeOperands not supported");
|
|
755
|
+
} else if (type == kTypeBlobIndex) {
|
|
756
|
+
if (s->is_blob_index == nullptr) {
|
|
757
|
+
ROCKS_LOG_ERROR(s->logger, "Encounter unexpected blob index.");
|
|
758
|
+
*(s->status) = Status::NotSupported(
|
|
759
|
+
"Encounter unsupported blob value. Please open DB with "
|
|
760
|
+
"ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
|
|
761
|
+
}
|
|
762
|
+
} else {
|
|
763
|
+
assert(type == kTypeWideColumnEntity);
|
|
764
|
+
|
|
765
|
+
// TODO: support wide-column entities
|
|
753
766
|
*(s->status) =
|
|
754
|
-
Status::NotSupported("
|
|
767
|
+
Status::NotSupported("Encountered unexpected wide-column entity");
|
|
755
768
|
}
|
|
769
|
+
|
|
756
770
|
if (!s->status->ok()) {
|
|
757
771
|
*(s->found_final_value) = true;
|
|
758
772
|
return false;
|
|
@@ -893,16 +907,20 @@ bool MemTable::Get(const LookupKey& key, std::string* value,
|
|
|
893
907
|
bool may_contain = true;
|
|
894
908
|
size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size();
|
|
895
909
|
Slice user_key_without_ts = StripTimestampFromUserKey(key.user_key(), ts_sz);
|
|
910
|
+
bool bloom_checked = false;
|
|
896
911
|
if (bloom_filter_) {
|
|
897
912
|
// when both memtable_whole_key_filtering and prefix_extractor_ are set,
|
|
898
913
|
// only do whole key filtering for Get() to save CPU
|
|
899
914
|
if (moptions_.memtable_whole_key_filtering) {
|
|
900
915
|
may_contain = bloom_filter_->MayContain(user_key_without_ts);
|
|
916
|
+
bloom_checked = true;
|
|
901
917
|
} else {
|
|
902
918
|
assert(prefix_extractor_);
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
919
|
+
if (prefix_extractor_->InDomain(user_key_without_ts)) {
|
|
920
|
+
may_contain = bloom_filter_->MayContain(
|
|
921
|
+
prefix_extractor_->Transform(user_key_without_ts));
|
|
922
|
+
bloom_checked = true;
|
|
923
|
+
}
|
|
906
924
|
}
|
|
907
925
|
}
|
|
908
926
|
|
|
@@ -911,7 +929,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value,
|
|
|
911
929
|
PERF_COUNTER_ADD(bloom_memtable_miss_count, 1);
|
|
912
930
|
*seq = kMaxSequenceNumber;
|
|
913
931
|
} else {
|
|
914
|
-
if (
|
|
932
|
+
if (bloom_checked) {
|
|
915
933
|
PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
|
|
916
934
|
}
|
|
917
935
|
GetFromTable(key, *max_covering_tombstone_seq, do_merge, callback,
|
|
@@ -988,10 +1006,6 @@ void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
|
988
1006
|
bloom_keys[num_keys] =
|
|
989
1007
|
prefix_extractor_->Transform(iter->ukey_without_ts);
|
|
990
1008
|
range_indexes[num_keys++] = iter.index();
|
|
991
|
-
} else {
|
|
992
|
-
// TODO: consider not counting these as Bloom hits to more closely
|
|
993
|
-
// match bloom_sst_hit_count
|
|
994
|
-
PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
|
|
995
1009
|
}
|
|
996
1010
|
}
|
|
997
1011
|
bloom_filter_->MayContain(num_keys, &bloom_keys[0], &may_match[0]);
|
|
@@ -1044,8 +1058,8 @@ void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
|
1044
1058
|
PERF_COUNTER_ADD(get_from_memtable_count, 1);
|
|
1045
1059
|
}
|
|
1046
1060
|
|
|
1047
|
-
Status MemTable::Update(SequenceNumber seq,
|
|
1048
|
-
const Slice& value,
|
|
1061
|
+
Status MemTable::Update(SequenceNumber seq, ValueType value_type,
|
|
1062
|
+
const Slice& key, const Slice& value,
|
|
1049
1063
|
const ProtectionInfoKVOS64* kv_prot_info) {
|
|
1050
1064
|
LookupKey lkey(key, seq);
|
|
1051
1065
|
Slice mem_key = lkey.memtable_key();
|
|
@@ -1075,7 +1089,7 @@ Status MemTable::Update(SequenceNumber seq, const Slice& key,
|
|
|
1075
1089
|
SequenceNumber existing_seq;
|
|
1076
1090
|
UnPackSequenceAndType(tag, &existing_seq, &type);
|
|
1077
1091
|
assert(existing_seq != seq);
|
|
1078
|
-
if (type ==
|
|
1092
|
+
if (type == value_type) {
|
|
1079
1093
|
Slice prev_value = GetLengthPrefixedSlice(key_ptr + key_length);
|
|
1080
1094
|
uint32_t prev_size = static_cast<uint32_t>(prev_value.size());
|
|
1081
1095
|
uint32_t new_size = static_cast<uint32_t>(value.size());
|
|
@@ -1103,8 +1117,8 @@ Status MemTable::Update(SequenceNumber seq, const Slice& key,
|
|
|
1103
1117
|
}
|
|
1104
1118
|
}
|
|
1105
1119
|
|
|
1106
|
-
// The latest value is not
|
|
1107
|
-
return Add(seq,
|
|
1120
|
+
// The latest value is not value_type or key doesn't exist
|
|
1121
|
+
return Add(seq, value_type, key, value, kv_prot_info);
|
|
1108
1122
|
}
|
|
1109
1123
|
|
|
1110
1124
|
Status MemTable::UpdateCallback(SequenceNumber seq, const Slice& key,
|
|
@@ -1137,66 +1151,62 @@ Status MemTable::UpdateCallback(SequenceNumber seq, const Slice& key,
|
|
|
1137
1151
|
ValueType type;
|
|
1138
1152
|
uint64_t existing_seq;
|
|
1139
1153
|
UnPackSequenceAndType(tag, &existing_seq, &type);
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
uint32_t prev_size = static_cast<uint32_t>(prev_value.size());
|
|
1144
|
-
|
|
1145
|
-
char* prev_buffer = const_cast<char*>(prev_value.data());
|
|
1146
|
-
uint32_t new_prev_size = prev_size;
|
|
1154
|
+
if (type == kTypeValue) {
|
|
1155
|
+
Slice prev_value = GetLengthPrefixedSlice(key_ptr + key_length);
|
|
1156
|
+
uint32_t prev_size = static_cast<uint32_t>(prev_value.size());
|
|
1147
1157
|
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1158
|
+
char* prev_buffer = const_cast<char*>(prev_value.data());
|
|
1159
|
+
uint32_t new_prev_size = prev_size;
|
|
1160
|
+
|
|
1161
|
+
std::string str_value;
|
|
1162
|
+
WriteLock wl(GetLock(lkey.user_key()));
|
|
1163
|
+
auto status = moptions_.inplace_callback(prev_buffer, &new_prev_size,
|
|
1164
|
+
delta, &str_value);
|
|
1165
|
+
if (status == UpdateStatus::UPDATED_INPLACE) {
|
|
1166
|
+
// Value already updated by callback.
|
|
1167
|
+
assert(new_prev_size <= prev_size);
|
|
1168
|
+
if (new_prev_size < prev_size) {
|
|
1169
|
+
// overwrite the new prev_size
|
|
1170
|
+
char* p = EncodeVarint32(const_cast<char*>(key_ptr) + key_length,
|
|
1171
|
+
new_prev_size);
|
|
1172
|
+
if (VarintLength(new_prev_size) < VarintLength(prev_size)) {
|
|
1173
|
+
// shift the value buffer as well.
|
|
1174
|
+
memcpy(p, prev_buffer, new_prev_size);
|
|
1175
|
+
prev_buffer = p;
|
|
1164
1176
|
}
|
|
1165
|
-
RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED);
|
|
1166
|
-
UpdateFlushState();
|
|
1167
|
-
if (kv_prot_info != nullptr) {
|
|
1168
|
-
ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info);
|
|
1169
|
-
// `seq` is swallowed and `existing_seq` prevails.
|
|
1170
|
-
updated_kv_prot_info.UpdateS(seq, existing_seq);
|
|
1171
|
-
updated_kv_prot_info.UpdateV(delta,
|
|
1172
|
-
Slice(prev_buffer, new_prev_size));
|
|
1173
|
-
Slice encoded(entry, prev_buffer + new_prev_size - entry);
|
|
1174
|
-
return VerifyEncodedEntry(encoded, updated_kv_prot_info);
|
|
1175
|
-
}
|
|
1176
|
-
return Status::OK();
|
|
1177
|
-
} else if (status == UpdateStatus::UPDATED) {
|
|
1178
|
-
Status s;
|
|
1179
|
-
if (kv_prot_info != nullptr) {
|
|
1180
|
-
ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info);
|
|
1181
|
-
updated_kv_prot_info.UpdateV(delta, str_value);
|
|
1182
|
-
s = Add(seq, kTypeValue, key, Slice(str_value),
|
|
1183
|
-
&updated_kv_prot_info);
|
|
1184
|
-
} else {
|
|
1185
|
-
s = Add(seq, kTypeValue, key, Slice(str_value),
|
|
1186
|
-
nullptr /* kv_prot_info */);
|
|
1187
|
-
}
|
|
1188
|
-
RecordTick(moptions_.statistics, NUMBER_KEYS_WRITTEN);
|
|
1189
|
-
UpdateFlushState();
|
|
1190
|
-
return s;
|
|
1191
|
-
} else if (status == UpdateStatus::UPDATE_FAILED) {
|
|
1192
|
-
// `UPDATE_FAILED` is named incorrectly. It indicates no update
|
|
1193
|
-
// happened. It does not indicate a failure happened.
|
|
1194
|
-
UpdateFlushState();
|
|
1195
|
-
return Status::OK();
|
|
1196
1177
|
}
|
|
1178
|
+
RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED);
|
|
1179
|
+
UpdateFlushState();
|
|
1180
|
+
if (kv_prot_info != nullptr) {
|
|
1181
|
+
ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info);
|
|
1182
|
+
// `seq` is swallowed and `existing_seq` prevails.
|
|
1183
|
+
updated_kv_prot_info.UpdateS(seq, existing_seq);
|
|
1184
|
+
updated_kv_prot_info.UpdateV(delta,
|
|
1185
|
+
Slice(prev_buffer, new_prev_size));
|
|
1186
|
+
Slice encoded(entry, prev_buffer + new_prev_size - entry);
|
|
1187
|
+
return VerifyEncodedEntry(encoded, updated_kv_prot_info);
|
|
1188
|
+
}
|
|
1189
|
+
return Status::OK();
|
|
1190
|
+
} else if (status == UpdateStatus::UPDATED) {
|
|
1191
|
+
Status s;
|
|
1192
|
+
if (kv_prot_info != nullptr) {
|
|
1193
|
+
ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info);
|
|
1194
|
+
updated_kv_prot_info.UpdateV(delta, str_value);
|
|
1195
|
+
s = Add(seq, kTypeValue, key, Slice(str_value),
|
|
1196
|
+
&updated_kv_prot_info);
|
|
1197
|
+
} else {
|
|
1198
|
+
s = Add(seq, kTypeValue, key, Slice(str_value),
|
|
1199
|
+
nullptr /* kv_prot_info */);
|
|
1200
|
+
}
|
|
1201
|
+
RecordTick(moptions_.statistics, NUMBER_KEYS_WRITTEN);
|
|
1202
|
+
UpdateFlushState();
|
|
1203
|
+
return s;
|
|
1204
|
+
} else if (status == UpdateStatus::UPDATE_FAILED) {
|
|
1205
|
+
// `UPDATE_FAILED` is named incorrectly. It indicates no update
|
|
1206
|
+
// happened. It does not indicate a failure happened.
|
|
1207
|
+
UpdateFlushState();
|
|
1208
|
+
return Status::OK();
|
|
1197
1209
|
}
|
|
1198
|
-
default:
|
|
1199
|
-
break;
|
|
1200
1210
|
}
|
|
1201
1211
|
}
|
|
1202
1212
|
}
|
|
@@ -274,7 +274,7 @@ class MemTable {
|
|
|
274
274
|
void MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
275
275
|
ReadCallback* callback);
|
|
276
276
|
|
|
277
|
-
// If `key` exists in current memtable with type
|
|
277
|
+
// If `key` exists in current memtable with type value_type and the existing
|
|
278
278
|
// value is at least as large as the new value, updates it in-place. Otherwise
|
|
279
279
|
// adds the new value to the memtable out-of-place.
|
|
280
280
|
//
|
|
@@ -284,8 +284,8 @@ class MemTable {
|
|
|
284
284
|
//
|
|
285
285
|
// REQUIRES: external synchronization to prevent simultaneous
|
|
286
286
|
// operations on the same MemTable.
|
|
287
|
-
Status Update(SequenceNumber seq,
|
|
288
|
-
const ProtectionInfoKVOS64* kv_prot_info);
|
|
287
|
+
Status Update(SequenceNumber seq, ValueType value_type, const Slice& key,
|
|
288
|
+
const Slice& value, const ProtectionInfoKVOS64* kv_prot_info);
|
|
289
289
|
|
|
290
290
|
// If `key` exists in current memtable with type `kTypeValue` and the existing
|
|
291
291
|
// value is at least as large as the new value, updates it in-place. Otherwise
|
|
@@ -212,11 +212,16 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
|
|
|
212
212
|
const Slice val = iter->value();
|
|
213
213
|
PinnableSlice blob_value;
|
|
214
214
|
const Slice* val_ptr;
|
|
215
|
-
if ((kTypeValue == ikey.type || kTypeBlobIndex == ikey.type
|
|
215
|
+
if ((kTypeValue == ikey.type || kTypeBlobIndex == ikey.type ||
|
|
216
|
+
kTypeWideColumnEntity == ikey.type) &&
|
|
216
217
|
(range_del_agg == nullptr ||
|
|
217
218
|
!range_del_agg->ShouldDelete(
|
|
218
219
|
ikey, RangeDelPositioningMode::kForwardTraversal))) {
|
|
219
|
-
if (ikey.type ==
|
|
220
|
+
if (ikey.type == kTypeWideColumnEntity) {
|
|
221
|
+
// TODO: support wide-column entities
|
|
222
|
+
return Status::NotSupported(
|
|
223
|
+
"Merge currently not supported for wide-column entities");
|
|
224
|
+
} else if (ikey.type == kTypeBlobIndex) {
|
|
220
225
|
BlobIndex blob_index;
|
|
221
226
|
|
|
222
227
|
s = blob_index.DecodeFrom(val);
|
|
@@ -316,7 +316,7 @@ struct FileMetaData {
|
|
|
316
316
|
};
|
|
317
317
|
|
|
318
318
|
// A compressed copy of file meta data that just contain minimum data needed
|
|
319
|
-
// to
|
|
319
|
+
// to serve read operations, while still keeping the pointer to full metadata
|
|
320
320
|
// of the file in case it is needed.
|
|
321
321
|
struct FdWithKeyRange {
|
|
322
322
|
FileDescriptor fd;
|
|
@@ -2161,6 +2161,10 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
|
|
2161
2161
|
"Encounter unexpected blob index. Please open DB with "
|
|
2162
2162
|
"ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
|
|
2163
2163
|
return;
|
|
2164
|
+
case GetContext::kUnexpectedWideColumnEntity:
|
|
2165
|
+
*status =
|
|
2166
|
+
Status::NotSupported("Encountered unexpected wide-column entity");
|
|
2167
|
+
return;
|
|
2164
2168
|
}
|
|
2165
2169
|
f = fp.GetNextFile();
|
|
2166
2170
|
}
|
|
@@ -3189,11 +3193,15 @@ void SortFileByOverlappingRatio(
|
|
|
3189
3193
|
ttl_boost_score;
|
|
3190
3194
|
}
|
|
3191
3195
|
|
|
3192
|
-
|
|
3193
|
-
|
|
3194
|
-
|
|
3195
|
-
|
|
3196
|
-
|
|
3196
|
+
size_t num_to_sort = temp->size() > VersionStorageInfo::kNumberFilesToSort
|
|
3197
|
+
? VersionStorageInfo::kNumberFilesToSort
|
|
3198
|
+
: temp->size();
|
|
3199
|
+
|
|
3200
|
+
std::partial_sort(temp->begin(), temp->begin() + num_to_sort, temp->end(),
|
|
3201
|
+
[&](const Fsize& f1, const Fsize& f2) -> bool {
|
|
3202
|
+
return file_to_order[f1.file->fd.GetNumber()] <
|
|
3203
|
+
file_to_order[f2.file->fd.GetNumber()];
|
|
3204
|
+
});
|
|
3197
3205
|
}
|
|
3198
3206
|
|
|
3199
3207
|
void SortFileByRoundRobin(const InternalKeyComparator& icmp,
|
|
@@ -141,6 +141,11 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
|
|
|
141
141
|
"ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
|
|
142
142
|
file_range.MarkKeyDone(iter);
|
|
143
143
|
continue;
|
|
144
|
+
case GetContext::kUnexpectedWideColumnEntity:
|
|
145
|
+
*status =
|
|
146
|
+
Status::NotSupported("Encountered unexpected wide-column entity");
|
|
147
|
+
file_range.MarkKeyDone(iter);
|
|
148
|
+
continue;
|
|
144
149
|
}
|
|
145
150
|
}
|
|
146
151
|
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
3
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
+
|
|
6
|
+
#include <array>
|
|
7
|
+
#include <memory>
|
|
8
|
+
|
|
9
|
+
#include "db/db_test_util.h"
|
|
10
|
+
#include "port/stack_trace.h"
|
|
11
|
+
#include "test_util/testutil.h"
|
|
12
|
+
#include "utilities/merge_operators.h"
|
|
13
|
+
|
|
14
|
+
namespace ROCKSDB_NAMESPACE {
|
|
15
|
+
|
|
16
|
+
class DBWideBasicTest : public DBTestBase {
|
|
17
|
+
protected:
|
|
18
|
+
explicit DBWideBasicTest()
|
|
19
|
+
: DBTestBase("db_wide_basic_test", /* env_do_fsync */ false) {}
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
TEST_F(DBWideBasicTest, PutEntity) {
|
|
23
|
+
Options options = GetDefaultOptions();
|
|
24
|
+
|
|
25
|
+
// Use the DB::PutEntity API
|
|
26
|
+
constexpr char first_key[] = "first";
|
|
27
|
+
WideColumns first_columns{{"attr_name1", "foo"}, {"attr_name2", "bar"}};
|
|
28
|
+
|
|
29
|
+
ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
|
|
30
|
+
first_key, first_columns));
|
|
31
|
+
|
|
32
|
+
// Use WriteBatch
|
|
33
|
+
constexpr char second_key[] = "second";
|
|
34
|
+
WideColumns second_columns{{"attr_one", "two"}, {"attr_three", "four"}};
|
|
35
|
+
|
|
36
|
+
WriteBatch batch;
|
|
37
|
+
ASSERT_OK(
|
|
38
|
+
batch.PutEntity(db_->DefaultColumnFamily(), second_key, second_columns));
|
|
39
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
40
|
+
|
|
41
|
+
// Note: currently, read APIs are supposed to return NotSupported
|
|
42
|
+
auto verify = [&]() {
|
|
43
|
+
{
|
|
44
|
+
PinnableSlice result;
|
|
45
|
+
ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), first_key,
|
|
46
|
+
&result)
|
|
47
|
+
.IsNotSupported());
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
{
|
|
51
|
+
PinnableSlice result;
|
|
52
|
+
ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(),
|
|
53
|
+
second_key, &result)
|
|
54
|
+
.IsNotSupported());
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
{
|
|
58
|
+
constexpr size_t num_keys = 2;
|
|
59
|
+
|
|
60
|
+
std::array<Slice, num_keys> keys{{first_key, second_key}};
|
|
61
|
+
std::array<PinnableSlice, num_keys> values;
|
|
62
|
+
std::array<Status, num_keys> statuses;
|
|
63
|
+
|
|
64
|
+
db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
|
|
65
|
+
&keys[0], &values[0], &statuses[0]);
|
|
66
|
+
|
|
67
|
+
ASSERT_TRUE(values[0].empty());
|
|
68
|
+
ASSERT_TRUE(statuses[0].IsNotSupported());
|
|
69
|
+
|
|
70
|
+
ASSERT_TRUE(values[1].empty());
|
|
71
|
+
ASSERT_TRUE(statuses[1].IsNotSupported());
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
{
|
|
75
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
|
|
76
|
+
|
|
77
|
+
iter->SeekToFirst();
|
|
78
|
+
ASSERT_FALSE(iter->Valid());
|
|
79
|
+
ASSERT_TRUE(iter->status().IsNotSupported());
|
|
80
|
+
|
|
81
|
+
iter->SeekToLast();
|
|
82
|
+
ASSERT_FALSE(iter->Valid());
|
|
83
|
+
ASSERT_TRUE(iter->status().IsNotSupported());
|
|
84
|
+
}
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
// Try reading from memtable
|
|
88
|
+
verify();
|
|
89
|
+
|
|
90
|
+
// Try reading after recovery
|
|
91
|
+
Close();
|
|
92
|
+
options.avoid_flush_during_recovery = true;
|
|
93
|
+
Reopen(options);
|
|
94
|
+
|
|
95
|
+
verify();
|
|
96
|
+
|
|
97
|
+
// Try reading from storage
|
|
98
|
+
ASSERT_OK(Flush());
|
|
99
|
+
|
|
100
|
+
verify();
|
|
101
|
+
|
|
102
|
+
// Add a couple of merge operands
|
|
103
|
+
Close();
|
|
104
|
+
options.merge_operator = MergeOperators::CreateStringAppendOperator();
|
|
105
|
+
Reopen(options);
|
|
106
|
+
|
|
107
|
+
constexpr char merge_operand[] = "bla";
|
|
108
|
+
|
|
109
|
+
ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), first_key,
|
|
110
|
+
merge_operand));
|
|
111
|
+
ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), second_key,
|
|
112
|
+
merge_operand));
|
|
113
|
+
|
|
114
|
+
// Try reading from memtable
|
|
115
|
+
verify();
|
|
116
|
+
|
|
117
|
+
// Try reading from storage
|
|
118
|
+
ASSERT_OK(Flush());
|
|
119
|
+
|
|
120
|
+
verify();
|
|
121
|
+
|
|
122
|
+
// Do it again, with the Put and the Merge in the same memtable
|
|
123
|
+
ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
|
|
124
|
+
first_key, first_columns));
|
|
125
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
126
|
+
ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), first_key,
|
|
127
|
+
merge_operand));
|
|
128
|
+
ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), second_key,
|
|
129
|
+
merge_operand));
|
|
130
|
+
|
|
131
|
+
// Try reading from memtable
|
|
132
|
+
verify();
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
TEST_F(DBWideBasicTest, PutEntityColumnFamily) {
|
|
136
|
+
Options options = GetDefaultOptions();
|
|
137
|
+
CreateAndReopenWithCF({"corinthian"}, options);
|
|
138
|
+
|
|
139
|
+
// Use the DB::PutEntity API
|
|
140
|
+
constexpr char first_key[] = "first";
|
|
141
|
+
WideColumns first_columns{{"attr_name1", "foo"}, {"attr_name2", "bar"}};
|
|
142
|
+
|
|
143
|
+
ASSERT_OK(
|
|
144
|
+
db_->PutEntity(WriteOptions(), handles_[1], first_key, first_columns));
|
|
145
|
+
|
|
146
|
+
// Use WriteBatch
|
|
147
|
+
constexpr char second_key[] = "second";
|
|
148
|
+
WideColumns second_columns{{"attr_one", "two"}, {"attr_three", "four"}};
|
|
149
|
+
|
|
150
|
+
WriteBatch batch;
|
|
151
|
+
ASSERT_OK(batch.PutEntity(handles_[1], second_key, second_columns));
|
|
152
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
TEST_F(DBWideBasicTest, PutEntityTimestampError) {
|
|
156
|
+
// Note: timestamps are currently not supported
|
|
157
|
+
|
|
158
|
+
Options options = GetDefaultOptions();
|
|
159
|
+
options.comparator = test::BytewiseComparatorWithU64TsWrapper();
|
|
160
|
+
|
|
161
|
+
ColumnFamilyHandle* handle = nullptr;
|
|
162
|
+
ASSERT_OK(db_->CreateColumnFamily(options, "corinthian", &handle));
|
|
163
|
+
std::unique_ptr<ColumnFamilyHandle> handle_guard(handle);
|
|
164
|
+
|
|
165
|
+
// Use the DB::PutEntity API
|
|
166
|
+
constexpr char first_key[] = "first";
|
|
167
|
+
WideColumns first_columns{{"attr_name1", "foo"}, {"attr_name2", "bar"}};
|
|
168
|
+
|
|
169
|
+
ASSERT_TRUE(db_->PutEntity(WriteOptions(), handle, first_key, first_columns)
|
|
170
|
+
.IsInvalidArgument());
|
|
171
|
+
|
|
172
|
+
// Use WriteBatch
|
|
173
|
+
constexpr char second_key[] = "second";
|
|
174
|
+
WideColumns second_columns{{"doric", "column"}, {"ionic", "column"}};
|
|
175
|
+
|
|
176
|
+
WriteBatch batch;
|
|
177
|
+
ASSERT_TRUE(
|
|
178
|
+
batch.PutEntity(handle, second_key, second_columns).IsInvalidArgument());
|
|
179
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
TEST_F(DBWideBasicTest, PutEntitySerializationError) {
|
|
183
|
+
// Make sure duplicate columns are caught
|
|
184
|
+
|
|
185
|
+
Options options = GetDefaultOptions();
|
|
186
|
+
|
|
187
|
+
// Use the DB::PutEntity API
|
|
188
|
+
constexpr char first_key[] = "first";
|
|
189
|
+
WideColumns first_columns{{"foo", "bar"}, {"foo", "baz"}};
|
|
190
|
+
|
|
191
|
+
ASSERT_TRUE(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
|
|
192
|
+
first_key, first_columns)
|
|
193
|
+
.IsCorruption());
|
|
194
|
+
|
|
195
|
+
// Use WriteBatch
|
|
196
|
+
constexpr char second_key[] = "second";
|
|
197
|
+
WideColumns second_columns{{"column", "doric"}, {"column", "ionic"}};
|
|
198
|
+
|
|
199
|
+
WriteBatch batch;
|
|
200
|
+
ASSERT_TRUE(
|
|
201
|
+
batch.PutEntity(db_->DefaultColumnFamily(), second_key, second_columns)
|
|
202
|
+
.IsCorruption());
|
|
203
|
+
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
207
|
+
|
|
208
|
+
int main(int argc, char** argv) {
|
|
209
|
+
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
|
210
|
+
::testing::InitGoogleTest(&argc, argv);
|
|
211
|
+
RegisterCustomObjects(argc, argv);
|
|
212
|
+
return RUN_ALL_TESTS();
|
|
213
|
+
}
|
|
@@ -17,12 +17,6 @@ namespace ROCKSDB_NAMESPACE {
|
|
|
17
17
|
|
|
18
18
|
Status WideColumnSerialization::Serialize(const WideColumns& columns,
|
|
19
19
|
std::string& output) {
|
|
20
|
-
// Column names should be strictly ascending
|
|
21
|
-
assert(std::adjacent_find(columns.cbegin(), columns.cend(),
|
|
22
|
-
[](const WideColumn& lhs, const WideColumn& rhs) {
|
|
23
|
-
return lhs.name().compare(rhs.name()) > 0;
|
|
24
|
-
}) == columns.cend());
|
|
25
|
-
|
|
26
20
|
if (columns.size() >
|
|
27
21
|
static_cast<size_t>(std::numeric_limits<uint32_t>::max())) {
|
|
28
22
|
return Status::InvalidArgument("Too many wide columns");
|
|
@@ -32,12 +26,17 @@ Status WideColumnSerialization::Serialize(const WideColumns& columns,
|
|
|
32
26
|
|
|
33
27
|
PutVarint32(&output, static_cast<uint32_t>(columns.size()));
|
|
34
28
|
|
|
35
|
-
for (
|
|
29
|
+
for (size_t i = 0; i < columns.size(); ++i) {
|
|
30
|
+
const WideColumn& column = columns[i];
|
|
31
|
+
|
|
36
32
|
const Slice& name = column.name();
|
|
37
33
|
if (name.size() >
|
|
38
34
|
static_cast<size_t>(std::numeric_limits<uint32_t>::max())) {
|
|
39
35
|
return Status::InvalidArgument("Wide column name too long");
|
|
40
36
|
}
|
|
37
|
+
if (i > 0 && columns[i - 1].name().compare(name) >= 0) {
|
|
38
|
+
return Status::Corruption("Wide columns out of order");
|
|
39
|
+
}
|
|
41
40
|
|
|
42
41
|
const Slice& value = column.value();
|
|
43
42
|
if (value.size() >
|
|
@@ -124,6 +124,22 @@ TEST(WideColumnSerializationTest, SerializeDeserialize) {
|
|
|
124
124
|
}
|
|
125
125
|
}
|
|
126
126
|
|
|
127
|
+
TEST(WideColumnSerializationTest, SerializeDuplicateError) {
|
|
128
|
+
WideColumns columns{{"foo", "bar"}, {"foo", "baz"}};
|
|
129
|
+
std::string output;
|
|
130
|
+
|
|
131
|
+
ASSERT_TRUE(
|
|
132
|
+
WideColumnSerialization::Serialize(columns, output).IsCorruption());
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
TEST(WideColumnSerializationTest, SerializeOutOfOrderError) {
|
|
136
|
+
WideColumns columns{{"hello", "world"}, {"foo", "bar"}};
|
|
137
|
+
std::string output;
|
|
138
|
+
|
|
139
|
+
ASSERT_TRUE(
|
|
140
|
+
WideColumnSerialization::Serialize(columns, output).IsCorruption());
|
|
141
|
+
}
|
|
142
|
+
|
|
127
143
|
TEST(WideColumnSerializationTest, DeserializeVersionError) {
|
|
128
144
|
// Can't decode version
|
|
129
145
|
|