@nxtedition/rocksdb 7.0.12 → 7.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +1 -0
  2. package/deps/rocksdb/rocksdb/Makefile +3 -0
  3. package/deps/rocksdb/rocksdb/TARGETS +6 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +12 -7
  5. package/deps/rocksdb/rocksdb/cache/cache_key.h +2 -0
  6. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +18 -6
  7. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +13 -5
  8. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +89 -0
  9. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -28
  10. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +147 -2
  11. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +30 -0
  12. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +403 -30
  13. package/deps/rocksdb/rocksdb/db/c.cc +159 -5
  14. package/deps/rocksdb/rocksdb/db/c_test.c +108 -0
  15. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -1
  16. package/deps/rocksdb/rocksdb/db/column_family.h +7 -5
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +22 -0
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -0
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +6 -3
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +15 -0
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +35 -2
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +55 -0
  23. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +21 -19
  24. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +60 -1
  25. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +247 -6
  26. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +10 -0
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -33
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +10 -2
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -15
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +17 -3
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -4
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +9 -0
  33. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
  34. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -0
  35. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +54 -0
  36. package/deps/rocksdb/rocksdb/db/db_iter.cc +50 -2
  37. package/deps/rocksdb/rocksdb/db/db_iter.h +2 -0
  38. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +56 -25
  39. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1 -1
  40. package/deps/rocksdb/rocksdb/db/db_test.cc +9 -0
  41. package/deps/rocksdb/rocksdb/db/flush_job.cc +17 -8
  42. package/deps/rocksdb/rocksdb/db/flush_job.h +1 -1
  43. package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
  44. package/deps/rocksdb/rocksdb/db/memtable.cc +103 -93
  45. package/deps/rocksdb/rocksdb/db/memtable.h +3 -3
  46. package/deps/rocksdb/rocksdb/db/merge_helper.cc +7 -2
  47. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  48. package/deps/rocksdb/rocksdb/db/version_set.cc +13 -5
  49. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  50. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +213 -0
  51. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -7
  52. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +16 -0
  53. package/deps/rocksdb/rocksdb/db/write_batch.cc +154 -2
  54. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  55. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  56. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +8 -3
  57. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +21 -1
  58. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +31 -4
  59. package/deps/rocksdb/rocksdb/env/env_test.cc +2 -2
  60. package/deps/rocksdb/rocksdb/env/fs_remap.cc +4 -0
  61. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
  62. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +17 -0
  63. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +136 -0
  64. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  65. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -1
  66. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  67. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +23 -23
  68. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
  69. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +11 -0
  70. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +14 -0
  71. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +5 -0
  72. package/deps/rocksdb/rocksdb/options/cf_options.cc +7 -0
  73. package/deps/rocksdb/rocksdb/options/cf_options.h +19 -0
  74. package/deps/rocksdb/rocksdb/options/db_options.cc +1 -6
  75. package/deps/rocksdb/rocksdb/options/db_options.h +0 -1
  76. package/deps/rocksdb/rocksdb/options/options.cc +4 -1
  77. package/deps/rocksdb/rocksdb/options/options_helper.cc +2 -0
  78. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +1 -0
  79. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -4
  80. package/deps/rocksdb/rocksdb/port/win/env_win.cc +1 -1
  81. package/deps/rocksdb/rocksdb/src.mk +1 -0
  82. package/deps/rocksdb/rocksdb/table/block_based/block.cc +5 -3
  83. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +2 -2
  84. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +16 -9
  85. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -2
  86. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +1 -1
  87. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +13 -7
  88. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +7 -3
  89. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +4 -2
  90. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -17
  91. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +15 -9
  92. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +32 -16
  93. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +28 -18
  94. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +15 -6
  95. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +16 -7
  96. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -1
  97. package/deps/rocksdb/rocksdb/table/get_context.cc +27 -6
  98. package/deps/rocksdb/rocksdb/table/get_context.h +2 -0
  99. package/deps/rocksdb/rocksdb/table/table_test.cc +5 -5
  100. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +46 -0
  101. package/deps/rocksdb/rocksdb/util/filter_bench.cc +3 -1
  102. package/deps/rocksdb/rocksdb/util/mutexlock.h +1 -1
  103. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
  104. package/package.json +1 -1
  105. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -334,14 +334,15 @@ class MemTableIterator : public InternalIterator {
334
334
  // iterator should only use prefix bloom filter
335
335
  auto ts_sz = comparator_.comparator.user_comparator()->timestamp_size();
336
336
  Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz));
337
- if (prefix_extractor_->InDomain(user_k_without_ts) &&
338
- !bloom_->MayContain(
339
- prefix_extractor_->Transform(user_k_without_ts))) {
340
- PERF_COUNTER_ADD(bloom_memtable_miss_count, 1);
341
- valid_ = false;
342
- return;
343
- } else {
344
- PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
337
+ if (prefix_extractor_->InDomain(user_k_without_ts)) {
338
+ if (!bloom_->MayContain(
339
+ prefix_extractor_->Transform(user_k_without_ts))) {
340
+ PERF_COUNTER_ADD(bloom_memtable_miss_count, 1);
341
+ valid_ = false;
342
+ return;
343
+ } else {
344
+ PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
345
+ }
345
346
  }
346
347
  }
347
348
  iter_->Seek(k, nullptr);
@@ -353,14 +354,15 @@ class MemTableIterator : public InternalIterator {
353
354
  if (bloom_) {
354
355
  auto ts_sz = comparator_.comparator.user_comparator()->timestamp_size();
355
356
  Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz));
356
- if (prefix_extractor_->InDomain(user_k_without_ts) &&
357
- !bloom_->MayContain(
358
- prefix_extractor_->Transform(user_k_without_ts))) {
359
- PERF_COUNTER_ADD(bloom_memtable_miss_count, 1);
360
- valid_ = false;
361
- return;
362
- } else {
363
- PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
357
+ if (prefix_extractor_->InDomain(user_k_without_ts)) {
358
+ if (!bloom_->MayContain(
359
+ prefix_extractor_->Transform(user_k_without_ts))) {
360
+ PERF_COUNTER_ADD(bloom_memtable_miss_count, 1);
361
+ valid_ = false;
362
+ return;
363
+ } else {
364
+ PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
365
+ }
364
366
  }
365
367
  }
366
368
  iter_->Seek(k, nullptr);
@@ -738,21 +740,33 @@ static bool SaveValue(void* arg, const char* entry) {
738
740
 
739
741
  s->seq = seq;
740
742
 
741
- if ((type == kTypeValue || type == kTypeMerge || type == kTypeBlobIndex) &&
743
+ if ((type == kTypeValue || type == kTypeMerge || type == kTypeBlobIndex ||
744
+ type == kTypeWideColumnEntity) &&
742
745
  max_covering_tombstone_seq > seq) {
743
746
  type = kTypeRangeDeletion;
744
747
  }
745
748
  switch (type) {
746
749
  case kTypeBlobIndex:
747
- if (s->is_blob_index == nullptr) {
748
- ROCKS_LOG_ERROR(s->logger, "Encounter unexpected blob index.");
749
- *(s->status) = Status::NotSupported(
750
- "Encounter unsupported blob value. Please open DB with "
751
- "ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
752
- } else if (*(s->merge_in_progress)) {
750
+ case kTypeWideColumnEntity:
751
+ if (*(s->merge_in_progress)) {
752
+ *(s->status) = Status::NotSupported("Merge operator not supported");
753
+ } else if (!s->do_merge) {
754
+ *(s->status) = Status::NotSupported("GetMergeOperands not supported");
755
+ } else if (type == kTypeBlobIndex) {
756
+ if (s->is_blob_index == nullptr) {
757
+ ROCKS_LOG_ERROR(s->logger, "Encounter unexpected blob index.");
758
+ *(s->status) = Status::NotSupported(
759
+ "Encounter unsupported blob value. Please open DB with "
760
+ "ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
761
+ }
762
+ } else {
763
+ assert(type == kTypeWideColumnEntity);
764
+
765
+ // TODO: support wide-column entities
753
766
  *(s->status) =
754
- Status::NotSupported("Blob DB does not support merge operator.");
767
+ Status::NotSupported("Encountered unexpected wide-column entity");
755
768
  }
769
+
756
770
  if (!s->status->ok()) {
757
771
  *(s->found_final_value) = true;
758
772
  return false;
@@ -893,16 +907,20 @@ bool MemTable::Get(const LookupKey& key, std::string* value,
893
907
  bool may_contain = true;
894
908
  size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size();
895
909
  Slice user_key_without_ts = StripTimestampFromUserKey(key.user_key(), ts_sz);
910
+ bool bloom_checked = false;
896
911
  if (bloom_filter_) {
897
912
  // when both memtable_whole_key_filtering and prefix_extractor_ are set,
898
913
  // only do whole key filtering for Get() to save CPU
899
914
  if (moptions_.memtable_whole_key_filtering) {
900
915
  may_contain = bloom_filter_->MayContain(user_key_without_ts);
916
+ bloom_checked = true;
901
917
  } else {
902
918
  assert(prefix_extractor_);
903
- may_contain = !prefix_extractor_->InDomain(user_key_without_ts) ||
904
- bloom_filter_->MayContain(
905
- prefix_extractor_->Transform(user_key_without_ts));
919
+ if (prefix_extractor_->InDomain(user_key_without_ts)) {
920
+ may_contain = bloom_filter_->MayContain(
921
+ prefix_extractor_->Transform(user_key_without_ts));
922
+ bloom_checked = true;
923
+ }
906
924
  }
907
925
  }
908
926
 
@@ -911,7 +929,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value,
911
929
  PERF_COUNTER_ADD(bloom_memtable_miss_count, 1);
912
930
  *seq = kMaxSequenceNumber;
913
931
  } else {
914
- if (bloom_filter_) {
932
+ if (bloom_checked) {
915
933
  PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
916
934
  }
917
935
  GetFromTable(key, *max_covering_tombstone_seq, do_merge, callback,
@@ -988,10 +1006,6 @@ void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
988
1006
  bloom_keys[num_keys] =
989
1007
  prefix_extractor_->Transform(iter->ukey_without_ts);
990
1008
  range_indexes[num_keys++] = iter.index();
991
- } else {
992
- // TODO: consider not counting these as Bloom hits to more closely
993
- // match bloom_sst_hit_count
994
- PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
995
1009
  }
996
1010
  }
997
1011
  bloom_filter_->MayContain(num_keys, &bloom_keys[0], &may_match[0]);
@@ -1044,8 +1058,8 @@ void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
1044
1058
  PERF_COUNTER_ADD(get_from_memtable_count, 1);
1045
1059
  }
1046
1060
 
1047
- Status MemTable::Update(SequenceNumber seq, const Slice& key,
1048
- const Slice& value,
1061
+ Status MemTable::Update(SequenceNumber seq, ValueType value_type,
1062
+ const Slice& key, const Slice& value,
1049
1063
  const ProtectionInfoKVOS64* kv_prot_info) {
1050
1064
  LookupKey lkey(key, seq);
1051
1065
  Slice mem_key = lkey.memtable_key();
@@ -1075,7 +1089,7 @@ Status MemTable::Update(SequenceNumber seq, const Slice& key,
1075
1089
  SequenceNumber existing_seq;
1076
1090
  UnPackSequenceAndType(tag, &existing_seq, &type);
1077
1091
  assert(existing_seq != seq);
1078
- if (type == kTypeValue) {
1092
+ if (type == value_type) {
1079
1093
  Slice prev_value = GetLengthPrefixedSlice(key_ptr + key_length);
1080
1094
  uint32_t prev_size = static_cast<uint32_t>(prev_value.size());
1081
1095
  uint32_t new_size = static_cast<uint32_t>(value.size());
@@ -1103,8 +1117,8 @@ Status MemTable::Update(SequenceNumber seq, const Slice& key,
1103
1117
  }
1104
1118
  }
1105
1119
 
1106
- // The latest value is not `kTypeValue` or key doesn't exist
1107
- return Add(seq, kTypeValue, key, value, kv_prot_info);
1120
+ // The latest value is not value_type or key doesn't exist
1121
+ return Add(seq, value_type, key, value, kv_prot_info);
1108
1122
  }
1109
1123
 
1110
1124
  Status MemTable::UpdateCallback(SequenceNumber seq, const Slice& key,
@@ -1137,66 +1151,62 @@ Status MemTable::UpdateCallback(SequenceNumber seq, const Slice& key,
1137
1151
  ValueType type;
1138
1152
  uint64_t existing_seq;
1139
1153
  UnPackSequenceAndType(tag, &existing_seq, &type);
1140
- switch (type) {
1141
- case kTypeValue: {
1142
- Slice prev_value = GetLengthPrefixedSlice(key_ptr + key_length);
1143
- uint32_t prev_size = static_cast<uint32_t>(prev_value.size());
1144
-
1145
- char* prev_buffer = const_cast<char*>(prev_value.data());
1146
- uint32_t new_prev_size = prev_size;
1154
+ if (type == kTypeValue) {
1155
+ Slice prev_value = GetLengthPrefixedSlice(key_ptr + key_length);
1156
+ uint32_t prev_size = static_cast<uint32_t>(prev_value.size());
1147
1157
 
1148
- std::string str_value;
1149
- WriteLock wl(GetLock(lkey.user_key()));
1150
- auto status = moptions_.inplace_callback(prev_buffer, &new_prev_size,
1151
- delta, &str_value);
1152
- if (status == UpdateStatus::UPDATED_INPLACE) {
1153
- // Value already updated by callback.
1154
- assert(new_prev_size <= prev_size);
1155
- if (new_prev_size < prev_size) {
1156
- // overwrite the new prev_size
1157
- char* p = EncodeVarint32(const_cast<char*>(key_ptr) + key_length,
1158
- new_prev_size);
1159
- if (VarintLength(new_prev_size) < VarintLength(prev_size)) {
1160
- // shift the value buffer as well.
1161
- memcpy(p, prev_buffer, new_prev_size);
1162
- prev_buffer = p;
1163
- }
1158
+ char* prev_buffer = const_cast<char*>(prev_value.data());
1159
+ uint32_t new_prev_size = prev_size;
1160
+
1161
+ std::string str_value;
1162
+ WriteLock wl(GetLock(lkey.user_key()));
1163
+ auto status = moptions_.inplace_callback(prev_buffer, &new_prev_size,
1164
+ delta, &str_value);
1165
+ if (status == UpdateStatus::UPDATED_INPLACE) {
1166
+ // Value already updated by callback.
1167
+ assert(new_prev_size <= prev_size);
1168
+ if (new_prev_size < prev_size) {
1169
+ // overwrite the new prev_size
1170
+ char* p = EncodeVarint32(const_cast<char*>(key_ptr) + key_length,
1171
+ new_prev_size);
1172
+ if (VarintLength(new_prev_size) < VarintLength(prev_size)) {
1173
+ // shift the value buffer as well.
1174
+ memcpy(p, prev_buffer, new_prev_size);
1175
+ prev_buffer = p;
1164
1176
  }
1165
- RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED);
1166
- UpdateFlushState();
1167
- if (kv_prot_info != nullptr) {
1168
- ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info);
1169
- // `seq` is swallowed and `existing_seq` prevails.
1170
- updated_kv_prot_info.UpdateS(seq, existing_seq);
1171
- updated_kv_prot_info.UpdateV(delta,
1172
- Slice(prev_buffer, new_prev_size));
1173
- Slice encoded(entry, prev_buffer + new_prev_size - entry);
1174
- return VerifyEncodedEntry(encoded, updated_kv_prot_info);
1175
- }
1176
- return Status::OK();
1177
- } else if (status == UpdateStatus::UPDATED) {
1178
- Status s;
1179
- if (kv_prot_info != nullptr) {
1180
- ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info);
1181
- updated_kv_prot_info.UpdateV(delta, str_value);
1182
- s = Add(seq, kTypeValue, key, Slice(str_value),
1183
- &updated_kv_prot_info);
1184
- } else {
1185
- s = Add(seq, kTypeValue, key, Slice(str_value),
1186
- nullptr /* kv_prot_info */);
1187
- }
1188
- RecordTick(moptions_.statistics, NUMBER_KEYS_WRITTEN);
1189
- UpdateFlushState();
1190
- return s;
1191
- } else if (status == UpdateStatus::UPDATE_FAILED) {
1192
- // `UPDATE_FAILED` is named incorrectly. It indicates no update
1193
- // happened. It does not indicate a failure happened.
1194
- UpdateFlushState();
1195
- return Status::OK();
1196
1177
  }
1178
+ RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED);
1179
+ UpdateFlushState();
1180
+ if (kv_prot_info != nullptr) {
1181
+ ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info);
1182
+ // `seq` is swallowed and `existing_seq` prevails.
1183
+ updated_kv_prot_info.UpdateS(seq, existing_seq);
1184
+ updated_kv_prot_info.UpdateV(delta,
1185
+ Slice(prev_buffer, new_prev_size));
1186
+ Slice encoded(entry, prev_buffer + new_prev_size - entry);
1187
+ return VerifyEncodedEntry(encoded, updated_kv_prot_info);
1188
+ }
1189
+ return Status::OK();
1190
+ } else if (status == UpdateStatus::UPDATED) {
1191
+ Status s;
1192
+ if (kv_prot_info != nullptr) {
1193
+ ProtectionInfoKVOS64 updated_kv_prot_info(*kv_prot_info);
1194
+ updated_kv_prot_info.UpdateV(delta, str_value);
1195
+ s = Add(seq, kTypeValue, key, Slice(str_value),
1196
+ &updated_kv_prot_info);
1197
+ } else {
1198
+ s = Add(seq, kTypeValue, key, Slice(str_value),
1199
+ nullptr /* kv_prot_info */);
1200
+ }
1201
+ RecordTick(moptions_.statistics, NUMBER_KEYS_WRITTEN);
1202
+ UpdateFlushState();
1203
+ return s;
1204
+ } else if (status == UpdateStatus::UPDATE_FAILED) {
1205
+ // `UPDATE_FAILED` is named incorrectly. It indicates no update
1206
+ // happened. It does not indicate a failure happened.
1207
+ UpdateFlushState();
1208
+ return Status::OK();
1197
1209
  }
1198
- default:
1199
- break;
1200
1210
  }
1201
1211
  }
1202
1212
  }
@@ -274,7 +274,7 @@ class MemTable {
274
274
  void MultiGet(const ReadOptions& read_options, MultiGetRange* range,
275
275
  ReadCallback* callback);
276
276
 
277
- // If `key` exists in current memtable with type `kTypeValue` and the existing
277
+ // If `key` exists in current memtable with type value_type and the existing
278
278
  // value is at least as large as the new value, updates it in-place. Otherwise
279
279
  // adds the new value to the memtable out-of-place.
280
280
  //
@@ -284,8 +284,8 @@ class MemTable {
284
284
  //
285
285
  // REQUIRES: external synchronization to prevent simultaneous
286
286
  // operations on the same MemTable.
287
- Status Update(SequenceNumber seq, const Slice& key, const Slice& value,
288
- const ProtectionInfoKVOS64* kv_prot_info);
287
+ Status Update(SequenceNumber seq, ValueType value_type, const Slice& key,
288
+ const Slice& value, const ProtectionInfoKVOS64* kv_prot_info);
289
289
 
290
290
  // If `key` exists in current memtable with type `kTypeValue` and the existing
291
291
  // value is at least as large as the new value, updates it in-place. Otherwise
@@ -212,11 +212,16 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
212
212
  const Slice val = iter->value();
213
213
  PinnableSlice blob_value;
214
214
  const Slice* val_ptr;
215
- if ((kTypeValue == ikey.type || kTypeBlobIndex == ikey.type) &&
215
+ if ((kTypeValue == ikey.type || kTypeBlobIndex == ikey.type ||
216
+ kTypeWideColumnEntity == ikey.type) &&
216
217
  (range_del_agg == nullptr ||
217
218
  !range_del_agg->ShouldDelete(
218
219
  ikey, RangeDelPositioningMode::kForwardTraversal))) {
219
- if (ikey.type == kTypeBlobIndex) {
220
+ if (ikey.type == kTypeWideColumnEntity) {
221
+ // TODO: support wide-column entities
222
+ return Status::NotSupported(
223
+ "Merge currently not supported for wide-column entities");
224
+ } else if (ikey.type == kTypeBlobIndex) {
220
225
  BlobIndex blob_index;
221
226
 
222
227
  s = blob_index.DecodeFrom(val);
@@ -316,7 +316,7 @@ struct FileMetaData {
316
316
  };
317
317
 
318
318
  // A compressed copy of file meta data that just contain minimum data needed
319
- // to server read operations, while still keeping the pointer to full metadata
319
+ // to serve read operations, while still keeping the pointer to full metadata
320
320
  // of the file in case it is needed.
321
321
  struct FdWithKeyRange {
322
322
  FileDescriptor fd;
@@ -2161,6 +2161,10 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
2161
2161
  "Encounter unexpected blob index. Please open DB with "
2162
2162
  "ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
2163
2163
  return;
2164
+ case GetContext::kUnexpectedWideColumnEntity:
2165
+ *status =
2166
+ Status::NotSupported("Encountered unexpected wide-column entity");
2167
+ return;
2164
2168
  }
2165
2169
  f = fp.GetNextFile();
2166
2170
  }
@@ -3189,11 +3193,15 @@ void SortFileByOverlappingRatio(
3189
3193
  ttl_boost_score;
3190
3194
  }
3191
3195
 
3192
- std::sort(temp->begin(), temp->end(),
3193
- [&](const Fsize& f1, const Fsize& f2) -> bool {
3194
- return file_to_order[f1.file->fd.GetNumber()] <
3195
- file_to_order[f2.file->fd.GetNumber()];
3196
- });
3196
+ size_t num_to_sort = temp->size() > VersionStorageInfo::kNumberFilesToSort
3197
+ ? VersionStorageInfo::kNumberFilesToSort
3198
+ : temp->size();
3199
+
3200
+ std::partial_sort(temp->begin(), temp->begin() + num_to_sort, temp->end(),
3201
+ [&](const Fsize& f1, const Fsize& f2) -> bool {
3202
+ return file_to_order[f1.file->fd.GetNumber()] <
3203
+ file_to_order[f2.file->fd.GetNumber()];
3204
+ });
3197
3205
  }
3198
3206
 
3199
3207
  void SortFileByRoundRobin(const InternalKeyComparator& icmp,
@@ -141,6 +141,11 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
141
141
  "ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
142
142
  file_range.MarkKeyDone(iter);
143
143
  continue;
144
+ case GetContext::kUnexpectedWideColumnEntity:
145
+ *status =
146
+ Status::NotSupported("Encountered unexpected wide-column entity");
147
+ file_range.MarkKeyDone(iter);
148
+ continue;
144
149
  }
145
150
  }
146
151
 
@@ -0,0 +1,213 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #include <array>
7
+ #include <memory>
8
+
9
+ #include "db/db_test_util.h"
10
+ #include "port/stack_trace.h"
11
+ #include "test_util/testutil.h"
12
+ #include "utilities/merge_operators.h"
13
+
14
+ namespace ROCKSDB_NAMESPACE {
15
+
16
+ class DBWideBasicTest : public DBTestBase {
17
+ protected:
18
+ explicit DBWideBasicTest()
19
+ : DBTestBase("db_wide_basic_test", /* env_do_fsync */ false) {}
20
+ };
21
+
22
+ TEST_F(DBWideBasicTest, PutEntity) {
23
+ Options options = GetDefaultOptions();
24
+
25
+ // Use the DB::PutEntity API
26
+ constexpr char first_key[] = "first";
27
+ WideColumns first_columns{{"attr_name1", "foo"}, {"attr_name2", "bar"}};
28
+
29
+ ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
30
+ first_key, first_columns));
31
+
32
+ // Use WriteBatch
33
+ constexpr char second_key[] = "second";
34
+ WideColumns second_columns{{"attr_one", "two"}, {"attr_three", "four"}};
35
+
36
+ WriteBatch batch;
37
+ ASSERT_OK(
38
+ batch.PutEntity(db_->DefaultColumnFamily(), second_key, second_columns));
39
+ ASSERT_OK(db_->Write(WriteOptions(), &batch));
40
+
41
+ // Note: currently, read APIs are supposed to return NotSupported
42
+ auto verify = [&]() {
43
+ {
44
+ PinnableSlice result;
45
+ ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), first_key,
46
+ &result)
47
+ .IsNotSupported());
48
+ }
49
+
50
+ {
51
+ PinnableSlice result;
52
+ ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(),
53
+ second_key, &result)
54
+ .IsNotSupported());
55
+ }
56
+
57
+ {
58
+ constexpr size_t num_keys = 2;
59
+
60
+ std::array<Slice, num_keys> keys{{first_key, second_key}};
61
+ std::array<PinnableSlice, num_keys> values;
62
+ std::array<Status, num_keys> statuses;
63
+
64
+ db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
65
+ &keys[0], &values[0], &statuses[0]);
66
+
67
+ ASSERT_TRUE(values[0].empty());
68
+ ASSERT_TRUE(statuses[0].IsNotSupported());
69
+
70
+ ASSERT_TRUE(values[1].empty());
71
+ ASSERT_TRUE(statuses[1].IsNotSupported());
72
+ }
73
+
74
+ {
75
+ std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
76
+
77
+ iter->SeekToFirst();
78
+ ASSERT_FALSE(iter->Valid());
79
+ ASSERT_TRUE(iter->status().IsNotSupported());
80
+
81
+ iter->SeekToLast();
82
+ ASSERT_FALSE(iter->Valid());
83
+ ASSERT_TRUE(iter->status().IsNotSupported());
84
+ }
85
+ };
86
+
87
+ // Try reading from memtable
88
+ verify();
89
+
90
+ // Try reading after recovery
91
+ Close();
92
+ options.avoid_flush_during_recovery = true;
93
+ Reopen(options);
94
+
95
+ verify();
96
+
97
+ // Try reading from storage
98
+ ASSERT_OK(Flush());
99
+
100
+ verify();
101
+
102
+ // Add a couple of merge operands
103
+ Close();
104
+ options.merge_operator = MergeOperators::CreateStringAppendOperator();
105
+ Reopen(options);
106
+
107
+ constexpr char merge_operand[] = "bla";
108
+
109
+ ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), first_key,
110
+ merge_operand));
111
+ ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), second_key,
112
+ merge_operand));
113
+
114
+ // Try reading from memtable
115
+ verify();
116
+
117
+ // Try reading from storage
118
+ ASSERT_OK(Flush());
119
+
120
+ verify();
121
+
122
+ // Do it again, with the Put and the Merge in the same memtable
123
+ ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
124
+ first_key, first_columns));
125
+ ASSERT_OK(db_->Write(WriteOptions(), &batch));
126
+ ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), first_key,
127
+ merge_operand));
128
+ ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), second_key,
129
+ merge_operand));
130
+
131
+ // Try reading from memtable
132
+ verify();
133
+ }
134
+
135
+ TEST_F(DBWideBasicTest, PutEntityColumnFamily) {
136
+ Options options = GetDefaultOptions();
137
+ CreateAndReopenWithCF({"corinthian"}, options);
138
+
139
+ // Use the DB::PutEntity API
140
+ constexpr char first_key[] = "first";
141
+ WideColumns first_columns{{"attr_name1", "foo"}, {"attr_name2", "bar"}};
142
+
143
+ ASSERT_OK(
144
+ db_->PutEntity(WriteOptions(), handles_[1], first_key, first_columns));
145
+
146
+ // Use WriteBatch
147
+ constexpr char second_key[] = "second";
148
+ WideColumns second_columns{{"attr_one", "two"}, {"attr_three", "four"}};
149
+
150
+ WriteBatch batch;
151
+ ASSERT_OK(batch.PutEntity(handles_[1], second_key, second_columns));
152
+ ASSERT_OK(db_->Write(WriteOptions(), &batch));
153
+ }
154
+
155
+ TEST_F(DBWideBasicTest, PutEntityTimestampError) {
156
+ // Note: timestamps are currently not supported
157
+
158
+ Options options = GetDefaultOptions();
159
+ options.comparator = test::BytewiseComparatorWithU64TsWrapper();
160
+
161
+ ColumnFamilyHandle* handle = nullptr;
162
+ ASSERT_OK(db_->CreateColumnFamily(options, "corinthian", &handle));
163
+ std::unique_ptr<ColumnFamilyHandle> handle_guard(handle);
164
+
165
+ // Use the DB::PutEntity API
166
+ constexpr char first_key[] = "first";
167
+ WideColumns first_columns{{"attr_name1", "foo"}, {"attr_name2", "bar"}};
168
+
169
+ ASSERT_TRUE(db_->PutEntity(WriteOptions(), handle, first_key, first_columns)
170
+ .IsInvalidArgument());
171
+
172
+ // Use WriteBatch
173
+ constexpr char second_key[] = "second";
174
+ WideColumns second_columns{{"doric", "column"}, {"ionic", "column"}};
175
+
176
+ WriteBatch batch;
177
+ ASSERT_TRUE(
178
+ batch.PutEntity(handle, second_key, second_columns).IsInvalidArgument());
179
+ ASSERT_OK(db_->Write(WriteOptions(), &batch));
180
+ }
181
+
182
+ TEST_F(DBWideBasicTest, PutEntitySerializationError) {
183
+ // Make sure duplicate columns are caught
184
+
185
+ Options options = GetDefaultOptions();
186
+
187
+ // Use the DB::PutEntity API
188
+ constexpr char first_key[] = "first";
189
+ WideColumns first_columns{{"foo", "bar"}, {"foo", "baz"}};
190
+
191
+ ASSERT_TRUE(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(),
192
+ first_key, first_columns)
193
+ .IsCorruption());
194
+
195
+ // Use WriteBatch
196
+ constexpr char second_key[] = "second";
197
+ WideColumns second_columns{{"column", "doric"}, {"column", "ionic"}};
198
+
199
+ WriteBatch batch;
200
+ ASSERT_TRUE(
201
+ batch.PutEntity(db_->DefaultColumnFamily(), second_key, second_columns)
202
+ .IsCorruption());
203
+ ASSERT_OK(db_->Write(WriteOptions(), &batch));
204
+ }
205
+
206
+ } // namespace ROCKSDB_NAMESPACE
207
+
208
+ int main(int argc, char** argv) {
209
+ ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
210
+ ::testing::InitGoogleTest(&argc, argv);
211
+ RegisterCustomObjects(argc, argv);
212
+ return RUN_ALL_TESTS();
213
+ }
@@ -17,12 +17,6 @@ namespace ROCKSDB_NAMESPACE {
17
17
 
18
18
  Status WideColumnSerialization::Serialize(const WideColumns& columns,
19
19
  std::string& output) {
20
- // Column names should be strictly ascending
21
- assert(std::adjacent_find(columns.cbegin(), columns.cend(),
22
- [](const WideColumn& lhs, const WideColumn& rhs) {
23
- return lhs.name().compare(rhs.name()) > 0;
24
- }) == columns.cend());
25
-
26
20
  if (columns.size() >
27
21
  static_cast<size_t>(std::numeric_limits<uint32_t>::max())) {
28
22
  return Status::InvalidArgument("Too many wide columns");
@@ -32,12 +26,17 @@ Status WideColumnSerialization::Serialize(const WideColumns& columns,
32
26
 
33
27
  PutVarint32(&output, static_cast<uint32_t>(columns.size()));
34
28
 
35
- for (const auto& column : columns) {
29
+ for (size_t i = 0; i < columns.size(); ++i) {
30
+ const WideColumn& column = columns[i];
31
+
36
32
  const Slice& name = column.name();
37
33
  if (name.size() >
38
34
  static_cast<size_t>(std::numeric_limits<uint32_t>::max())) {
39
35
  return Status::InvalidArgument("Wide column name too long");
40
36
  }
37
+ if (i > 0 && columns[i - 1].name().compare(name) >= 0) {
38
+ return Status::Corruption("Wide columns out of order");
39
+ }
41
40
 
42
41
  const Slice& value = column.value();
43
42
  if (value.size() >
@@ -124,6 +124,22 @@ TEST(WideColumnSerializationTest, SerializeDeserialize) {
124
124
  }
125
125
  }
126
126
 
127
+ TEST(WideColumnSerializationTest, SerializeDuplicateError) {
128
+ WideColumns columns{{"foo", "bar"}, {"foo", "baz"}};
129
+ std::string output;
130
+
131
+ ASSERT_TRUE(
132
+ WideColumnSerialization::Serialize(columns, output).IsCorruption());
133
+ }
134
+
135
+ TEST(WideColumnSerializationTest, SerializeOutOfOrderError) {
136
+ WideColumns columns{{"hello", "world"}, {"foo", "bar"}};
137
+ std::string output;
138
+
139
+ ASSERT_TRUE(
140
+ WideColumnSerialization::Serialize(columns, output).IsCorruption());
141
+ }
142
+
127
143
  TEST(WideColumnSerializationTest, DeserializeVersionError) {
128
144
  // Can't decode version
129
145