@nxtedition/rocksdb 13.1.4 → 13.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +43 -16
- package/deps/rocksdb/rocksdb/{TARGETS → BUCK} +27 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -1
- package/deps/rocksdb/rocksdb/Makefile +2 -2
- package/deps/rocksdb/rocksdb/cache/cache.cc +3 -1
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +2 -0
- package/deps/rocksdb/rocksdb/db/attribute_group_iterator_impl.h +34 -9
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +7 -6
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +22 -14
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
- package/deps/rocksdb/rocksdb/db/builder.cc +13 -24
- package/deps/rocksdb/rocksdb/db/coalescing_iterator.h +35 -10
- package/deps/rocksdb/rocksdb/db/column_family.cc +21 -10
- package/deps/rocksdb/rocksdb/db/column_family.h +15 -8
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +98 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +126 -16
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +51 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +24 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +52 -22
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +36 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +6 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +30 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +26 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +43 -33
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +6 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +19 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +6 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +632 -411
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +171 -51
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +7 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +37 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +51 -11
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +10 -3
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +350 -154
- package/deps/rocksdb/rocksdb/db/convenience.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +62 -27
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +68 -1
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +91 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +134 -70
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +71 -23
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +43 -16
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +47 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +27 -19
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +38 -25
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +7 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +258 -42
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +161 -9
- package/deps/rocksdb/rocksdb/db/db_iter.cc +118 -86
- package/deps/rocksdb/rocksdb/db/db_iter.h +44 -17
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +27 -6
- package/deps/rocksdb/rocksdb/db/db_test.cc +48 -16
- package/deps/rocksdb/rocksdb/db/db_test2.cc +60 -15
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +97 -44
- package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -1
- package/deps/rocksdb/rocksdb/db/dbformat.cc +15 -5
- package/deps/rocksdb/rocksdb/db/dbformat.h +137 -55
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +54 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +663 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +152 -91
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +134 -11
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +55 -9
- package/deps/rocksdb/rocksdb/db/flush_job.cc +52 -29
- package/deps/rocksdb/rocksdb/db/flush_job.h +5 -3
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +18 -12
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +23 -29
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +3 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +9 -6
- package/deps/rocksdb/rocksdb/db/internal_stats.h +54 -0
- package/deps/rocksdb/rocksdb/db/job_context.h +1 -1
- package/deps/rocksdb/rocksdb/db/log_reader.cc +6 -7
- package/deps/rocksdb/rocksdb/db/manifest_ops.cc +47 -0
- package/deps/rocksdb/rocksdb/db/manifest_ops.h +20 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +165 -64
- package/deps/rocksdb/rocksdb/db/memtable.h +422 -243
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +99 -68
- package/deps/rocksdb/rocksdb/db/memtable_list.h +63 -38
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +28 -25
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +118 -60
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_test.cc +344 -89
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +2 -3
- package/deps/rocksdb/rocksdb/db/repair.cc +15 -14
- package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -13
- package/deps/rocksdb/rocksdb/db/snapshot_checker.h +7 -0
- package/deps/rocksdb/rocksdb/db/table_cache.cc +62 -65
- package/deps/rocksdb/rocksdb/db/table_cache.h +70 -76
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +5 -6
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +8 -7
- package/deps/rocksdb/rocksdb/db/version_builder.cc +17 -19
- package/deps/rocksdb/rocksdb/db/version_builder.h +13 -12
- package/deps/rocksdb/rocksdb/db/version_edit.h +30 -0
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +3 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +89 -129
- package/deps/rocksdb/rocksdb/db/version_set.h +12 -4
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -2
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +12 -8
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +0 -15
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -2
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +9 -7
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +0 -8
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +28 -2
- package/deps/rocksdb/rocksdb/db/write_batch.cc +32 -10
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +9 -0
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/write_thread.cc +3 -1
- package/deps/rocksdb/rocksdb/db/write_thread.h +6 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +15 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +18 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +100 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +34 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +223 -78
- package/deps/rocksdb/rocksdb/env/file_system.cc +6 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +53 -0
- package/deps/rocksdb/rocksdb/env/io_posix.cc +63 -17
- package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +132 -48
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +92 -24
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +727 -109
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +3 -4
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/attribute_groups.h +20 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +9 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +10 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +34 -37
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +56 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +36 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +84 -60
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +102 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +89 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +32 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +30 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +23 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -0
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +79 -21
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +41 -18
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +1 -5
- package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +169 -0
- package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +400 -0
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +137 -82
- package/deps/rocksdb/rocksdb/options/cf_options.h +18 -6
- package/deps/rocksdb/rocksdb/options/configurable.cc +31 -17
- package/deps/rocksdb/rocksdb/options/configurable_helper.h +7 -6
- package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -8
- package/deps/rocksdb/rocksdb/options/options_parser.cc +74 -54
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +89 -0
- package/deps/rocksdb/rocksdb/options/options_test.cc +112 -26
- package/deps/rocksdb/rocksdb/port/port.h +5 -9
- package/deps/rocksdb/rocksdb/src.mk +8 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +62 -80
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +13 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +16 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +38 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +204 -1
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/format.cc +3 -3
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +4 -1
- package/deps/rocksdb/rocksdb/table/mock_table.cc +0 -50
- package/deps/rocksdb/rocksdb/table/mock_table.h +53 -0
- package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -5
- package/deps/rocksdb/rocksdb/table/table_builder.h +3 -1
- package/deps/rocksdb/rocksdb/table/table_properties.cc +181 -0
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +5 -5
- package/deps/rocksdb/rocksdb/table/table_test.cc +71 -64
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +45 -45
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +35 -35
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +43 -43
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +1 -1
- package/deps/rocksdb/rocksdb/unreleased_history/add.sh +13 -0
- package/deps/rocksdb/rocksdb/util/aligned_buffer.h +24 -5
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +7 -0
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +0 -52
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +1 -10
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +92 -0
- package/deps/rocksdb/rocksdb/util/thread_operation.h +1 -0
- package/deps/rocksdb/rocksdb/util/udt_util.cc +50 -4
- package/deps/rocksdb/rocksdb/util/udt_util.h +24 -11
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +26 -13
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -16
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +214 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.h +60 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +124 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +441 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +34 -3
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +7 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +437 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +34 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +14 -7
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +7 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +17 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +69 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +20 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1290 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +324 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +8 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -12
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +32 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +33 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +721 -9
- package/deps/rocksdb/rocksdb.gyp +2 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
|
@@ -141,8 +141,9 @@ class InlineSkipList {
|
|
|
141
141
|
// Returns true iff an entry that compares equal to key is in the list.
|
|
142
142
|
bool Contains(const char* key) const;
|
|
143
143
|
|
|
144
|
-
// Return estimated number of entries
|
|
145
|
-
uint64_t
|
|
144
|
+
// Return estimated number of entries from `start_ikey` to `end_ikey`.
|
|
145
|
+
uint64_t ApproximateNumEntries(const Slice& start_ikey,
|
|
146
|
+
const Slice& end_ikey) const;
|
|
146
147
|
|
|
147
148
|
// Validate correctness of the skip-list.
|
|
148
149
|
void TEST_Validate() const;
|
|
@@ -673,31 +674,88 @@ InlineSkipList<Comparator>::FindRandomEntry() const {
|
|
|
673
674
|
}
|
|
674
675
|
|
|
675
676
|
template <class Comparator>
|
|
676
|
-
uint64_t InlineSkipList<Comparator>::
|
|
677
|
+
uint64_t InlineSkipList<Comparator>::ApproximateNumEntries(
|
|
678
|
+
const Slice& start_ikey, const Slice& end_ikey) const {
|
|
679
|
+
// The number of entries at a given level for the given range, in terms of
|
|
680
|
+
// the actual number of entries in that range (level 0), follows a binomial
|
|
681
|
+
// distribution, which is very well approximated by the Poisson distribution.
|
|
682
|
+
// That has stddev sqrt(x) where x is the expected number of entries (mean)
|
|
683
|
+
// at this level, and the best predictor of x is the number of observed
|
|
684
|
+
// entries (at this level). To predict the number of entries on level 0 we use
|
|
685
|
+
// x * kBranching ^ level. From the standard deviation, the P99+ relative
|
|
686
|
+
// error is roughly 3 * sqrt(x) / x. Thus, a reasonable approach would be to
|
|
687
|
+
// find the smallest level with at least some moderate constant number of entries
|
|
688
|
+
// in range. E.g. with at least ~40 entries, we expect P99+ relative error
|
|
689
|
+
// (approximation accuracy) of ~ 50% = 3 * sqrt(40) / 40; P95 error of
|
|
690
|
+
// ~30%; P75 error of < 20%.
|
|
691
|
+
//
|
|
692
|
+
// However, there are two issues with this approach, and an observation:
|
|
693
|
+
// * Pointer chasing on the larger (bottom) levels is much slower because of
|
|
694
|
+
// cache hierarchy effects, so when the result is smaller, getting the result
|
|
695
|
+
// will be substantially slower, despite traversing a similar number of
|
|
696
|
+
// entries. (We could be clever about pipelining our pointer chasing but
|
|
697
|
+
// that's complicated.)
|
|
698
|
+
// * The larger (bottom) levels also have lower variance because there's a
|
|
699
|
+
// chance (or certainty) that we reach level 0 and return the exact answer.
|
|
700
|
+
// * For applications in query planning, we can also tolerate more variance on
|
|
701
|
+
// small results because the impact of misestimating is likely smaller.
|
|
702
|
+
//
|
|
703
|
+
// These factors point us to an approach in which we have a higher minimum
|
|
704
|
+
// threshold number of samples for higher levels and lower for lower levels
|
|
705
|
+
// (see sufficient_samples below). This seems to yield roughly consistent
|
|
706
|
+
// relative error (stddev around 20%, less for large results) and roughly
|
|
707
|
+
// consistent query time around the time of two memtable point queries.
|
|
708
|
+
//
|
|
709
|
+
// Engineering observation: it is tempting to think that taking into account
|
|
710
|
+
// what we already found in how many entries occur on higher levels, not just
|
|
711
|
+
// the first iterated level with a sufficient number of samples, would yield
|
|
712
|
+
// a more accurate estimate. But that doesn't work because of the particular
|
|
713
|
+
// correlations and independences of the data: each level higher is just an
|
|
714
|
+
// independently probabilistic filtering of the level below it. That
|
|
715
|
+
// filtering from level l to l+1 has no more information about levels
|
|
716
|
+
// 0 .. l-1 than we can get from level l. The structure of RandomHeight() is
|
|
717
|
+
// a clue to these correlations and independences.
|
|
718
|
+
|
|
719
|
+
Node* lb = head_;
|
|
720
|
+
Node* ub = nullptr;
|
|
677
721
|
uint64_t count = 0;
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
Node* next = x->Next(level);
|
|
685
|
-
if (next != nullptr) {
|
|
686
|
-
PREFETCH(next->Next(level), 0, 1);
|
|
722
|
+
for (int level = GetMaxHeight() - 1; level >= 0; level--) {
|
|
723
|
+
auto sufficient_samples = static_cast<uint64_t>(level) * kBranching_ + 10U;
|
|
724
|
+
if (count >= sufficient_samples) {
|
|
725
|
+
// No more counting; apply powers of kBranching and avoid floating point
|
|
726
|
+
count *= kBranching_;
|
|
727
|
+
continue;
|
|
687
728
|
}
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
729
|
+
count = 0;
|
|
730
|
+
Node* next;
|
|
731
|
+
// Get a more precise lower bound (for start key)
|
|
732
|
+
for (;;) {
|
|
733
|
+
next = lb->Next(level);
|
|
734
|
+
if (next == ub) {
|
|
735
|
+
break;
|
|
736
|
+
}
|
|
737
|
+
assert(next != nullptr);
|
|
738
|
+
if (compare_(next->Key(), start_ikey) >= 0) {
|
|
739
|
+
break;
|
|
740
|
+
}
|
|
741
|
+
lb = next;
|
|
742
|
+
}
|
|
743
|
+
// Count entries on this level until upper bound (for end key)
|
|
744
|
+
for (;;) {
|
|
745
|
+
if (next == ub) {
|
|
746
|
+
break;
|
|
747
|
+
}
|
|
748
|
+
assert(next != nullptr);
|
|
749
|
+
if (compare_(next->Key(), end_ikey) >= 0) {
|
|
750
|
+
// Save refined upper bound to potentially save key comparison
|
|
751
|
+
ub = next;
|
|
752
|
+
break;
|
|
695
753
|
}
|
|
696
|
-
} else {
|
|
697
|
-
x = next;
|
|
698
754
|
count++;
|
|
755
|
+
next = next->Next(level);
|
|
699
756
|
}
|
|
700
757
|
}
|
|
758
|
+
return count;
|
|
701
759
|
}
|
|
702
760
|
|
|
703
761
|
template <class Comparator>
|
|
@@ -64,8 +64,9 @@ class SkipList {
|
|
|
64
64
|
// Returns true iff an entry that compares equal to key is in the list.
|
|
65
65
|
bool Contains(const Key& key) const;
|
|
66
66
|
|
|
67
|
-
// Return estimated number of entries
|
|
68
|
-
uint64_t
|
|
67
|
+
// Return estimated number of entries from `start_ikey` to `end_ikey`.
|
|
68
|
+
uint64_t ApproximateNumEntries(const Slice& start_ikey,
|
|
69
|
+
const Slice& end_ikey) const;
|
|
69
70
|
|
|
70
71
|
// Iteration over the contents of a skip list
|
|
71
72
|
class Iterator {
|
|
@@ -383,27 +384,49 @@ typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::FindLast()
|
|
|
383
384
|
}
|
|
384
385
|
|
|
385
386
|
template <typename Key, class Comparator>
|
|
386
|
-
uint64_t SkipList<Key, Comparator>::
|
|
387
|
+
uint64_t SkipList<Key, Comparator>::ApproximateNumEntries(
|
|
388
|
+
const Slice& start_ikey, const Slice& end_ikey) const {
|
|
389
|
+
// See InlineSkipList<Comparator>::ApproximateNumEntries() (copy-paste)
|
|
390
|
+
Node* lb = head_;
|
|
391
|
+
Node* ub = nullptr;
|
|
387
392
|
uint64_t count = 0;
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
393
|
+
for (int level = GetMaxHeight() - 1; level >= 0; level--) {
|
|
394
|
+
auto sufficient_samples = static_cast<uint64_t>(level) * kBranching_ + 10U;
|
|
395
|
+
if (count >= sufficient_samples) {
|
|
396
|
+
// No more counting; apply powers of kBranching and avoid floating point
|
|
397
|
+
count *= kBranching_;
|
|
398
|
+
continue;
|
|
399
|
+
}
|
|
400
|
+
count = 0;
|
|
401
|
+
Node* next;
|
|
402
|
+
// Get a more precise lower bound (for start key)
|
|
403
|
+
for (;;) {
|
|
404
|
+
next = lb->Next(level);
|
|
405
|
+
if (next == ub) {
|
|
406
|
+
break;
|
|
407
|
+
}
|
|
408
|
+
assert(next != nullptr);
|
|
409
|
+
if (compare_(next->Key(), start_ikey) >= 0) {
|
|
410
|
+
break;
|
|
411
|
+
}
|
|
412
|
+
lb = next;
|
|
413
|
+
}
|
|
414
|
+
// Count entries on this level until upper bound (for end key)
|
|
415
|
+
for (;;) {
|
|
416
|
+
if (next == ub) {
|
|
417
|
+
break;
|
|
418
|
+
}
|
|
419
|
+
assert(next != nullptr);
|
|
420
|
+
if (compare_(next->Key(), end_ikey) >= 0) {
|
|
421
|
+
// Save refined upper bound to potentially save key comparison
|
|
422
|
+
ub = next;
|
|
423
|
+
break;
|
|
401
424
|
}
|
|
402
|
-
} else {
|
|
403
|
-
x = next;
|
|
404
425
|
count++;
|
|
426
|
+
next = next->Next(level);
|
|
405
427
|
}
|
|
406
428
|
}
|
|
429
|
+
return count;
|
|
407
430
|
}
|
|
408
431
|
|
|
409
432
|
template <typename Key, class Comparator>
|
|
@@ -108,11 +108,7 @@ class SkipListRep : public MemTableRep {
|
|
|
108
108
|
|
|
109
109
|
uint64_t ApproximateNumEntries(const Slice& start_ikey,
|
|
110
110
|
const Slice& end_ikey) override {
|
|
111
|
-
|
|
112
|
-
uint64_t start_count =
|
|
113
|
-
skip_list_.EstimateCount(EncodeKey(&tmp, start_ikey));
|
|
114
|
-
uint64_t end_count = skip_list_.EstimateCount(EncodeKey(&tmp, end_ikey));
|
|
115
|
-
return (end_count >= start_count) ? (end_count - start_count) : 0;
|
|
111
|
+
return skip_list_.ApproximateNumEntries(start_ikey, end_ikey);
|
|
116
112
|
}
|
|
117
113
|
|
|
118
114
|
void UniqueRandomSample(const uint64_t num_entries,
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
3
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
+
|
|
6
|
+
#include "memtable/wbwi_memtable.h"
|
|
7
|
+
|
|
8
|
+
#include "db/memtable.h"
|
|
9
|
+
|
|
10
|
+
namespace ROCKSDB_NAMESPACE {
|
|
11
|
+
|
|
12
|
+
const std::unordered_map<WriteType, ValueType>
|
|
13
|
+
WBWIMemTableIterator::WriteTypeToValueTypeMap = {
|
|
14
|
+
{kPutRecord, kTypeValue},
|
|
15
|
+
{kMergeRecord, kTypeMerge},
|
|
16
|
+
{kDeleteRecord, kTypeDeletion},
|
|
17
|
+
{kSingleDeleteRecord, kTypeSingleDeletion},
|
|
18
|
+
{kDeleteRangeRecord, kTypeRangeDeletion},
|
|
19
|
+
{kPutEntityRecord, kTypeWideColumnEntity},
|
|
20
|
+
// Only the above record types are added to WBWI.
|
|
21
|
+
// kLogDataRecord, kXIDRecord, kUnknownRecord
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
InternalIterator* WBWIMemTable::NewIterator(
|
|
25
|
+
const ReadOptions&, UnownedPtr<const SeqnoToTimeMapping>, Arena* arena,
|
|
26
|
+
const SliceTransform* /* prefix_extractor */, bool for_flush) {
|
|
27
|
+
// Ingested WBWIMemTable should have an assigned seqno
|
|
28
|
+
assert(assigned_seqno_.upper_bound != kMaxSequenceNumber);
|
|
29
|
+
assert(assigned_seqno_.lower_bound != kMaxSequenceNumber);
|
|
30
|
+
assert(arena);
|
|
31
|
+
auto mem = arena->AllocateAligned(sizeof(WBWIMemTableIterator));
|
|
32
|
+
return new (mem) WBWIMemTableIterator(
|
|
33
|
+
std::unique_ptr<WBWIIterator>(wbwi_->NewIterator(cf_id_)),
|
|
34
|
+
assigned_seqno_, comparator_, for_flush);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
inline InternalIterator* WBWIMemTable::NewIterator() const {
|
|
38
|
+
assert(assigned_seqno_.upper_bound != kMaxSequenceNumber);
|
|
39
|
+
assert(assigned_seqno_.lower_bound != kMaxSequenceNumber);
|
|
40
|
+
return new WBWIMemTableIterator(
|
|
41
|
+
std::unique_ptr<WBWIIterator>(wbwi_->NewIterator(cf_id_)),
|
|
42
|
+
assigned_seqno_, comparator_, /*for_flush=*/false);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
bool WBWIMemTable::Get(const LookupKey& key, std::string* value,
|
|
46
|
+
PinnableWideColumns* columns, std::string* timestamp,
|
|
47
|
+
Status* s, MergeContext* merge_context,
|
|
48
|
+
SequenceNumber* max_covering_tombstone_seq,
|
|
49
|
+
SequenceNumber* out_seq, const ReadOptions&,
|
|
50
|
+
bool immutable_memtable, ReadCallback* callback,
|
|
51
|
+
bool* is_blob_index, bool do_merge) {
|
|
52
|
+
(void)immutable_memtable;
|
|
53
|
+
(void)timestamp;
|
|
54
|
+
(void)columns;
|
|
55
|
+
assert(immutable_memtable);
|
|
56
|
+
assert(!timestamp); // TODO: support UDT
|
|
57
|
+
assert(!columns); // TODO: support WideColumn
|
|
58
|
+
assert(assigned_seqno_.upper_bound != kMaxSequenceNumber);
|
|
59
|
+
assert(assigned_seqno_.lower_bound != kMaxSequenceNumber);
|
|
60
|
+
// WBWI does not support DeleteRange yet.
|
|
61
|
+
assert(!wbwi_->GetWriteBatch()->HasDeleteRange());
|
|
62
|
+
|
|
63
|
+
[[maybe_unused]] SequenceNumber read_seq =
|
|
64
|
+
GetInternalKeySeqno(key.internal_key());
|
|
65
|
+
std::unique_ptr<InternalIterator> iter{NewIterator()};
|
|
66
|
+
iter->Seek(key.internal_key());
|
|
67
|
+
const Slice lookup_user_key = key.user_key();
|
|
68
|
+
|
|
69
|
+
while (iter->Valid() && comparator_->EqualWithoutTimestamp(
|
|
70
|
+
ExtractUserKey(iter->key()), lookup_user_key)) {
|
|
71
|
+
uint64_t tag = ExtractInternalKeyFooter(iter->key());
|
|
72
|
+
ValueType type;
|
|
73
|
+
SequenceNumber seq;
|
|
74
|
+
UnPackSequenceAndType(tag, &seq, &type);
|
|
75
|
+
// Unsupported operations.
|
|
76
|
+
assert(type != kTypeBlobIndex);
|
|
77
|
+
assert(type != kTypeWideColumnEntity);
|
|
78
|
+
assert(type != kTypeValuePreferredSeqno);
|
|
79
|
+
assert(type != kTypeDeletionWithTimestamp);
|
|
80
|
+
assert(type != kTypeMerge);
|
|
81
|
+
if (!callback || callback->IsVisible(seq)) {
|
|
82
|
+
if (*out_seq == kMaxSequenceNumber) {
|
|
83
|
+
*out_seq = std::max(seq, *max_covering_tombstone_seq);
|
|
84
|
+
}
|
|
85
|
+
if (*max_covering_tombstone_seq > seq) {
|
|
86
|
+
type = kTypeRangeDeletion;
|
|
87
|
+
}
|
|
88
|
+
switch (type) {
|
|
89
|
+
case kTypeValue: {
|
|
90
|
+
HandleTypeValue(lookup_user_key, iter->value(), iter->IsValuePinned(),
|
|
91
|
+
do_merge, s->IsMergeInProgress(), merge_context,
|
|
92
|
+
moptions_.merge_operator, clock_,
|
|
93
|
+
moptions_.statistics, moptions_.info_log, s, value,
|
|
94
|
+
columns, is_blob_index);
|
|
95
|
+
assert(seq <= read_seq);
|
|
96
|
+
return /*found_final_value=*/true;
|
|
97
|
+
}
|
|
98
|
+
case kTypeDeletion:
|
|
99
|
+
case kTypeSingleDeletion:
|
|
100
|
+
case kTypeRangeDeletion: {
|
|
101
|
+
HandleTypeDeletion(lookup_user_key, s->IsMergeInProgress(),
|
|
102
|
+
merge_context, moptions_.merge_operator, clock_,
|
|
103
|
+
moptions_.statistics, moptions_.info_log, s, value,
|
|
104
|
+
columns);
|
|
105
|
+
assert(seq <= read_seq);
|
|
106
|
+
return /*found_final_value=*/true;
|
|
107
|
+
}
|
|
108
|
+
default: {
|
|
109
|
+
std::string msg("Unrecognized or unsupported value type: " +
|
|
110
|
+
std::to_string(static_cast<int>(type)) + ". ");
|
|
111
|
+
msg.append("User key: " +
|
|
112
|
+
ExtractUserKey(iter->key()).ToString(/*hex=*/true) + ". ");
|
|
113
|
+
msg.append("seq: " + std::to_string(seq) + ".");
|
|
114
|
+
*s = Status::Corruption(msg.c_str());
|
|
115
|
+
return /*found_final_value=*/true;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
// Current key not visible or we read a merge key
|
|
120
|
+
assert(s->IsMergeInProgress() || (callback && !callback->IsVisible(seq)));
|
|
121
|
+
iter->Next();
|
|
122
|
+
}
|
|
123
|
+
if (!iter->status().ok() &&
|
|
124
|
+
(s->ok() || s->IsMergeInProgress() || s->IsNotFound())) {
|
|
125
|
+
*s = iter->status();
|
|
126
|
+
// stop further look up
|
|
127
|
+
return true;
|
|
128
|
+
}
|
|
129
|
+
return /*found_final_value=*/false;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
void WBWIMemTable::MultiGet(const ReadOptions& read_options,
|
|
133
|
+
MultiGetRange* range, ReadCallback* callback,
|
|
134
|
+
bool immutable_memtable) {
|
|
135
|
+
(void)immutable_memtable;
|
|
136
|
+
// Should only be used as immutable memtable.
|
|
137
|
+
assert(immutable_memtable);
|
|
138
|
+
// TODO: reuse the InternalIterator created in Get().
|
|
139
|
+
for (auto iter = range->begin(); iter != range->end(); ++iter) {
|
|
140
|
+
SequenceNumber dummy_seq = 0;
|
|
141
|
+
bool found_final_value =
|
|
142
|
+
Get(*iter->lkey, iter->value ? iter->value->GetSelf() : nullptr,
|
|
143
|
+
iter->columns, iter->timestamp, iter->s, &(iter->merge_context),
|
|
144
|
+
&(iter->max_covering_tombstone_seq), &dummy_seq, read_options, true,
|
|
145
|
+
callback, nullptr, true);
|
|
146
|
+
if (found_final_value) {
|
|
147
|
+
if (iter->s->ok() || iter->s->IsNotFound()) {
|
|
148
|
+
if (iter->value) {
|
|
149
|
+
iter->value->PinSelf();
|
|
150
|
+
range->AddValueSize(iter->value->size());
|
|
151
|
+
} else {
|
|
152
|
+
assert(iter->columns);
|
|
153
|
+
range->AddValueSize(iter->columns->serialized_size());
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
range->MarkKeyDone(iter);
|
|
157
|
+
if (range->GetValueSize() > read_options.value_size_soft_limit) {
|
|
158
|
+
// Set all remaining keys in range to Abort
|
|
159
|
+
for (auto range_iter = range->begin(); range_iter != range->end();
|
|
160
|
+
++range_iter) {
|
|
161
|
+
range->MarkKeyDone(range_iter);
|
|
162
|
+
*(range_iter->s) = Status::Aborted();
|
|
163
|
+
}
|
|
164
|
+
break;
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
} // namespace ROCKSDB_NAMESPACE
|