@nxtedition/rocksdb 10.1.4 → 10.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +16 -12
- package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -5
- package/deps/rocksdb/rocksdb/Makefile +38 -15
- package/deps/rocksdb/rocksdb/TARGETS +10 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +58 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +4 -2
- package/deps/rocksdb/rocksdb/db/builder.cc +2 -2
- package/deps/rocksdb/rocksdb/db/builder.h +1 -1
- package/deps/rocksdb/rocksdb/db/c.cc +205 -6
- package/deps/rocksdb/rocksdb/db/c_test.c +189 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +28 -0
- package/deps/rocksdb/rocksdb/db/column_family.h +17 -0
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +234 -60
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +8 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +11 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +22 -25
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +112 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +72 -21
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +77 -0
- package/deps/rocksdb/rocksdb/db/convenience.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +269 -112
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +107 -43
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +93 -24
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +157 -68
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +56 -15
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +78 -105
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +39 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +21 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +107 -63
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +43 -2
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +4 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_test.cc +10 -2
- package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -6
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +92 -2
- package/deps/rocksdb/rocksdb/db/error_handler.cc +34 -39
- package/deps/rocksdb/rocksdb/db/error_handler.h +3 -4
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -4
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +6 -3
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +71 -15
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +383 -4
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +88 -72
- package/deps/rocksdb/rocksdb/db/flush_job.cc +30 -3
- package/deps/rocksdb/rocksdb/db/flush_job.h +14 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +60 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +20 -1
- package/deps/rocksdb/rocksdb/db/log_writer.cc +24 -0
- package/deps/rocksdb/rocksdb/db/log_writer.h +5 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
- package/deps/rocksdb/rocksdb/db/memtable.h +10 -10
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +4 -4
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +10 -3
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +8 -10
- package/deps/rocksdb/rocksdb/db/repair.cc +4 -3
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +30 -0
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +9 -0
- package/deps/rocksdb/rocksdb/db/table_cache.cc +17 -2
- package/deps/rocksdb/rocksdb/db/table_cache.h +9 -1
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +9 -2
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +3 -3
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_edit.cc +0 -1
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -6
- package/deps/rocksdb/rocksdb/db/version_set.cc +54 -31
- package/deps/rocksdb/rocksdb/db/version_set.h +14 -7
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +37 -29
- package/deps/rocksdb/rocksdb/db/wal_manager.h +6 -5
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +6 -0
- package/deps/rocksdb/rocksdb/db/write_batch.cc +54 -23
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +46 -5
- package/deps/rocksdb/rocksdb/db/write_thread.cc +53 -5
- package/deps/rocksdb/rocksdb/db/write_thread.h +36 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +57 -17
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +11 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +8 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +10 -25
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +25 -88
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_filters.cc +93 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_filters.h +16 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +43 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +109 -21
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +8 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +666 -205
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +55 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +18 -16
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +19 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +782 -494
- package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +21 -0
- package/deps/rocksdb/rocksdb/env/env.cc +6 -0
- package/deps/rocksdb/rocksdb/env/io_posix.cc +0 -1
- package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +34 -19
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +29 -32
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +41 -15
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +4 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +63 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +16 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +0 -16
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +16 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +76 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +17 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +12 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +31 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/user_write_callback.h +29 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +17 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -2
- package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
- package/deps/rocksdb/rocksdb/options/db_options.cc +8 -0
- package/deps/rocksdb/rocksdb/options/db_options.h +9 -5
- package/deps/rocksdb/rocksdb/options/options.cc +3 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +1 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +2 -2
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +1 -0
- package/deps/rocksdb/rocksdb/port/win/port_win.cc +3 -2
- package/deps/rocksdb/rocksdb/src.mk +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +15 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +15 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +31 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +10 -5
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +11 -15
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -21
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +9 -11
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -16
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +14 -9
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +82 -41
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +13 -14
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +18 -22
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +9 -10
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +3 -2
- package/deps/rocksdb/rocksdb/table/format.cc +1 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +18 -13
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +5 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +2 -2
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +3 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +8 -7
- package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +1 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +6 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +19 -0
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +434 -110
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +3 -1
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +3 -0
- package/deps/rocksdb/rocksdb/util/aligned_storage.h +24 -0
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/util/random.cc +2 -1
- package/deps/rocksdb/rocksdb/util/stderr_logger.h +1 -1
- package/deps/rocksdb/rocksdb/util/udt_util.cc +33 -0
- package/deps/rocksdb/rocksdb/util/udt_util.h +7 -0
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +33 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.h +5 -0
- package/deps/rocksdb/rocksdb/util/xxhash.h +10 -3
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +13 -13
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +104 -48
- package/deps/rocksdb/rocksdb/utilities/debug.cc +16 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +647 -235
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -157
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector_test.cc +139 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +105 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +64 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +43 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +154 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +158 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +16 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +9 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +43 -7
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
- package/index.js +1 -2
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/util.h +25 -2
- package/.tap/test-results/node_modules/abstract-level/test/chained-batch-test.js.tap +0 -0
- package/.tap/test-results/node_modules/abstract-level/test/get-test.js.tap +0 -0
- package/.tap/test-results/test/abstract-level-test.js.tap +0 -1077
- package/.tap/test-results/test/batch-test.js.tap +0 -12
- package/.tap/test-results/test/chained-batch-gc-test.js.tap +0 -11
- package/.tap/test-results/test/cleanup-hanging-iterators-test.js.tap +0 -135
- package/.tap/test-results/test/clear-gc-test.js.tap +0 -13
- package/.tap/test-results/test/column-test.js.tap +0 -55
- package/.tap/test-results/test/common.js.tap +0 -0
- package/.tap/test-results/test/compression-test.js.tap +0 -30
- package/.tap/test-results/test/db-identity.js.tap +0 -12
- package/.tap/test-results/test/electron.js.tap +0 -0
- package/.tap/test-results/test/env-cleanup-hook-test.js.tap +0 -40
- package/.tap/test-results/test/env-cleanup-hook.js.tap +0 -0
- package/.tap/test-results/test/gc.js.tap +0 -0
- package/.tap/test-results/test/getproperty-test.js.tap +0 -29
- package/.tap/test-results/test/iterator-gc-test.js.tap +0 -15
- package/.tap/test-results/test/iterator-hwm-test.js.tap +0 -131
- package/.tap/test-results/test/iterator-recursion-test.js.tap +0 -12
- package/.tap/test-results/test/iterator-starvation-test.js.tap +0 -73
- package/.tap/test-results/test/iterator-test.js.tap +0 -6
- package/.tap/test-results/test/leak-tester-batch.js.tap +0 -0
- package/.tap/test-results/test/leak-tester-iterator.js.tap +0 -0
- package/.tap/test-results/test/leak-tester.js.tap +0 -0
- package/.tap/test-results/test/lock-test.js.tap +0 -18
- package/.tap/test-results/test/lock.js.tap +0 -0
- package/.tap/test-results/test/make.js.tap +0 -0
- package/.tap/test-results/test/max-rev-merge.js.tap +0 -0
- package/.tap/test-results/test/merge-operator-test.js.tap +0 -12
- package/.tap/test-results/test/mkdir-test.js.tap +0 -15
- package/.tap/test-results/test/segfault-test.js.tap +0 -76
- package/.tap/test-results/test/stack-blower.js.tap +0 -0
- package/deps/rocksdb/rocksdb/README.md +0 -29
- package/deps/rocksdb/rocksdb/microbench/README.md +0 -60
- package/deps/rocksdb/rocksdb/plugin/README.md +0 -43
- package/deps/rocksdb/rocksdb/port/README +0 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
- package/tmp/000099.sst +0 -0
- package/tmp/000102.sst +0 -0
- package/tmp/000103.log +0 -0
- package/tmp/CURRENT +0 -1
- package/tmp/IDENTITY +0 -1
- package/tmp/LOCK +0 -0
- package/tmp/MANIFEST-000104 +0 -0
- package/tmp/OPTIONS-000098 +0 -207
- package/tmp/OPTIONS-000106 +0 -207
|
@@ -65,6 +65,7 @@
|
|
|
65
65
|
#include "port/lang.h"
|
|
66
66
|
#include "rocksdb/merge_operator.h"
|
|
67
67
|
#include "rocksdb/system_clock.h"
|
|
68
|
+
#include "util/aligned_storage.h"
|
|
68
69
|
#include "util/autovector.h"
|
|
69
70
|
#include "util/cast_util.h"
|
|
70
71
|
#include "util/coding.h"
|
|
@@ -1900,7 +1901,7 @@ class MemTableInserter : public WriteBatch::Handler {
|
|
|
1900
1901
|
// Make creation optional but do not incur
|
|
1901
1902
|
// std::unique_ptr additional allocation
|
|
1902
1903
|
using MemPostInfoMap = std::map<MemTable*, MemTablePostProcessInfo>;
|
|
1903
|
-
using PostMapType =
|
|
1904
|
+
using PostMapType = aligned_storage<MemPostInfoMap>::type;
|
|
1904
1905
|
PostMapType mem_post_info_map_;
|
|
1905
1906
|
// current recovered transaction we are rebuilding (recovery)
|
|
1906
1907
|
WriteBatch* rebuilding_trx_;
|
|
@@ -1914,7 +1915,7 @@ class MemTableInserter : public WriteBatch::Handler {
|
|
|
1914
1915
|
bool write_before_prepare_;
|
|
1915
1916
|
// Whether this batch was unprepared or not
|
|
1916
1917
|
bool unprepared_batch_;
|
|
1917
|
-
using DupDetector =
|
|
1918
|
+
using DupDetector = aligned_storage<DuplicateDetector>::type;
|
|
1918
1919
|
DupDetector duplicate_detector_;
|
|
1919
1920
|
bool dup_dectector_on_;
|
|
1920
1921
|
|
|
@@ -1922,7 +1923,7 @@ class MemTableInserter : public WriteBatch::Handler {
|
|
|
1922
1923
|
bool hint_created_;
|
|
1923
1924
|
// Hints for this batch
|
|
1924
1925
|
using HintMap = std::unordered_map<MemTable*, void*>;
|
|
1925
|
-
using HintMapType =
|
|
1926
|
+
using HintMapType = aligned_storage<HintMap>::type;
|
|
1926
1927
|
HintMapType hint_;
|
|
1927
1928
|
|
|
1928
1929
|
HintMap& GetHintMap() {
|
|
@@ -2121,14 +2122,15 @@ class MemTableInserter : public WriteBatch::Handler {
|
|
|
2121
2122
|
return true;
|
|
2122
2123
|
}
|
|
2123
2124
|
|
|
2125
|
+
template <typename RebuildTxnOp>
|
|
2124
2126
|
Status PutCFImpl(uint32_t column_family_id, const Slice& key,
|
|
2125
2127
|
const Slice& value, ValueType value_type,
|
|
2128
|
+
RebuildTxnOp rebuild_txn_op,
|
|
2126
2129
|
const ProtectionInfoKVOS64* kv_prot_info) {
|
|
2127
2130
|
// optimize for non-recovery mode
|
|
2128
2131
|
if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) {
|
|
2129
2132
|
// TODO(ajkr): propagate `ProtectionInfoKVOS64`.
|
|
2130
|
-
return
|
|
2131
|
-
value);
|
|
2133
|
+
return rebuild_txn_op(rebuilding_trx_, column_family_id, key, value);
|
|
2132
2134
|
// else insert the values to the memtable right away
|
|
2133
2135
|
}
|
|
2134
2136
|
|
|
@@ -2139,8 +2141,8 @@ class MemTableInserter : public WriteBatch::Handler {
|
|
|
2139
2141
|
// The CF is probably flushed and hence no need for insert but we still
|
|
2140
2142
|
// need to keep track of the keys for upcoming rollback/commit.
|
|
2141
2143
|
// TODO(ajkr): propagate `ProtectionInfoKVOS64`.
|
|
2142
|
-
ret_status =
|
|
2143
|
-
|
|
2144
|
+
ret_status =
|
|
2145
|
+
rebuild_txn_op(rebuilding_trx_, column_family_id, key, value);
|
|
2144
2146
|
if (ret_status.ok()) {
|
|
2145
2147
|
MaybeAdvanceSeq(IsDuplicateKeySeq(column_family_id, key));
|
|
2146
2148
|
}
|
|
@@ -2264,8 +2266,8 @@ class MemTableInserter : public WriteBatch::Handler {
|
|
|
2264
2266
|
if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) {
|
|
2265
2267
|
assert(!write_after_commit_);
|
|
2266
2268
|
// TODO(ajkr): propagate `ProtectionInfoKVOS64`.
|
|
2267
|
-
ret_status =
|
|
2268
|
-
|
|
2269
|
+
ret_status =
|
|
2270
|
+
rebuild_txn_op(rebuilding_trx_, column_family_id, key, value);
|
|
2269
2271
|
}
|
|
2270
2272
|
return ret_status;
|
|
2271
2273
|
}
|
|
@@ -2274,15 +2276,21 @@ class MemTableInserter : public WriteBatch::Handler {
|
|
|
2274
2276
|
const Slice& value) override {
|
|
2275
2277
|
const auto* kv_prot_info = NextProtectionInfo();
|
|
2276
2278
|
Status ret_status;
|
|
2279
|
+
|
|
2280
|
+
auto rebuild_txn_op = [](WriteBatch* rebuilding_trx, uint32_t cf_id,
|
|
2281
|
+
const Slice& k, const Slice& v) -> Status {
|
|
2282
|
+
return WriteBatchInternal::Put(rebuilding_trx, cf_id, k, v);
|
|
2283
|
+
};
|
|
2284
|
+
|
|
2277
2285
|
if (kv_prot_info != nullptr) {
|
|
2278
2286
|
// Memtable needs seqno, doesn't need CF ID
|
|
2279
2287
|
auto mem_kv_prot_info =
|
|
2280
2288
|
kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
|
|
2281
2289
|
ret_status = PutCFImpl(column_family_id, key, value, kTypeValue,
|
|
2282
|
-
&mem_kv_prot_info);
|
|
2290
|
+
rebuild_txn_op, &mem_kv_prot_info);
|
|
2283
2291
|
} else {
|
|
2284
2292
|
ret_status = PutCFImpl(column_family_id, key, value, kTypeValue,
|
|
2285
|
-
nullptr /* kv_prot_info */);
|
|
2293
|
+
rebuild_txn_op, nullptr /* kv_prot_info */);
|
|
2286
2294
|
}
|
|
2287
2295
|
// TODO: this assumes that if TryAgain status is returned to the caller,
|
|
2288
2296
|
// the operation is actually tried again. The proper way to do this is to
|
|
@@ -2301,15 +2309,23 @@ class MemTableInserter : public WriteBatch::Handler {
|
|
|
2301
2309
|
std::string value_buf;
|
|
2302
2310
|
Slice packed_value =
|
|
2303
2311
|
PackValueAndWriteTime(value, unix_write_time, &value_buf);
|
|
2312
|
+
|
|
2313
|
+
auto rebuild_txn_op = [](WriteBatch* /* rebuilding_trx */,
|
|
2314
|
+
uint32_t /* cf_id */, const Slice& /* k */,
|
|
2315
|
+
const Slice& /* v */) -> Status {
|
|
2316
|
+
return Status::NotSupported();
|
|
2317
|
+
};
|
|
2318
|
+
|
|
2304
2319
|
if (kv_prot_info != nullptr) {
|
|
2305
2320
|
auto mem_kv_prot_info =
|
|
2306
2321
|
kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
|
|
2307
2322
|
ret_status = PutCFImpl(column_family_id, key, packed_value,
|
|
2308
|
-
kTypeValuePreferredSeqno,
|
|
2323
|
+
kTypeValuePreferredSeqno, rebuild_txn_op,
|
|
2324
|
+
&mem_kv_prot_info);
|
|
2309
2325
|
} else {
|
|
2310
|
-
ret_status =
|
|
2311
|
-
|
|
2312
|
-
|
|
2326
|
+
ret_status = PutCFImpl(column_family_id, key, packed_value,
|
|
2327
|
+
kTypeValuePreferredSeqno, rebuild_txn_op,
|
|
2328
|
+
nullptr /* kv_prot_info */);
|
|
2313
2329
|
}
|
|
2314
2330
|
|
|
2315
2331
|
// TODO: this assumes that if TryAgain status is returned to the caller,
|
|
@@ -2327,14 +2343,27 @@ class MemTableInserter : public WriteBatch::Handler {
|
|
|
2327
2343
|
const auto* kv_prot_info = NextProtectionInfo();
|
|
2328
2344
|
|
|
2329
2345
|
Status s;
|
|
2346
|
+
|
|
2347
|
+
auto rebuild_txn_op = [](WriteBatch* rebuilding_trx, uint32_t cf_id,
|
|
2348
|
+
const Slice& k, Slice entity) -> Status {
|
|
2349
|
+
WideColumns columns;
|
|
2350
|
+
const Status st = WideColumnSerialization::Deserialize(entity, columns);
|
|
2351
|
+
if (!st.ok()) {
|
|
2352
|
+
return st;
|
|
2353
|
+
}
|
|
2354
|
+
|
|
2355
|
+
return WriteBatchInternal::PutEntity(rebuilding_trx, cf_id, k, columns);
|
|
2356
|
+
};
|
|
2357
|
+
|
|
2330
2358
|
if (kv_prot_info) {
|
|
2331
2359
|
// Memtable needs seqno, doesn't need CF ID
|
|
2332
2360
|
auto mem_kv_prot_info =
|
|
2333
2361
|
kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
|
|
2334
2362
|
s = PutCFImpl(column_family_id, key, value, kTypeWideColumnEntity,
|
|
2335
|
-
&mem_kv_prot_info);
|
|
2363
|
+
rebuild_txn_op, &mem_kv_prot_info);
|
|
2336
2364
|
} else {
|
|
2337
2365
|
s = PutCFImpl(column_family_id, key, value, kTypeWideColumnEntity,
|
|
2366
|
+
rebuild_txn_op,
|
|
2338
2367
|
/* kv_prot_info */ nullptr);
|
|
2339
2368
|
}
|
|
2340
2369
|
|
|
@@ -2521,11 +2550,6 @@ class MemTableInserter : public WriteBatch::Handler {
|
|
|
2521
2550
|
assert(ret_status.ok());
|
|
2522
2551
|
|
|
2523
2552
|
if (db_ != nullptr) {
|
|
2524
|
-
if (db_->immutable_db_options().row_cache) {
|
|
2525
|
-
ret_status.PermitUncheckedError();
|
|
2526
|
-
return Status::NotSupported(
|
|
2527
|
-
"DeleteRange is not compatible with row cache.");
|
|
2528
|
-
}
|
|
2529
2553
|
auto cf_handle = cf_mems_->GetColumnFamilyHandle();
|
|
2530
2554
|
if (cf_handle == nullptr) {
|
|
2531
2555
|
cf_handle = db_->DefaultColumnFamily();
|
|
@@ -2778,16 +2802,23 @@ class MemTableInserter : public WriteBatch::Handler {
|
|
|
2778
2802
|
const Slice& value) override {
|
|
2779
2803
|
const auto* kv_prot_info = NextProtectionInfo();
|
|
2780
2804
|
Status ret_status;
|
|
2805
|
+
|
|
2806
|
+
auto rebuild_txn_op = [](WriteBatch* /* rebuilding_trx */,
|
|
2807
|
+
uint32_t /* cf_id */, const Slice& /* k */,
|
|
2808
|
+
const Slice& /* v */) -> Status {
|
|
2809
|
+
return Status::NotSupported();
|
|
2810
|
+
};
|
|
2811
|
+
|
|
2781
2812
|
if (kv_prot_info != nullptr) {
|
|
2782
2813
|
// Memtable needs seqno, doesn't need CF ID
|
|
2783
2814
|
auto mem_kv_prot_info =
|
|
2784
2815
|
kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
|
|
2785
2816
|
// Same as PutCF except for value type.
|
|
2786
2817
|
ret_status = PutCFImpl(column_family_id, key, value, kTypeBlobIndex,
|
|
2787
|
-
&mem_kv_prot_info);
|
|
2818
|
+
rebuild_txn_op, &mem_kv_prot_info);
|
|
2788
2819
|
} else {
|
|
2789
2820
|
ret_status = PutCFImpl(column_family_id, key, value, kTypeBlobIndex,
|
|
2790
|
-
nullptr /* kv_prot_info */);
|
|
2821
|
+
rebuild_txn_op, nullptr /* kv_prot_info */);
|
|
2791
2822
|
}
|
|
2792
2823
|
if (UNLIKELY(ret_status.IsTryAgain())) {
|
|
2793
2824
|
DecrementProtectionInfoIdxForTryAgain();
|
|
@@ -2,8 +2,6 @@
|
|
|
2
2
|
// This source code is licensed under both the GPLv2 (found in the
|
|
3
3
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
4
|
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
-
|
|
6
|
-
|
|
7
5
|
#include "db/write_callback.h"
|
|
8
6
|
|
|
9
7
|
#include <atomic>
|
|
@@ -15,6 +13,7 @@
|
|
|
15
13
|
#include "db/db_impl/db_impl.h"
|
|
16
14
|
#include "port/port.h"
|
|
17
15
|
#include "rocksdb/db.h"
|
|
16
|
+
#include "rocksdb/user_write_callback.h"
|
|
18
17
|
#include "rocksdb/write_batch.h"
|
|
19
18
|
#include "test_util/sync_point.h"
|
|
20
19
|
#include "test_util/testharness.h"
|
|
@@ -84,6 +83,28 @@ class MockWriteCallback : public WriteCallback {
|
|
|
84
83
|
bool AllowWriteBatching() override { return allow_batching_; }
|
|
85
84
|
};
|
|
86
85
|
|
|
86
|
+
class MockUserWriteCallback : public UserWriteCallback {
|
|
87
|
+
public:
|
|
88
|
+
std::atomic<bool> write_enqueued_{false};
|
|
89
|
+
std::atomic<bool> wal_write_done_{false};
|
|
90
|
+
|
|
91
|
+
MockUserWriteCallback() = default;
|
|
92
|
+
|
|
93
|
+
MockUserWriteCallback(const MockUserWriteCallback& other) {
|
|
94
|
+
write_enqueued_.store(other.write_enqueued_.load());
|
|
95
|
+
wal_write_done_.store(other.wal_write_done_.load());
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
void OnWriteEnqueued() override { write_enqueued_.store(true); }
|
|
99
|
+
|
|
100
|
+
void OnWalWriteFinish() override { wal_write_done_.store(true); }
|
|
101
|
+
|
|
102
|
+
void Reset() {
|
|
103
|
+
write_enqueued_.store(false);
|
|
104
|
+
wal_write_done_.store(false);
|
|
105
|
+
}
|
|
106
|
+
};
|
|
107
|
+
|
|
87
108
|
#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
|
|
88
109
|
class WriteCallbackPTest
|
|
89
110
|
: public WriteCallbackTest,
|
|
@@ -119,9 +140,11 @@ TEST_P(WriteCallbackPTest, WriteWithCallbackTest) {
|
|
|
119
140
|
kvs_.clear();
|
|
120
141
|
write_batch_.Clear();
|
|
121
142
|
callback_.was_called_.store(false);
|
|
143
|
+
user_write_cb_.Reset();
|
|
122
144
|
}
|
|
123
145
|
|
|
124
146
|
MockWriteCallback callback_;
|
|
147
|
+
MockUserWriteCallback user_write_cb_;
|
|
125
148
|
WriteBatch write_batch_;
|
|
126
149
|
std::vector<std::pair<string, string>> kvs_;
|
|
127
150
|
};
|
|
@@ -327,18 +350,26 @@ TEST_P(WriteCallbackPTest, WriteWithCallbackTest) {
|
|
|
327
350
|
ASSERT_OK(WriteBatchInternal::InsertNoop(&write_op.write_batch_));
|
|
328
351
|
const size_t ONE_BATCH = 1;
|
|
329
352
|
s = db_impl->WriteImpl(woptions, &write_op.write_batch_,
|
|
330
|
-
&write_op.callback_,
|
|
331
|
-
ONE_BATCH,
|
|
353
|
+
&write_op.callback_, &write_op.user_write_cb_,
|
|
354
|
+
nullptr, 0, false, nullptr, ONE_BATCH,
|
|
332
355
|
two_queues_ ? &publish_seq_callback : nullptr);
|
|
333
356
|
} else {
|
|
334
357
|
s = db_impl->WriteWithCallback(woptions, &write_op.write_batch_,
|
|
335
|
-
&write_op.callback_
|
|
358
|
+
&write_op.callback_,
|
|
359
|
+
&write_op.user_write_cb_);
|
|
336
360
|
}
|
|
337
361
|
|
|
362
|
+
ASSERT_TRUE(write_op.user_write_cb_.write_enqueued_.load());
|
|
338
363
|
if (write_op.callback_.should_fail_) {
|
|
339
364
|
ASSERT_TRUE(s.IsBusy());
|
|
365
|
+
ASSERT_FALSE(write_op.user_write_cb_.wal_write_done_.load());
|
|
340
366
|
} else {
|
|
341
367
|
ASSERT_OK(s);
|
|
368
|
+
if (enable_WAL_) {
|
|
369
|
+
ASSERT_TRUE(write_op.user_write_cb_.wal_write_done_.load());
|
|
370
|
+
} else {
|
|
371
|
+
ASSERT_FALSE(write_op.user_write_cb_.wal_write_done_.load());
|
|
372
|
+
}
|
|
342
373
|
}
|
|
343
374
|
};
|
|
344
375
|
|
|
@@ -440,6 +471,16 @@ TEST_F(WriteCallbackTest, WriteCallBackTest) {
|
|
|
440
471
|
ASSERT_OK(s);
|
|
441
472
|
ASSERT_EQ("value.a2", value);
|
|
442
473
|
|
|
474
|
+
MockUserWriteCallback user_write_cb;
|
|
475
|
+
WriteBatch wb4;
|
|
476
|
+
ASSERT_OK(wb4.Put("a", "value.a4"));
|
|
477
|
+
|
|
478
|
+
ASSERT_OK(db->WriteWithCallback(write_options, &wb4, &user_write_cb));
|
|
479
|
+
ASSERT_OK(db->Get(read_options, "a", &value));
|
|
480
|
+
ASSERT_EQ(value, "value.a4");
|
|
481
|
+
ASSERT_TRUE(user_write_cb.write_enqueued_.load());
|
|
482
|
+
ASSERT_TRUE(user_write_cb.wal_write_done_.load());
|
|
483
|
+
|
|
443
484
|
delete db;
|
|
444
485
|
ASSERT_OK(DestroyDB(dbname, options));
|
|
445
486
|
}
|
|
@@ -404,6 +404,8 @@ void WriteThread::JoinBatchGroup(Writer* w) {
|
|
|
404
404
|
|
|
405
405
|
bool linked_as_leader = LinkOne(w, &newest_writer_);
|
|
406
406
|
|
|
407
|
+
w->CheckWriteEnqueuedCallback();
|
|
408
|
+
|
|
407
409
|
if (linked_as_leader) {
|
|
408
410
|
SetState(w, STATE_GROUP_LEADER);
|
|
409
411
|
}
|
|
@@ -428,6 +430,7 @@ void WriteThread::JoinBatchGroup(Writer* w) {
|
|
|
428
430
|
TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:BeganWaiting", w);
|
|
429
431
|
AwaitState(w,
|
|
430
432
|
STATE_GROUP_LEADER | STATE_MEMTABLE_WRITER_LEADER |
|
|
433
|
+
STATE_PARALLEL_MEMTABLE_CALLER |
|
|
431
434
|
STATE_PARALLEL_MEMTABLE_WRITER | STATE_COMPLETED,
|
|
432
435
|
&jbg_ctx);
|
|
433
436
|
TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:DoneWaiting", w);
|
|
@@ -656,12 +659,57 @@ void WriteThread::ExitAsMemTableWriter(Writer* /*self*/,
|
|
|
656
659
|
SetState(leader, STATE_COMPLETED);
|
|
657
660
|
}
|
|
658
661
|
|
|
662
|
+
void WriteThread::SetMemWritersEachStride(Writer* w) {
|
|
663
|
+
WriteGroup* write_group = w->write_group;
|
|
664
|
+
Writer* last_writer = write_group->last_writer;
|
|
665
|
+
|
|
666
|
+
// The stride is the same for each writer in write_group, so w will
|
|
667
|
+
// call the writers with the same index in write_group modulo the stride
|
|
668
|
+
size_t stride = static_cast<size_t>(std::sqrt(write_group->size));
|
|
669
|
+
size_t count = 0;
|
|
670
|
+
while (w) {
|
|
671
|
+
if (count++ % stride == 0) {
|
|
672
|
+
SetState(w, STATE_PARALLEL_MEMTABLE_WRITER);
|
|
673
|
+
}
|
|
674
|
+
w = (w == last_writer) ? nullptr : w->link_newer;
|
|
675
|
+
}
|
|
676
|
+
}
|
|
677
|
+
|
|
659
678
|
void WriteThread::LaunchParallelMemTableWriters(WriteGroup* write_group) {
|
|
660
679
|
assert(write_group != nullptr);
|
|
661
|
-
write_group->
|
|
662
|
-
|
|
663
|
-
|
|
680
|
+
size_t group_size = write_group->size;
|
|
681
|
+
write_group->running.store(group_size);
|
|
682
|
+
|
|
683
|
+
// The minimum number to allow the group use parallel caller mode.
|
|
684
|
+
// The number must be no lower than 3.
|
|
685
|
+
const size_t MinParallelSize = 20;
|
|
686
|
+
|
|
687
|
+
// The group_size is too small, and there is no need to have
|
|
688
|
+
// the parallel partial callers.
|
|
689
|
+
if (group_size < MinParallelSize) {
|
|
690
|
+
for (auto w : *write_group) {
|
|
691
|
+
SetState(w, STATE_PARALLEL_MEMTABLE_WRITER);
|
|
692
|
+
}
|
|
693
|
+
return;
|
|
664
694
|
}
|
|
695
|
+
|
|
696
|
+
// The stride is equal to std::sqrt(group_size) which can minimize
|
|
697
|
+
// the total number of SetState calls made by the leader.
|
|
698
|
+
// Set the leader itself STATE_PARALLEL_MEMTABLE_WRITER, and set
|
|
699
|
+
// (stride-1) writers to be STATE_PARALLEL_MEMTABLE_CALLER.
|
|
700
|
+
size_t stride = static_cast<size_t>(std::sqrt(group_size));
|
|
701
|
+
auto w = write_group->leader;
|
|
702
|
+
SetState(w, STATE_PARALLEL_MEMTABLE_WRITER);
|
|
703
|
+
|
|
704
|
+
for (size_t i = 1; i < stride; i++) {
|
|
705
|
+
w = w->link_newer;
|
|
706
|
+
SetState(w, STATE_PARALLEL_MEMTABLE_CALLER);
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
// After setting all STATE_PARALLEL_MEMTABLE_CALLER, the leader also
|
|
710
|
+
// does the job as STATE_PARALLEL_MEMTABLE_CALLER.
|
|
711
|
+
w = w->link_newer;
|
|
712
|
+
SetMemWritersEachStride(w);
|
|
665
713
|
}
|
|
666
714
|
|
|
667
715
|
static WriteThread::AdaptationContext cpmtw_ctx(
|
|
@@ -788,8 +836,8 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
|
|
|
788
836
|
}
|
|
789
837
|
|
|
790
838
|
AwaitState(leader,
|
|
791
|
-
STATE_MEMTABLE_WRITER_LEADER |
|
|
792
|
-
STATE_COMPLETED,
|
|
839
|
+
STATE_MEMTABLE_WRITER_LEADER | STATE_PARALLEL_MEMTABLE_CALLER |
|
|
840
|
+
STATE_PARALLEL_MEMTABLE_WRITER | STATE_COMPLETED,
|
|
793
841
|
&eabgl_ctx);
|
|
794
842
|
} else {
|
|
795
843
|
Writer* head = newest_writer_.load(std::memory_order_acquire);
|
|
@@ -22,7 +22,9 @@
|
|
|
22
22
|
#include "rocksdb/options.h"
|
|
23
23
|
#include "rocksdb/status.h"
|
|
24
24
|
#include "rocksdb/types.h"
|
|
25
|
+
#include "rocksdb/user_write_callback.h"
|
|
25
26
|
#include "rocksdb/write_batch.h"
|
|
27
|
+
#include "util/aligned_storage.h"
|
|
26
28
|
#include "util/autovector.h"
|
|
27
29
|
|
|
28
30
|
namespace ROCKSDB_NAMESPACE {
|
|
@@ -71,6 +73,12 @@ class WriteThread {
|
|
|
71
73
|
// A state indicating that the thread may be waiting using StateMutex()
|
|
72
74
|
// and StateCondVar()
|
|
73
75
|
STATE_LOCKED_WAITING = 32,
|
|
76
|
+
|
|
77
|
+
// The state used to inform a waiting writer that it has become a
|
|
78
|
+
// caller to call some other waiting writers to write to memtable
|
|
79
|
+
// by calling SetMemWritersEachStride. After doing
|
|
80
|
+
// this, it will also write to memtable.
|
|
81
|
+
STATE_PARALLEL_MEMTABLE_CALLER = 64,
|
|
74
82
|
};
|
|
75
83
|
|
|
76
84
|
struct Writer;
|
|
@@ -127,6 +135,7 @@ class WriteThread {
|
|
|
127
135
|
uint64_t log_used; // log number that this batch was inserted into
|
|
128
136
|
uint64_t log_ref; // log number that memtable insert should reference
|
|
129
137
|
WriteCallback* callback;
|
|
138
|
+
UserWriteCallback* user_write_cb;
|
|
130
139
|
bool made_waitable; // records lazy construction of mutex and cv
|
|
131
140
|
std::atomic<uint8_t> state; // write under StateMutex() or pre-link
|
|
132
141
|
WriteGroup* write_group;
|
|
@@ -134,8 +143,8 @@ class WriteThread {
|
|
|
134
143
|
Status status;
|
|
135
144
|
Status callback_status; // status returned by callback->Callback()
|
|
136
145
|
|
|
137
|
-
|
|
138
|
-
|
|
146
|
+
aligned_storage<std::mutex>::type state_mutex_bytes;
|
|
147
|
+
aligned_storage<std::condition_variable>::type state_cv_bytes;
|
|
139
148
|
Writer* link_older; // read/write only before linking, or as leader
|
|
140
149
|
Writer* link_newer; // lazy, read/write only before linking, or as leader
|
|
141
150
|
|
|
@@ -153,6 +162,7 @@ class WriteThread {
|
|
|
153
162
|
log_used(0),
|
|
154
163
|
log_ref(0),
|
|
155
164
|
callback(nullptr),
|
|
165
|
+
user_write_cb(nullptr),
|
|
156
166
|
made_waitable(false),
|
|
157
167
|
state(STATE_INIT),
|
|
158
168
|
write_group(nullptr),
|
|
@@ -161,8 +171,8 @@ class WriteThread {
|
|
|
161
171
|
link_newer(nullptr) {}
|
|
162
172
|
|
|
163
173
|
Writer(const WriteOptions& write_options, WriteBatch* _batch,
|
|
164
|
-
WriteCallback* _callback,
|
|
165
|
-
size_t _batch_cnt = 0,
|
|
174
|
+
WriteCallback* _callback, UserWriteCallback* _user_write_cb,
|
|
175
|
+
uint64_t _log_ref, bool _disable_memtable, size_t _batch_cnt = 0,
|
|
166
176
|
PreReleaseCallback* _pre_release_callback = nullptr,
|
|
167
177
|
PostMemTableCallback* _post_memtable_callback = nullptr)
|
|
168
178
|
: batch(_batch),
|
|
@@ -180,6 +190,7 @@ class WriteThread {
|
|
|
180
190
|
log_used(0),
|
|
181
191
|
log_ref(_log_ref),
|
|
182
192
|
callback(_callback),
|
|
193
|
+
user_write_cb(_user_write_cb),
|
|
183
194
|
made_waitable(false),
|
|
184
195
|
state(STATE_INIT),
|
|
185
196
|
write_group(nullptr),
|
|
@@ -203,6 +214,18 @@ class WriteThread {
|
|
|
203
214
|
return callback_status.ok();
|
|
204
215
|
}
|
|
205
216
|
|
|
217
|
+
void CheckWriteEnqueuedCallback() {
|
|
218
|
+
if (user_write_cb != nullptr) {
|
|
219
|
+
user_write_cb->OnWriteEnqueued();
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
void CheckPostWalWriteCallback() {
|
|
224
|
+
if (user_write_cb != nullptr) {
|
|
225
|
+
user_write_cb->OnWalWriteFinish();
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
206
229
|
void CreateMutex() {
|
|
207
230
|
if (!made_waitable) {
|
|
208
231
|
// Note that made_waitable is tracked separately from state
|
|
@@ -323,10 +346,19 @@ class WriteThread {
|
|
|
323
346
|
// Causes JoinBatchGroup to return STATE_PARALLEL_MEMTABLE_WRITER for all of
|
|
324
347
|
// the non-leader members of this write batch group. Sets Writer::sequence
|
|
325
348
|
// before waking them up.
|
|
349
|
+
// If the size of write_group n is not small, the leader will call n^0.5
|
|
350
|
+
// members to be PARALLEL_MEMTABLE_CALLER in the write_group to help to set
|
|
351
|
+
// others' states in parallel. This ensures that the cost to call SetState
|
|
352
|
+
// sequentially does not exceed 2(n^0.5).
|
|
326
353
|
//
|
|
327
354
|
// WriteGroup* write_group: Extra state used to coordinate the parallel add
|
|
328
355
|
void LaunchParallelMemTableWriters(WriteGroup* write_group);
|
|
329
356
|
|
|
357
|
+
// Every stride=N-th writer in the WriteGroup is set to be one of the
|
|
358
|
+
// MemTableWriters, where N is equal to the square root of the size of this
|
|
359
|
+
// write_group, and all of these MemTableWriters will write to memtable.
|
|
360
|
+
void SetMemWritersEachStride(Writer* w);
|
|
361
|
+
|
|
330
362
|
// Reports the completion of w's batch to the parallel group leader, and
|
|
331
363
|
// waits for the rest of the parallel batch to complete. Returns true
|
|
332
364
|
// if this thread is the last to complete, and hence should advance
|
|
@@ -590,6 +590,11 @@ class BatchedOpsStressTest : public StressTest {
|
|
|
590
590
|
// For half of the time, set the upper bound to the next prefix
|
|
591
591
|
ub_slices[i] = upper_bounds[i];
|
|
592
592
|
ro_copies[i].iterate_upper_bound = &(ub_slices[i]);
|
|
593
|
+
if (FLAGS_use_sqfc_for_range_queries) {
|
|
594
|
+
ro_copies[i].table_filter =
|
|
595
|
+
sqfc_factory_->GetTableFilterForRangeQuery(prefix_slices[i],
|
|
596
|
+
ub_slices[i]);
|
|
597
|
+
}
|
|
593
598
|
}
|
|
594
599
|
|
|
595
600
|
iters[i].reset(db_->NewIterator(ro_copies[i], cfh));
|
|
@@ -73,13 +73,13 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
73
73
|
status = db_->Write(write_opts, &batch);
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
-
if (
|
|
76
|
+
if (status.ok()) {
|
|
77
|
+
auto num = static_cast<long>(rand_column_families.size());
|
|
78
|
+
thread->stats.AddBytesForWrites(num, (sz + 1) * num);
|
|
79
|
+
} else if (!IsErrorInjectedAndRetryable(status)) {
|
|
77
80
|
fprintf(stderr, "multi put or merge error: %s\n",
|
|
78
81
|
status.ToString().c_str());
|
|
79
82
|
thread->stats.AddErrors(1);
|
|
80
|
-
} else {
|
|
81
|
-
auto num = static_cast<long>(rand_column_families.size());
|
|
82
|
-
thread->stats.AddBytesForWrites(num, (sz + 1) * num);
|
|
83
83
|
}
|
|
84
84
|
|
|
85
85
|
return status;
|
|
@@ -96,11 +96,11 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
96
96
|
batch.Delete(cfh, key);
|
|
97
97
|
}
|
|
98
98
|
Status s = db_->Write(write_opts, &batch);
|
|
99
|
-
if (
|
|
99
|
+
if (s.ok()) {
|
|
100
|
+
thread->stats.AddDeletes(static_cast<long>(rand_column_families.size()));
|
|
101
|
+
} else if (!IsErrorInjectedAndRetryable(s)) {
|
|
100
102
|
fprintf(stderr, "multidel error: %s\n", s.ToString().c_str());
|
|
101
103
|
thread->stats.AddErrors(1);
|
|
102
|
-
} else {
|
|
103
|
-
thread->stats.AddDeletes(static_cast<long>(rand_column_families.size()));
|
|
104
104
|
}
|
|
105
105
|
return s;
|
|
106
106
|
}
|
|
@@ -125,12 +125,12 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
125
125
|
batch.DeleteRange(cfh, key, end_key);
|
|
126
126
|
}
|
|
127
127
|
Status s = db_->Write(write_opts, &batch);
|
|
128
|
-
if (
|
|
129
|
-
fprintf(stderr, "multi del range error: %s\n", s.ToString().c_str());
|
|
130
|
-
thread->stats.AddErrors(1);
|
|
131
|
-
} else {
|
|
128
|
+
if (s.ok()) {
|
|
132
129
|
thread->stats.AddRangeDeletions(
|
|
133
130
|
static_cast<long>(rand_column_families.size()));
|
|
131
|
+
} else if (!IsErrorInjectedAndRetryable(s)) {
|
|
132
|
+
fprintf(stderr, "multi del range error: %s\n", s.ToString().c_str());
|
|
133
|
+
thread->stats.AddErrors(1);
|
|
134
134
|
}
|
|
135
135
|
return s;
|
|
136
136
|
}
|
|
@@ -170,6 +170,15 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
170
170
|
std::string value0;
|
|
171
171
|
s = db_->Get(readoptionscopy, column_families_[rand_column_families[0]],
|
|
172
172
|
key, &value0);
|
|
173
|
+
|
|
174
|
+
// Temporarily disable error injection for verification
|
|
175
|
+
if (fault_fs_guard) {
|
|
176
|
+
fault_fs_guard->DisableThreadLocalErrorInjection(
|
|
177
|
+
FaultInjectionIOType::kRead);
|
|
178
|
+
fault_fs_guard->DisableThreadLocalErrorInjection(
|
|
179
|
+
FaultInjectionIOType::kMetadataRead);
|
|
180
|
+
}
|
|
181
|
+
|
|
173
182
|
if (s.ok() || s.IsNotFound()) {
|
|
174
183
|
bool found = s.ok();
|
|
175
184
|
for (size_t i = 1; i < rand_column_families.size(); i++) {
|
|
@@ -214,6 +223,13 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
214
223
|
}
|
|
215
224
|
}
|
|
216
225
|
|
|
226
|
+
// Re-enable the error injection that was disabled for verification
|
|
227
|
+
if (fault_fs_guard) {
|
|
228
|
+
fault_fs_guard->EnableThreadLocalErrorInjection(
|
|
229
|
+
FaultInjectionIOType::kRead);
|
|
230
|
+
fault_fs_guard->EnableThreadLocalErrorInjection(
|
|
231
|
+
FaultInjectionIOType::kMetadataRead);
|
|
232
|
+
}
|
|
217
233
|
db_->ReleaseSnapshot(snapshot);
|
|
218
234
|
}
|
|
219
235
|
if (!is_consistent) {
|
|
@@ -225,7 +241,7 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
225
241
|
thread->stats.AddGets(1, 1);
|
|
226
242
|
} else if (s.IsNotFound()) {
|
|
227
243
|
thread->stats.AddGets(1, 0);
|
|
228
|
-
} else {
|
|
244
|
+
} else if (!IsErrorInjectedAndRetryable(s)) {
|
|
229
245
|
fprintf(stderr, "TestGet error: %s\n", s.ToString().c_str());
|
|
230
246
|
thread->stats.AddErrors(1);
|
|
231
247
|
}
|
|
@@ -261,7 +277,7 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
261
277
|
} else if (s.IsNotFound()) {
|
|
262
278
|
// not found case
|
|
263
279
|
thread->stats.AddGets(1, 0);
|
|
264
|
-
} else {
|
|
280
|
+
} else if (!IsErrorInjectedAndRetryable(s)) {
|
|
265
281
|
// errors case
|
|
266
282
|
fprintf(stderr, "MultiGet error: %s\n", s.ToString().c_str());
|
|
267
283
|
thread->stats.AddErrors(1);
|
|
@@ -323,6 +339,14 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
323
339
|
column_families_[rand_column_families[0]], key,
|
|
324
340
|
&cmp_result);
|
|
325
341
|
|
|
342
|
+
// Temporarily disable error injection for verification
|
|
343
|
+
if (fault_fs_guard) {
|
|
344
|
+
fault_fs_guard->DisableThreadLocalErrorInjection(
|
|
345
|
+
FaultInjectionIOType::kRead);
|
|
346
|
+
fault_fs_guard->DisableThreadLocalErrorInjection(
|
|
347
|
+
FaultInjectionIOType::kMetadataRead);
|
|
348
|
+
}
|
|
349
|
+
|
|
326
350
|
if (s.ok() || s.IsNotFound()) {
|
|
327
351
|
const bool cmp_found = s.ok();
|
|
328
352
|
|
|
@@ -458,6 +482,14 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
458
482
|
}
|
|
459
483
|
}
|
|
460
484
|
}
|
|
485
|
+
|
|
486
|
+
// Re-enable the error injection that was disabled for verification
|
|
487
|
+
if (fault_fs_guard) {
|
|
488
|
+
fault_fs_guard->EnableThreadLocalErrorInjection(
|
|
489
|
+
FaultInjectionIOType::kRead);
|
|
490
|
+
fault_fs_guard->EnableThreadLocalErrorInjection(
|
|
491
|
+
FaultInjectionIOType::kMetadataRead);
|
|
492
|
+
}
|
|
461
493
|
}
|
|
462
494
|
|
|
463
495
|
if (!is_consistent) {
|
|
@@ -469,7 +501,7 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
469
501
|
thread->stats.AddGets(1, 1);
|
|
470
502
|
} else if (s.IsNotFound()) {
|
|
471
503
|
thread->stats.AddGets(1, 0);
|
|
472
|
-
} else {
|
|
504
|
+
} else if (!IsErrorInjectedAndRetryable(s)) {
|
|
473
505
|
fprintf(stderr, "TestGetEntity error: %s\n", s.ToString().c_str());
|
|
474
506
|
thread->stats.AddErrors(1);
|
|
475
507
|
}
|
|
@@ -540,7 +572,9 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
540
572
|
for (size_t j = 0; j < num_cfs; ++j) {
|
|
541
573
|
const Status& s = result[j].status();
|
|
542
574
|
const WideColumns& columns = result[j].columns();
|
|
543
|
-
if (!s.ok() &&
|
|
575
|
+
if (!s.ok() && IsErrorInjectedAndRetryable(s)) {
|
|
576
|
+
break;
|
|
577
|
+
} else if (!s.ok() && !s.IsNotFound()) {
|
|
544
578
|
fprintf(stderr, "TestMultiGetEntity (AttributeGroup) error: %s\n",
|
|
545
579
|
s.ToString().c_str());
|
|
546
580
|
thread->stats.AddErrors(1);
|
|
@@ -645,7 +679,9 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
645
679
|
const Status& s = statuses[j];
|
|
646
680
|
const WideColumns& columns = results[j].columns();
|
|
647
681
|
|
|
648
|
-
if (!s.ok() &&
|
|
682
|
+
if (!s.ok() && IsErrorInjectedAndRetryable(s)) {
|
|
683
|
+
break;
|
|
684
|
+
} else if (!s.ok() && !s.IsNotFound()) {
|
|
649
685
|
fprintf(stderr, "TestMultiGetEntity error: %s\n",
|
|
650
686
|
s.ToString().c_str());
|
|
651
687
|
thread->stats.AddErrors(1);
|
|
@@ -746,6 +782,10 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
746
782
|
if (GetNextPrefix(prefix, &upper_bound) && thread->rand.OneIn(2)) {
|
|
747
783
|
ub_slice = Slice(upper_bound);
|
|
748
784
|
ro_copy.iterate_upper_bound = &ub_slice;
|
|
785
|
+
if (FLAGS_use_sqfc_for_range_queries) {
|
|
786
|
+
ro_copy.table_filter =
|
|
787
|
+
sqfc_factory_->GetTableFilterForRangeQuery(prefix, ub_slice);
|
|
788
|
+
}
|
|
749
789
|
}
|
|
750
790
|
|
|
751
791
|
ColumnFamilyHandle* const cfh =
|
|
@@ -776,7 +816,7 @@ class CfConsistencyStressTest : public StressTest {
|
|
|
776
816
|
s = iter->status();
|
|
777
817
|
}
|
|
778
818
|
|
|
779
|
-
if (!s.ok()) {
|
|
819
|
+
if (!s.ok() && !IsErrorInjectedAndRetryable(s)) {
|
|
780
820
|
fprintf(stderr, "TestPrefixScan error: %s\n", s.ToString().c_str());
|
|
781
821
|
thread->stats.AddErrors(1);
|
|
782
822
|
|