@nxtedition/rocksdb 7.1.2 → 7.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +26 -0
- package/deps/rocksdb/iostats.patch +19 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +15 -1
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +93 -58
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -40
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +57 -32
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +103 -28
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +33 -1
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +177 -38
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +3 -1
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +125 -71
- package/deps/rocksdb/rocksdb/crash_test.mk +15 -1
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_index.h +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_format.cc +3 -5
- package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +25 -19
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +36 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +2 -15
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +17 -4
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +8 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +0 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +50 -52
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +33 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +41 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +1 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +143 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +43 -18
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +48 -65
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +73 -4
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +17 -8
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +71 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +18 -35
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +7 -7
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +15 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -0
- package/deps/rocksdb/rocksdb/db/db_iter.cc +69 -11
- package/deps/rocksdb/rocksdb/db/db_iter.h +16 -0
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
- package/deps/rocksdb/rocksdb/db/db_test.cc +61 -28
- package/deps/rocksdb/rocksdb/db/db_test2.cc +18 -7
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +17 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +61 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +130 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +7 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +1 -2
- package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -7
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -1
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +4 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +1 -1
- package/deps/rocksdb/rocksdb/db/log_reader.cc +48 -11
- package/deps/rocksdb/rocksdb/db/log_reader.h +8 -2
- package/deps/rocksdb/rocksdb/db/log_test.cc +10 -1
- package/deps/rocksdb/rocksdb/db/log_writer.cc +7 -1
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/memtable.cc +49 -14
- package/deps/rocksdb/rocksdb/db/memtable.h +60 -14
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +14 -8
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +30 -10
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +5 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +2 -3
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -7
- package/deps/rocksdb/rocksdb/db/table_cache.cc +72 -0
- package/deps/rocksdb/rocksdb/db/table_cache.h +19 -1
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +8 -14
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +35 -64
- package/deps/rocksdb/rocksdb/db/version_edit.cc +3 -32
- package/deps/rocksdb/rocksdb/db/version_edit.h +2 -12
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +10 -23
- package/deps/rocksdb/rocksdb/db/version_set.cc +34 -10
- package/deps/rocksdb/rocksdb/db/version_set.h +3 -3
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -6
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +17 -15
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +0 -4
- package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +137 -42
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +21 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +1 -0
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/write_thread.cc +51 -46
- package/deps/rocksdb/rocksdb/db/write_thread.h +0 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +6 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +6 -0
- package/deps/rocksdb/rocksdb/env/env_posix.cc +1 -1
- package/deps/rocksdb/rocksdb/env/env_test.cc +38 -8
- package/deps/rocksdb/rocksdb/env/file_system.cc +20 -0
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +2 -46
- package/deps/rocksdb/rocksdb/env/io_posix.cc +1 -0
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +110 -5
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +14 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +10 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +2 -0
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -0
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +9 -13
- package/deps/rocksdb/rocksdb/logging/env_logger.h +39 -13
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +1 -1
- package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +1 -1
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +6 -0
- package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +4 -1
- package/deps/rocksdb/rocksdb/options/cf_options.cc +6 -3
- package/deps/rocksdb/rocksdb/options/cf_options.h +6 -5
- package/deps/rocksdb/rocksdb/options/options_helper.cc +2 -1
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +1 -0
- package/deps/rocksdb/rocksdb/options/options_test.cc +4 -2
- package/deps/rocksdb/rocksdb/port/util_logger.h +1 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +50 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +7 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +28 -10
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +1 -0
- package/deps/rocksdb/rocksdb/table/get_context.cc +16 -6
- package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +2 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +14 -1
- package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +5 -2
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +7 -8
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +6 -6
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -1
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +2 -0
- package/deps/rocksdb/rocksdb/util/stderr_logger.h +13 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +55 -46
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +10 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +2 -2
- package/index.js +7 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/darwin-x64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/logging/posix_logger.h +0 -179
|
@@ -4,8 +4,10 @@
|
|
|
4
4
|
// (found in the LICENSE.Apache file in the root directory).
|
|
5
5
|
|
|
6
6
|
#include "db/write_thread.h"
|
|
7
|
+
|
|
7
8
|
#include <chrono>
|
|
8
9
|
#include <thread>
|
|
10
|
+
|
|
9
11
|
#include "db/column_family.h"
|
|
10
12
|
#include "monitoring/perf_context_imp.h"
|
|
11
13
|
#include "port/port.h"
|
|
@@ -293,17 +295,6 @@ void WriteThread::CreateMissingNewerLinks(Writer* head) {
|
|
|
293
295
|
}
|
|
294
296
|
}
|
|
295
297
|
|
|
296
|
-
WriteThread::Writer* WriteThread::FindNextLeader(Writer* from,
|
|
297
|
-
Writer* boundary) {
|
|
298
|
-
assert(from != nullptr && from != boundary);
|
|
299
|
-
Writer* current = from;
|
|
300
|
-
while (current->link_older != boundary) {
|
|
301
|
-
current = current->link_older;
|
|
302
|
-
assert(current != nullptr);
|
|
303
|
-
}
|
|
304
|
-
return current;
|
|
305
|
-
}
|
|
306
|
-
|
|
307
298
|
void WriteThread::CompleteLeader(WriteGroup& write_group) {
|
|
308
299
|
assert(write_group.size > 0);
|
|
309
300
|
Writer* leader = write_group.leader;
|
|
@@ -640,6 +631,9 @@ void WriteThread::ExitAsBatchGroupFollower(Writer* w) {
|
|
|
640
631
|
static WriteThread::AdaptationContext eabgl_ctx("ExitAsBatchGroupLeader");
|
|
641
632
|
void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
|
|
642
633
|
Status& status) {
|
|
634
|
+
TEST_SYNC_POINT_CALLBACK("WriteThread::ExitAsBatchGroupLeader:Start",
|
|
635
|
+
&write_group);
|
|
636
|
+
|
|
643
637
|
Writer* leader = write_group.leader;
|
|
644
638
|
Writer* last_writer = write_group.last_writer;
|
|
645
639
|
assert(leader->link_older == nullptr);
|
|
@@ -656,7 +650,36 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
|
|
|
656
650
|
}
|
|
657
651
|
|
|
658
652
|
if (enable_pipelined_write_) {
|
|
659
|
-
//
|
|
653
|
+
// We insert a dummy Writer right before our current write_group. This
|
|
654
|
+
// allows us to unlink our write_group without the risk that a subsequent
|
|
655
|
+
// writer becomes a new leader and might overtake us and add itself to the
|
|
656
|
+
// memtable-writer-list before we can do so. This ensures that writers are
|
|
657
|
+
// added to the memtable-writer-list in the exact same order in which they
|
|
658
|
+
// were in the newest_writer list.
|
|
659
|
+
// This must happen before completing the writers from our group to prevent
|
|
660
|
+
// a race where the owning thread of one of these writers can start a new
|
|
661
|
+
// write operation.
|
|
662
|
+
Writer dummy;
|
|
663
|
+
Writer* head = newest_writer_.load(std::memory_order_acquire);
|
|
664
|
+
if (head != last_writer ||
|
|
665
|
+
!newest_writer_.compare_exchange_strong(head, &dummy)) {
|
|
666
|
+
// Either last_writer wasn't the head during the load(), or it was the
|
|
667
|
+
// head during the load() but somebody else pushed onto the list before
|
|
668
|
+
// we did the compare_exchange_strong (causing it to fail). In the latter
|
|
669
|
+
// case compare_exchange_strong has the effect of re-reading its first
|
|
670
|
+
// param (head). No need to retry a failing CAS, because only a departing
|
|
671
|
+
// leader (which we are at the moment) can remove nodes from the list.
|
|
672
|
+
assert(head != last_writer);
|
|
673
|
+
|
|
674
|
+
// After walking link_older starting from head (if not already done) we
|
|
675
|
+
// will be able to traverse w->link_newer below.
|
|
676
|
+
CreateMissingNewerLinks(head);
|
|
677
|
+
assert(last_writer->link_newer != nullptr);
|
|
678
|
+
last_writer->link_newer->link_older = &dummy;
|
|
679
|
+
dummy.link_newer = last_writer->link_newer;
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
// Complete writers that don't write to memtable
|
|
660
683
|
for (Writer* w = last_writer; w != leader;) {
|
|
661
684
|
Writer* next = w->link_older;
|
|
662
685
|
w->status = status;
|
|
@@ -669,23 +692,11 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
|
|
|
669
692
|
CompleteLeader(write_group);
|
|
670
693
|
}
|
|
671
694
|
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
// pending writers, place a dummy writer at the tail of the queue
|
|
676
|
-
// so we know the boundary of the current write group.
|
|
677
|
-
Writer dummy;
|
|
678
|
-
Writer* expected = last_writer;
|
|
679
|
-
bool has_dummy = newest_writer_.compare_exchange_strong(expected, &dummy);
|
|
680
|
-
if (!has_dummy) {
|
|
681
|
-
// We find at least one pending writer when we insert dummy. We search
|
|
682
|
-
// for next leader from there.
|
|
683
|
-
next_leader = FindNextLeader(expected, last_writer);
|
|
684
|
-
assert(next_leader != nullptr && next_leader != last_writer);
|
|
685
|
-
}
|
|
695
|
+
TEST_SYNC_POINT_CALLBACK(
|
|
696
|
+
"WriteThread::ExitAsBatchGroupLeader:AfterCompleteWriters",
|
|
697
|
+
&write_group);
|
|
686
698
|
|
|
687
|
-
// Link the
|
|
688
|
-
//
|
|
699
|
+
// Link the remaining of the group to memtable writer list.
|
|
689
700
|
// We have to link our group to memtable writer queue before wake up the
|
|
690
701
|
// next leader or set newest_writer_ to null, otherwise the next leader
|
|
691
702
|
// can run ahead of us and link to memtable writer queue before we do.
|
|
@@ -696,24 +707,17 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
|
|
|
696
707
|
}
|
|
697
708
|
}
|
|
698
709
|
|
|
699
|
-
//
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
next_leader = FindNextLeader(expected, &dummy);
|
|
709
|
-
assert(next_leader != nullptr && next_leader != &dummy);
|
|
710
|
-
}
|
|
710
|
+
// Unlink the dummy writer from the list and identify the new leader
|
|
711
|
+
head = newest_writer_.load(std::memory_order_acquire);
|
|
712
|
+
if (head != &dummy ||
|
|
713
|
+
!newest_writer_.compare_exchange_strong(head, nullptr)) {
|
|
714
|
+
CreateMissingNewerLinks(head);
|
|
715
|
+
Writer* new_leader = dummy.link_newer;
|
|
716
|
+
assert(new_leader != nullptr);
|
|
717
|
+
new_leader->link_older = nullptr;
|
|
718
|
+
SetState(new_leader, STATE_GROUP_LEADER);
|
|
711
719
|
}
|
|
712
720
|
|
|
713
|
-
if (next_leader != nullptr) {
|
|
714
|
-
next_leader->link_older = nullptr;
|
|
715
|
-
SetState(next_leader, STATE_GROUP_LEADER);
|
|
716
|
-
}
|
|
717
721
|
AwaitState(leader, STATE_MEMTABLE_WRITER_LEADER |
|
|
718
722
|
STATE_PARALLEL_MEMTABLE_WRITER | STATE_COMPLETED,
|
|
719
723
|
&eabgl_ctx);
|
|
@@ -721,8 +725,8 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
|
|
|
721
725
|
Writer* head = newest_writer_.load(std::memory_order_acquire);
|
|
722
726
|
if (head != last_writer ||
|
|
723
727
|
!newest_writer_.compare_exchange_strong(head, nullptr)) {
|
|
724
|
-
// Either
|
|
725
|
-
// during the load() but somebody else pushed onto the list before
|
|
728
|
+
// Either last_writer wasn't the head during the load(), or it was the
|
|
729
|
+
// head during the load() but somebody else pushed onto the list before
|
|
726
730
|
// we did the compare_exchange_strong (causing it to fail). In the
|
|
727
731
|
// latter case compare_exchange_strong has the effect of re-reading
|
|
728
732
|
// its first param (head). No need to retry a failing CAS, because
|
|
@@ -738,6 +742,7 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
|
|
|
738
742
|
// to MarkJoined, so we can definitely conclude that no other leader
|
|
739
743
|
// work is going on here (with or without db mutex).
|
|
740
744
|
CreateMissingNewerLinks(head);
|
|
745
|
+
assert(last_writer->link_newer != nullptr);
|
|
741
746
|
assert(last_writer->link_newer->link_older == last_writer);
|
|
742
747
|
last_writer->link_newer->link_older = nullptr;
|
|
743
748
|
|
|
@@ -428,10 +428,6 @@ class WriteThread {
|
|
|
428
428
|
// concurrently with itself.
|
|
429
429
|
void CreateMissingNewerLinks(Writer* head);
|
|
430
430
|
|
|
431
|
-
// Starting from a pending writer, follow link_older to search for next
|
|
432
|
-
// leader, until we hit boundary.
|
|
433
|
-
Writer* FindNextLeader(Writer* pending_writer, Writer* boundary);
|
|
434
|
-
|
|
435
431
|
// Set the leader in write_group to completed state and remove it from the
|
|
436
432
|
// write group.
|
|
437
433
|
void CompleteLeader(WriteGroup& write_group);
|
|
@@ -307,6 +307,10 @@ DECLARE_int32(create_timestamped_snapshot_one_in);
|
|
|
307
307
|
|
|
308
308
|
DECLARE_bool(allow_data_in_errors);
|
|
309
309
|
|
|
310
|
+
// Tiered storage
|
|
311
|
+
DECLARE_bool(enable_tiered_storage); // set last_level_temperature
|
|
312
|
+
DECLARE_int64(preclude_last_level_data_seconds);
|
|
313
|
+
|
|
310
314
|
constexpr long KB = 1024;
|
|
311
315
|
constexpr int kRandomValueMaxFactor = 3;
|
|
312
316
|
constexpr int kValueMaxLen = 100;
|
|
@@ -483,6 +483,12 @@ DEFINE_int32(prepopulate_blob_cache, 0,
|
|
|
483
483
|
"[Integrated BlobDB] Pre-populate hot/warm blobs in blob cache. 0 "
|
|
484
484
|
"to disable and 1 to insert during flush.");
|
|
485
485
|
|
|
486
|
+
DEFINE_bool(enable_tiered_storage, false, "Set last_level_temperature");
|
|
487
|
+
|
|
488
|
+
DEFINE_int64(preclude_last_level_data_seconds, 0,
|
|
489
|
+
"Preclude data from the last level. Used with tiered storage "
|
|
490
|
+
"feature to preclude new data from comacting to the last level.");
|
|
491
|
+
|
|
486
492
|
static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) =
|
|
487
493
|
RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range);
|
|
488
494
|
|
|
@@ -3063,6 +3063,12 @@ void InitializeOptionsFromFlags(
|
|
|
3063
3063
|
options.wal_compression =
|
|
3064
3064
|
StringToCompressionType(FLAGS_wal_compression.c_str());
|
|
3065
3065
|
|
|
3066
|
+
if (FLAGS_enable_tiered_storage) {
|
|
3067
|
+
options.bottommost_temperature = Temperature::kCold;
|
|
3068
|
+
}
|
|
3069
|
+
options.preclude_last_level_data_seconds =
|
|
3070
|
+
FLAGS_preclude_last_level_data_seconds;
|
|
3071
|
+
|
|
3066
3072
|
switch (FLAGS_rep_factory) {
|
|
3067
3073
|
case kSkipList:
|
|
3068
3074
|
// no need to do anything
|
|
@@ -55,10 +55,10 @@
|
|
|
55
55
|
|
|
56
56
|
#include "env/composite_env_wrapper.h"
|
|
57
57
|
#include "env/io_posix.h"
|
|
58
|
-
#include "logging/posix_logger.h"
|
|
59
58
|
#include "monitoring/iostats_context_imp.h"
|
|
60
59
|
#include "monitoring/thread_status_updater.h"
|
|
61
60
|
#include "port/port.h"
|
|
61
|
+
#include "port/sys_time.h"
|
|
62
62
|
#include "rocksdb/env.h"
|
|
63
63
|
#include "rocksdb/options.h"
|
|
64
64
|
#include "rocksdb/slice.h"
|
|
@@ -1078,11 +1078,20 @@ class IoctlFriendlyTmpdir {
|
|
|
1078
1078
|
}
|
|
1079
1079
|
}
|
|
1080
1080
|
|
|
1081
|
+
// check if it's running test within a docker container, in which case, the
|
|
1082
|
+
// file system inside `overlayfs` may not support FS_IOC_GETVERSION
|
|
1083
|
+
// skip the tests
|
|
1084
|
+
struct stat buffer;
|
|
1085
|
+
if (stat("/.dockerenv", &buffer) == 0) {
|
|
1086
|
+
is_supported_ = false;
|
|
1087
|
+
return;
|
|
1088
|
+
}
|
|
1089
|
+
|
|
1081
1090
|
fprintf(stderr, "failed to find an ioctl-friendly temporary directory;"
|
|
1082
1091
|
" specify one via the TEST_IOCTL_FRIENDLY_TMPDIR envvar\n");
|
|
1083
1092
|
std::abort();
|
|
1084
1093
|
#endif
|
|
1085
|
-
}
|
|
1094
|
+
}
|
|
1086
1095
|
|
|
1087
1096
|
~IoctlFriendlyTmpdir() {
|
|
1088
1097
|
rmdir(dir_.c_str());
|
|
@@ -1092,8 +1101,12 @@ class IoctlFriendlyTmpdir {
|
|
|
1092
1101
|
return dir_;
|
|
1093
1102
|
}
|
|
1094
1103
|
|
|
1104
|
+
bool is_supported() const { return is_supported_; }
|
|
1105
|
+
|
|
1095
1106
|
private:
|
|
1096
1107
|
std::string dir_;
|
|
1108
|
+
|
|
1109
|
+
bool is_supported_ = true;
|
|
1097
1110
|
};
|
|
1098
1111
|
|
|
1099
1112
|
#ifndef ROCKSDB_LITE
|
|
@@ -1102,8 +1115,10 @@ TEST_F(EnvPosixTest, PositionedAppend) {
|
|
|
1102
1115
|
EnvOptions options;
|
|
1103
1116
|
options.use_direct_writes = true;
|
|
1104
1117
|
options.use_mmap_writes = false;
|
|
1105
|
-
|
|
1106
|
-
|
|
1118
|
+
std::string fname = test::PerThreadDBPath(env_, "positioned_append");
|
|
1119
|
+
SetupSyncPointsToMockDirectIO();
|
|
1120
|
+
|
|
1121
|
+
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, options));
|
|
1107
1122
|
const size_t kBlockSize = 4096;
|
|
1108
1123
|
const size_t kDataSize = kPageSize;
|
|
1109
1124
|
// Write a page worth of 'a'
|
|
@@ -1119,7 +1134,7 @@ TEST_F(EnvPosixTest, PositionedAppend) {
|
|
|
1119
1134
|
|
|
1120
1135
|
// Verify the above
|
|
1121
1136
|
std::unique_ptr<SequentialFile> seq_file;
|
|
1122
|
-
ASSERT_OK(env_->NewSequentialFile(
|
|
1137
|
+
ASSERT_OK(env_->NewSequentialFile(fname, &seq_file, options));
|
|
1123
1138
|
size_t scratch_len = kPageSize * 2;
|
|
1124
1139
|
std::unique_ptr<char[]> scratch(new char[scratch_len]);
|
|
1125
1140
|
Slice result;
|
|
@@ -1139,6 +1154,11 @@ TEST_P(EnvPosixTestWithParam, RandomAccessUniqueID) {
|
|
|
1139
1154
|
EnvOptions soptions;
|
|
1140
1155
|
soptions.use_direct_reads = soptions.use_direct_writes = direct_io_;
|
|
1141
1156
|
IoctlFriendlyTmpdir ift;
|
|
1157
|
+
if (!ift.is_supported()) {
|
|
1158
|
+
ROCKSDB_GTEST_BYPASS(
|
|
1159
|
+
"FS_IOC_GETVERSION is not supported by the filesystem");
|
|
1160
|
+
return;
|
|
1161
|
+
}
|
|
1142
1162
|
std::string fname = ift.name() + "/testfile";
|
|
1143
1163
|
std::unique_ptr<WritableFile> wfile;
|
|
1144
1164
|
ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions));
|
|
@@ -1181,13 +1201,13 @@ TEST_P(EnvPosixTestWithParam, RandomAccessUniqueID) {
|
|
|
1181
1201
|
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
|
1182
1202
|
TEST_P(EnvPosixTestWithParam, AllocateTest) {
|
|
1183
1203
|
if (env_ == Env::Default()) {
|
|
1184
|
-
|
|
1185
|
-
std::string fname =
|
|
1186
|
-
|
|
1204
|
+
SetupSyncPointsToMockDirectIO();
|
|
1205
|
+
std::string fname = test::PerThreadDBPath(env_, "preallocate_testfile");
|
|
1187
1206
|
// Try fallocate in a file to see whether the target file system supports
|
|
1188
1207
|
// it.
|
|
1189
1208
|
// Skip the test if fallocate is not supported.
|
|
1190
|
-
std::string fname_test_fallocate =
|
|
1209
|
+
std::string fname_test_fallocate =
|
|
1210
|
+
test::PerThreadDBPath(env_, "preallocate_testfile_2");
|
|
1191
1211
|
int fd = -1;
|
|
1192
1212
|
do {
|
|
1193
1213
|
fd = open(fname_test_fallocate.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644);
|
|
@@ -1277,6 +1297,11 @@ TEST_P(EnvPosixTestWithParam, RandomAccessUniqueIDConcurrent) {
|
|
|
1277
1297
|
|
|
1278
1298
|
// Create the files
|
|
1279
1299
|
IoctlFriendlyTmpdir ift;
|
|
1300
|
+
if (!ift.is_supported()) {
|
|
1301
|
+
ROCKSDB_GTEST_BYPASS(
|
|
1302
|
+
"FS_IOC_GETVERSION is not supported by the filesystem");
|
|
1303
|
+
return;
|
|
1304
|
+
}
|
|
1280
1305
|
std::vector<std::string> fnames;
|
|
1281
1306
|
for (int i = 0; i < 1000; ++i) {
|
|
1282
1307
|
fnames.push_back(ift.name() + "/" + "testfile" + std::to_string(i));
|
|
@@ -1318,6 +1343,11 @@ TEST_P(EnvPosixTestWithParam, DISABLED_RandomAccessUniqueIDDeletes) {
|
|
|
1318
1343
|
soptions.use_direct_reads = soptions.use_direct_writes = direct_io_;
|
|
1319
1344
|
|
|
1320
1345
|
IoctlFriendlyTmpdir ift;
|
|
1346
|
+
if (!ift.is_supported()) {
|
|
1347
|
+
ROCKSDB_GTEST_BYPASS(
|
|
1348
|
+
"FS_IOC_GETVERSION is not supported by the filesystem");
|
|
1349
|
+
return;
|
|
1350
|
+
}
|
|
1321
1351
|
std::string fname = ift.name() + "/" + "testfile";
|
|
1322
1352
|
|
|
1323
1353
|
// Check that after file is deleted we don't get same ID again in a new
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#include "env/env_encryption_ctr.h"
|
|
11
11
|
#include "env/fs_readonly.h"
|
|
12
12
|
#include "env/mock_env.h"
|
|
13
|
+
#include "logging/env_logger.h"
|
|
13
14
|
#include "options/db_options.h"
|
|
14
15
|
#include "rocksdb/convenience.h"
|
|
15
16
|
#include "rocksdb/utilities/customizable_util.h"
|
|
@@ -115,6 +116,25 @@ IOStatus FileSystem::ReuseWritableFile(const std::string& fname,
|
|
|
115
116
|
return NewWritableFile(fname, opts, result, dbg);
|
|
116
117
|
}
|
|
117
118
|
|
|
119
|
+
IOStatus FileSystem::NewLogger(const std::string& fname,
|
|
120
|
+
const IOOptions& io_opts,
|
|
121
|
+
std::shared_ptr<Logger>* result,
|
|
122
|
+
IODebugContext* dbg) {
|
|
123
|
+
FileOptions options;
|
|
124
|
+
options.io_options = io_opts;
|
|
125
|
+
// TODO: Tune the buffer size.
|
|
126
|
+
options.writable_file_max_buffer_size = 1024 * 1024;
|
|
127
|
+
std::unique_ptr<FSWritableFile> writable_file;
|
|
128
|
+
const IOStatus status = NewWritableFile(fname, options, &writable_file, dbg);
|
|
129
|
+
if (!status.ok()) {
|
|
130
|
+
return status;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
*result = std::make_shared<EnvLogger>(std::move(writable_file), fname,
|
|
134
|
+
options, Env::Default());
|
|
135
|
+
return IOStatus::OK();
|
|
136
|
+
}
|
|
137
|
+
|
|
118
138
|
FileOptions FileSystem::OptimizeForLogRead(
|
|
119
139
|
const FileOptions& file_options) const {
|
|
120
140
|
FileOptions optimized_file_options(file_options);
|
|
@@ -48,7 +48,6 @@
|
|
|
48
48
|
|
|
49
49
|
#include "env/composite_env_wrapper.h"
|
|
50
50
|
#include "env/io_posix.h"
|
|
51
|
-
#include "logging/posix_logger.h"
|
|
52
51
|
#include "monitoring/iostats_context_imp.h"
|
|
53
52
|
#include "monitoring/thread_status_updater.h"
|
|
54
53
|
#include "port/lang.h"
|
|
@@ -84,8 +83,6 @@ inline mode_t GetDBFileMode(bool allow_non_owner_access) {
|
|
|
84
83
|
return allow_non_owner_access ? 0644 : 0600;
|
|
85
84
|
}
|
|
86
85
|
|
|
87
|
-
static uint64_t gettid() { return Env::Default()->GetThreadID(); }
|
|
88
|
-
|
|
89
86
|
// list of pathnames that are locked
|
|
90
87
|
// Only used for error message.
|
|
91
88
|
struct LockHoldingInfo {
|
|
@@ -555,47 +552,6 @@ class PosixFileSystem : public FileSystem {
|
|
|
555
552
|
return IOStatus::OK();
|
|
556
553
|
}
|
|
557
554
|
|
|
558
|
-
IOStatus NewLogger(const std::string& fname, const IOOptions& /*opts*/,
|
|
559
|
-
std::shared_ptr<Logger>* result,
|
|
560
|
-
IODebugContext* /*dbg*/) override {
|
|
561
|
-
FILE* f = nullptr;
|
|
562
|
-
int fd;
|
|
563
|
-
{
|
|
564
|
-
IOSTATS_TIMER_GUARD(open_nanos);
|
|
565
|
-
fd = open(fname.c_str(),
|
|
566
|
-
cloexec_flags(O_WRONLY | O_CREAT | O_TRUNC, nullptr),
|
|
567
|
-
GetDBFileMode(allow_non_owner_access_));
|
|
568
|
-
if (fd != -1) {
|
|
569
|
-
f = fdopen(fd,
|
|
570
|
-
"w"
|
|
571
|
-
#ifdef __GLIBC_PREREQ
|
|
572
|
-
#if __GLIBC_PREREQ(2, 7)
|
|
573
|
-
"e" // glibc extension to enable O_CLOEXEC
|
|
574
|
-
#endif
|
|
575
|
-
#endif
|
|
576
|
-
);
|
|
577
|
-
}
|
|
578
|
-
}
|
|
579
|
-
if (fd == -1) {
|
|
580
|
-
result->reset();
|
|
581
|
-
return status_to_io_status(
|
|
582
|
-
IOError("when open a file for new logger", fname, errno));
|
|
583
|
-
}
|
|
584
|
-
if (f == nullptr) {
|
|
585
|
-
close(fd);
|
|
586
|
-
result->reset();
|
|
587
|
-
return status_to_io_status(
|
|
588
|
-
IOError("when fdopen a file for new logger", fname, errno));
|
|
589
|
-
} else {
|
|
590
|
-
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
|
591
|
-
fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 4 * 1024);
|
|
592
|
-
#endif
|
|
593
|
-
SetFD_CLOEXEC(fd, nullptr);
|
|
594
|
-
result->reset(new PosixLogger(f, &gettid, Env::Default()));
|
|
595
|
-
return IOStatus::OK();
|
|
596
|
-
}
|
|
597
|
-
}
|
|
598
|
-
|
|
599
555
|
IOStatus FileExists(const std::string& fname, const IOOptions& /*opts*/,
|
|
600
556
|
IODebugContext* /*dbg*/) override {
|
|
601
557
|
int result = access(fname.c_str(), F_OK);
|
|
@@ -883,8 +839,8 @@ class PosixFileSystem : public FileSystem {
|
|
|
883
839
|
return IOStatus::OK();
|
|
884
840
|
}
|
|
885
841
|
|
|
886
|
-
char the_path[
|
|
887
|
-
char* ret = getcwd(the_path,
|
|
842
|
+
char the_path[4096];
|
|
843
|
+
char* ret = getcwd(the_path, 4096);
|
|
888
844
|
if (ret == nullptr) {
|
|
889
845
|
return IOStatus::IOError(errnoStr(errno).c_str());
|
|
890
846
|
}
|
|
@@ -1678,6 +1678,7 @@ IOStatus PosixDirectory::Close(const IOOptions& /*opts*/,
|
|
|
1678
1678
|
IOStatus PosixDirectory::FsyncWithDirOptions(
|
|
1679
1679
|
const IOOptions& /*opts*/, IODebugContext* /*dbg*/,
|
|
1680
1680
|
const DirFsyncOptions& dir_fsync_options) {
|
|
1681
|
+
assert(fd_ >= 0); // Check use after close
|
|
1681
1682
|
IOStatus s = IOStatus::OK();
|
|
1682
1683
|
#ifndef OS_AIX
|
|
1683
1684
|
if (is_btrfs_) {
|