@nxtedition/rocksdb 7.1.2 → 7.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. package/binding.cc +26 -0
  2. package/deps/rocksdb/iostats.patch +19 -0
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +15 -1
  4. package/deps/rocksdb/rocksdb/cache/cache_test.cc +93 -58
  5. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -40
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.h +57 -32
  7. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +103 -28
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +33 -1
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +177 -38
  10. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +3 -1
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +2 -2
  12. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +125 -71
  13. package/deps/rocksdb/rocksdb/crash_test.mk +15 -1
  14. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +2 -2
  15. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +1 -1
  16. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.cc +3 -5
  17. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +25 -19
  18. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
  19. package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +36 -0
  20. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -15
  21. package/deps/rocksdb/rocksdb/db/column_family_test.cc +17 -4
  22. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +8 -8
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +0 -7
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +5 -0
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +50 -52
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +33 -11
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +41 -10
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +1 -2
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +143 -2
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +43 -18
  31. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +48 -65
  32. package/deps/rocksdb/rocksdb/db/corruption_test.cc +1 -0
  33. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +73 -4
  34. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +17 -8
  35. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +71 -2
  36. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -33
  37. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +18 -35
  38. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -5
  39. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +7 -7
  40. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +15 -8
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +2 -1
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +3 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -0
  44. package/deps/rocksdb/rocksdb/db/db_iter.cc +69 -11
  45. package/deps/rocksdb/rocksdb/db/db_iter.h +16 -0
  46. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +2 -1
  47. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
  48. package/deps/rocksdb/rocksdb/db/db_test.cc +61 -28
  49. package/deps/rocksdb/rocksdb/db/db_test2.cc +18 -7
  50. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +17 -0
  51. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +61 -0
  52. package/deps/rocksdb/rocksdb/db/db_write_test.cc +130 -0
  53. package/deps/rocksdb/rocksdb/db/experimental.cc +7 -8
  54. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +1 -2
  55. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -7
  56. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -1
  57. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +4 -2
  58. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +1 -1
  59. package/deps/rocksdb/rocksdb/db/log_reader.cc +48 -11
  60. package/deps/rocksdb/rocksdb/db/log_reader.h +8 -2
  61. package/deps/rocksdb/rocksdb/db/log_test.cc +10 -1
  62. package/deps/rocksdb/rocksdb/db/log_writer.cc +7 -1
  63. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -4
  64. package/deps/rocksdb/rocksdb/db/memtable.cc +49 -14
  65. package/deps/rocksdb/rocksdb/db/memtable.h +60 -14
  66. package/deps/rocksdb/rocksdb/db/memtable_list.cc +14 -8
  67. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +30 -10
  68. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +5 -5
  69. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +5 -0
  70. package/deps/rocksdb/rocksdb/db/repair.cc +2 -3
  71. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -7
  72. package/deps/rocksdb/rocksdb/db/table_cache.cc +72 -0
  73. package/deps/rocksdb/rocksdb/db/table_cache.h +19 -1
  74. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +8 -14
  75. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +2 -2
  76. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +35 -64
  77. package/deps/rocksdb/rocksdb/db/version_edit.cc +3 -32
  78. package/deps/rocksdb/rocksdb/db/version_edit.h +2 -12
  79. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +10 -23
  80. package/deps/rocksdb/rocksdb/db/version_set.cc +34 -10
  81. package/deps/rocksdb/rocksdb/db/version_set.h +3 -3
  82. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -6
  83. package/deps/rocksdb/rocksdb/db/version_set_test.cc +17 -15
  84. package/deps/rocksdb/rocksdb/db/wal_manager.cc +0 -4
  85. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +2 -1
  86. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +137 -42
  87. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +21 -0
  88. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +1 -0
  89. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
  90. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +4 -4
  91. package/deps/rocksdb/rocksdb/db/write_thread.cc +51 -46
  92. package/deps/rocksdb/rocksdb/db/write_thread.h +0 -4
  93. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  94. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +6 -0
  95. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +6 -0
  96. package/deps/rocksdb/rocksdb/env/env_posix.cc +1 -1
  97. package/deps/rocksdb/rocksdb/env/env_test.cc +38 -8
  98. package/deps/rocksdb/rocksdb/env/file_system.cc +20 -0
  99. package/deps/rocksdb/rocksdb/env/fs_posix.cc +2 -46
  100. package/deps/rocksdb/rocksdb/env/io_posix.cc +1 -0
  101. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +110 -5
  102. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +7 -0
  103. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +14 -1
  104. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +4 -0
  105. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  106. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +7 -0
  107. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +10 -3
  108. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +3 -1
  109. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +1 -1
  110. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +2 -0
  111. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -0
  112. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +9 -13
  113. package/deps/rocksdb/rocksdb/logging/env_logger.h +39 -13
  114. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +1 -1
  115. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +1 -1
  116. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +6 -0
  117. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +4 -1
  118. package/deps/rocksdb/rocksdb/options/cf_options.cc +6 -3
  119. package/deps/rocksdb/rocksdb/options/cf_options.h +6 -5
  120. package/deps/rocksdb/rocksdb/options/options_helper.cc +2 -1
  121. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +1 -0
  122. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -2
  123. package/deps/rocksdb/rocksdb/port/util_logger.h +1 -3
  124. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +50 -8
  125. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -0
  126. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +7 -0
  127. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +28 -10
  128. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +1 -1
  129. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +5 -2
  130. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +1 -0
  131. package/deps/rocksdb/rocksdb/table/get_context.cc +16 -6
  132. package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
  133. package/deps/rocksdb/rocksdb/table/table_test.cc +2 -1
  134. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +14 -1
  135. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +5 -2
  136. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +7 -8
  137. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +6 -6
  138. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -1
  139. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +2 -0
  140. package/deps/rocksdb/rocksdb/util/stderr_logger.h +13 -0
  141. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +55 -46
  142. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +2 -1
  143. package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +10 -0
  144. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -2
  145. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +2 -2
  146. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  147. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +2 -2
  148. package/index.js +7 -1
  149. package/package.json +1 -1
  150. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  151. package/prebuilds/darwin-x64/node.napi.node +0 -0
  152. package/prebuilds/linux-x64/node.napi.node +0 -0
  153. package/deps/rocksdb/rocksdb/logging/posix_logger.h +0 -179
@@ -4,8 +4,10 @@
4
4
  // (found in the LICENSE.Apache file in the root directory).
5
5
 
6
6
  #include "db/write_thread.h"
7
+
7
8
  #include <chrono>
8
9
  #include <thread>
10
+
9
11
  #include "db/column_family.h"
10
12
  #include "monitoring/perf_context_imp.h"
11
13
  #include "port/port.h"
@@ -293,17 +295,6 @@ void WriteThread::CreateMissingNewerLinks(Writer* head) {
293
295
  }
294
296
  }
295
297
 
296
- WriteThread::Writer* WriteThread::FindNextLeader(Writer* from,
297
- Writer* boundary) {
298
- assert(from != nullptr && from != boundary);
299
- Writer* current = from;
300
- while (current->link_older != boundary) {
301
- current = current->link_older;
302
- assert(current != nullptr);
303
- }
304
- return current;
305
- }
306
-
307
298
  void WriteThread::CompleteLeader(WriteGroup& write_group) {
308
299
  assert(write_group.size > 0);
309
300
  Writer* leader = write_group.leader;
@@ -640,6 +631,9 @@ void WriteThread::ExitAsBatchGroupFollower(Writer* w) {
640
631
  static WriteThread::AdaptationContext eabgl_ctx("ExitAsBatchGroupLeader");
641
632
  void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
642
633
  Status& status) {
634
+ TEST_SYNC_POINT_CALLBACK("WriteThread::ExitAsBatchGroupLeader:Start",
635
+ &write_group);
636
+
643
637
  Writer* leader = write_group.leader;
644
638
  Writer* last_writer = write_group.last_writer;
645
639
  assert(leader->link_older == nullptr);
@@ -656,7 +650,36 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
656
650
  }
657
651
 
658
652
  if (enable_pipelined_write_) {
659
- // Notify writers don't write to memtable to exit.
653
+ // We insert a dummy Writer right before our current write_group. This
654
+ // allows us to unlink our write_group without the risk that a subsequent
655
+ // writer becomes a new leader and might overtake us and add itself to the
656
+ // memtable-writer-list before we can do so. This ensures that writers are
657
+ // added to the memtable-writer-list in the exact same order in which they
658
+ // were in the newest_writer list.
659
+ // This must happen before completing the writers from our group to prevent
660
+ // a race where the owning thread of one of these writers can start a new
661
+ // write operation.
662
+ Writer dummy;
663
+ Writer* head = newest_writer_.load(std::memory_order_acquire);
664
+ if (head != last_writer ||
665
+ !newest_writer_.compare_exchange_strong(head, &dummy)) {
666
+ // Either last_writer wasn't the head during the load(), or it was the
667
+ // head during the load() but somebody else pushed onto the list before
668
+ // we did the compare_exchange_strong (causing it to fail). In the latter
669
+ // case compare_exchange_strong has the effect of re-reading its first
670
+ // param (head). No need to retry a failing CAS, because only a departing
671
+ // leader (which we are at the moment) can remove nodes from the list.
672
+ assert(head != last_writer);
673
+
674
+ // After walking link_older starting from head (if not already done) we
675
+ // will be able to traverse w->link_newer below.
676
+ CreateMissingNewerLinks(head);
677
+ assert(last_writer->link_newer != nullptr);
678
+ last_writer->link_newer->link_older = &dummy;
679
+ dummy.link_newer = last_writer->link_newer;
680
+ }
681
+
682
+ // Complete writers that don't write to memtable
660
683
  for (Writer* w = last_writer; w != leader;) {
661
684
  Writer* next = w->link_older;
662
685
  w->status = status;
@@ -669,23 +692,11 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
669
692
  CompleteLeader(write_group);
670
693
  }
671
694
 
672
- Writer* next_leader = nullptr;
673
-
674
- // Look for next leader before we call LinkGroup. If there isn't
675
- // pending writers, place a dummy writer at the tail of the queue
676
- // so we know the boundary of the current write group.
677
- Writer dummy;
678
- Writer* expected = last_writer;
679
- bool has_dummy = newest_writer_.compare_exchange_strong(expected, &dummy);
680
- if (!has_dummy) {
681
- // We find at least one pending writer when we insert dummy. We search
682
- // for next leader from there.
683
- next_leader = FindNextLeader(expected, last_writer);
684
- assert(next_leader != nullptr && next_leader != last_writer);
685
- }
695
+ TEST_SYNC_POINT_CALLBACK(
696
+ "WriteThread::ExitAsBatchGroupLeader:AfterCompleteWriters",
697
+ &write_group);
686
698
 
687
- // Link the ramaining of the group to memtable writer list.
688
- //
699
+ // Link the remaining of the group to memtable writer list.
689
700
  // We have to link our group to memtable writer queue before wake up the
690
701
  // next leader or set newest_writer_ to null, otherwise the next leader
691
702
  // can run ahead of us and link to memtable writer queue before we do.
@@ -696,24 +707,17 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
696
707
  }
697
708
  }
698
709
 
699
- // If we have inserted dummy in the queue, remove it now and check if there
700
- // are pending writer join the queue since we insert the dummy. If so,
701
- // look for next leader again.
702
- if (has_dummy) {
703
- assert(next_leader == nullptr);
704
- expected = &dummy;
705
- bool has_pending_writer =
706
- !newest_writer_.compare_exchange_strong(expected, nullptr);
707
- if (has_pending_writer) {
708
- next_leader = FindNextLeader(expected, &dummy);
709
- assert(next_leader != nullptr && next_leader != &dummy);
710
- }
710
+ // Unlink the dummy writer from the list and identify the new leader
711
+ head = newest_writer_.load(std::memory_order_acquire);
712
+ if (head != &dummy ||
713
+ !newest_writer_.compare_exchange_strong(head, nullptr)) {
714
+ CreateMissingNewerLinks(head);
715
+ Writer* new_leader = dummy.link_newer;
716
+ assert(new_leader != nullptr);
717
+ new_leader->link_older = nullptr;
718
+ SetState(new_leader, STATE_GROUP_LEADER);
711
719
  }
712
720
 
713
- if (next_leader != nullptr) {
714
- next_leader->link_older = nullptr;
715
- SetState(next_leader, STATE_GROUP_LEADER);
716
- }
717
721
  AwaitState(leader, STATE_MEMTABLE_WRITER_LEADER |
718
722
  STATE_PARALLEL_MEMTABLE_WRITER | STATE_COMPLETED,
719
723
  &eabgl_ctx);
@@ -721,8 +725,8 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
721
725
  Writer* head = newest_writer_.load(std::memory_order_acquire);
722
726
  if (head != last_writer ||
723
727
  !newest_writer_.compare_exchange_strong(head, nullptr)) {
724
- // Either w wasn't the head during the load(), or it was the head
725
- // during the load() but somebody else pushed onto the list before
728
+ // Either last_writer wasn't the head during the load(), or it was the
729
+ // head during the load() but somebody else pushed onto the list before
726
730
  // we did the compare_exchange_strong (causing it to fail). In the
727
731
  // latter case compare_exchange_strong has the effect of re-reading
728
732
  // its first param (head). No need to retry a failing CAS, because
@@ -738,6 +742,7 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
738
742
  // to MarkJoined, so we can definitely conclude that no other leader
739
743
  // work is going on here (with or without db mutex).
740
744
  CreateMissingNewerLinks(head);
745
+ assert(last_writer->link_newer != nullptr);
741
746
  assert(last_writer->link_newer->link_older == last_writer);
742
747
  last_writer->link_newer->link_older = nullptr;
743
748
 
@@ -428,10 +428,6 @@ class WriteThread {
428
428
  // concurrently with itself.
429
429
  void CreateMissingNewerLinks(Writer* head);
430
430
 
431
- // Starting from a pending writer, follow link_older to search for next
432
- // leader, until we hit boundary.
433
- Writer* FindNextLeader(Writer* pending_writer, Writer* boundary);
434
-
435
431
  // Set the leader in write_group to completed state and remove it from the
436
432
  // write group.
437
433
  void CompleteLeader(WriteGroup& write_group);
@@ -307,6 +307,10 @@ DECLARE_int32(create_timestamped_snapshot_one_in);
307
307
 
308
308
  DECLARE_bool(allow_data_in_errors);
309
309
 
310
+ // Tiered storage
311
+ DECLARE_bool(enable_tiered_storage); // set last_level_temperature
312
+ DECLARE_int64(preclude_last_level_data_seconds);
313
+
310
314
  constexpr long KB = 1024;
311
315
  constexpr int kRandomValueMaxFactor = 3;
312
316
  constexpr int kValueMaxLen = 100;
@@ -483,6 +483,12 @@ DEFINE_int32(prepopulate_blob_cache, 0,
483
483
  "[Integrated BlobDB] Pre-populate hot/warm blobs in blob cache. 0 "
484
484
  "to disable and 1 to insert during flush.");
485
485
 
486
+ DEFINE_bool(enable_tiered_storage, false, "Set last_level_temperature");
487
+
488
+ DEFINE_int64(preclude_last_level_data_seconds, 0,
489
+ "Preclude data from the last level. Used with tiered storage "
490
+ "feature to preclude new data from comacting to the last level.");
491
+
486
492
  static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) =
487
493
  RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range);
488
494
 
@@ -3063,6 +3063,12 @@ void InitializeOptionsFromFlags(
3063
3063
  options.wal_compression =
3064
3064
  StringToCompressionType(FLAGS_wal_compression.c_str());
3065
3065
 
3066
+ if (FLAGS_enable_tiered_storage) {
3067
+ options.bottommost_temperature = Temperature::kCold;
3068
+ }
3069
+ options.preclude_last_level_data_seconds =
3070
+ FLAGS_preclude_last_level_data_seconds;
3071
+
3066
3072
  switch (FLAGS_rep_factory) {
3067
3073
  case kSkipList:
3068
3074
  // no need to do anything
@@ -55,10 +55,10 @@
55
55
 
56
56
  #include "env/composite_env_wrapper.h"
57
57
  #include "env/io_posix.h"
58
- #include "logging/posix_logger.h"
59
58
  #include "monitoring/iostats_context_imp.h"
60
59
  #include "monitoring/thread_status_updater.h"
61
60
  #include "port/port.h"
61
+ #include "port/sys_time.h"
62
62
  #include "rocksdb/env.h"
63
63
  #include "rocksdb/options.h"
64
64
  #include "rocksdb/slice.h"
@@ -1078,11 +1078,20 @@ class IoctlFriendlyTmpdir {
1078
1078
  }
1079
1079
  }
1080
1080
 
1081
+ // check if it's running test within a docker container, in which case, the
1082
+ // file system inside `overlayfs` may not support FS_IOC_GETVERSION
1083
+ // skip the tests
1084
+ struct stat buffer;
1085
+ if (stat("/.dockerenv", &buffer) == 0) {
1086
+ is_supported_ = false;
1087
+ return;
1088
+ }
1089
+
1081
1090
  fprintf(stderr, "failed to find an ioctl-friendly temporary directory;"
1082
1091
  " specify one via the TEST_IOCTL_FRIENDLY_TMPDIR envvar\n");
1083
1092
  std::abort();
1084
1093
  #endif
1085
- }
1094
+ }
1086
1095
 
1087
1096
  ~IoctlFriendlyTmpdir() {
1088
1097
  rmdir(dir_.c_str());
@@ -1092,8 +1101,12 @@ class IoctlFriendlyTmpdir {
1092
1101
  return dir_;
1093
1102
  }
1094
1103
 
1104
+ bool is_supported() const { return is_supported_; }
1105
+
1095
1106
  private:
1096
1107
  std::string dir_;
1108
+
1109
+ bool is_supported_ = true;
1097
1110
  };
1098
1111
 
1099
1112
  #ifndef ROCKSDB_LITE
@@ -1102,8 +1115,10 @@ TEST_F(EnvPosixTest, PositionedAppend) {
1102
1115
  EnvOptions options;
1103
1116
  options.use_direct_writes = true;
1104
1117
  options.use_mmap_writes = false;
1105
- IoctlFriendlyTmpdir ift;
1106
- ASSERT_OK(env_->NewWritableFile(ift.name() + "/f", &writable_file, options));
1118
+ std::string fname = test::PerThreadDBPath(env_, "positioned_append");
1119
+ SetupSyncPointsToMockDirectIO();
1120
+
1121
+ ASSERT_OK(env_->NewWritableFile(fname, &writable_file, options));
1107
1122
  const size_t kBlockSize = 4096;
1108
1123
  const size_t kDataSize = kPageSize;
1109
1124
  // Write a page worth of 'a'
@@ -1119,7 +1134,7 @@ TEST_F(EnvPosixTest, PositionedAppend) {
1119
1134
 
1120
1135
  // Verify the above
1121
1136
  std::unique_ptr<SequentialFile> seq_file;
1122
- ASSERT_OK(env_->NewSequentialFile(ift.name() + "/f", &seq_file, options));
1137
+ ASSERT_OK(env_->NewSequentialFile(fname, &seq_file, options));
1123
1138
  size_t scratch_len = kPageSize * 2;
1124
1139
  std::unique_ptr<char[]> scratch(new char[scratch_len]);
1125
1140
  Slice result;
@@ -1139,6 +1154,11 @@ TEST_P(EnvPosixTestWithParam, RandomAccessUniqueID) {
1139
1154
  EnvOptions soptions;
1140
1155
  soptions.use_direct_reads = soptions.use_direct_writes = direct_io_;
1141
1156
  IoctlFriendlyTmpdir ift;
1157
+ if (!ift.is_supported()) {
1158
+ ROCKSDB_GTEST_BYPASS(
1159
+ "FS_IOC_GETVERSION is not supported by the filesystem");
1160
+ return;
1161
+ }
1142
1162
  std::string fname = ift.name() + "/testfile";
1143
1163
  std::unique_ptr<WritableFile> wfile;
1144
1164
  ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions));
@@ -1181,13 +1201,13 @@ TEST_P(EnvPosixTestWithParam, RandomAccessUniqueID) {
1181
1201
  #ifdef ROCKSDB_FALLOCATE_PRESENT
1182
1202
  TEST_P(EnvPosixTestWithParam, AllocateTest) {
1183
1203
  if (env_ == Env::Default()) {
1184
- IoctlFriendlyTmpdir ift;
1185
- std::string fname = ift.name() + "/preallocate_testfile";
1186
-
1204
+ SetupSyncPointsToMockDirectIO();
1205
+ std::string fname = test::PerThreadDBPath(env_, "preallocate_testfile");
1187
1206
  // Try fallocate in a file to see whether the target file system supports
1188
1207
  // it.
1189
1208
  // Skip the test if fallocate is not supported.
1190
- std::string fname_test_fallocate = ift.name() + "/preallocate_testfile_2";
1209
+ std::string fname_test_fallocate =
1210
+ test::PerThreadDBPath(env_, "preallocate_testfile_2");
1191
1211
  int fd = -1;
1192
1212
  do {
1193
1213
  fd = open(fname_test_fallocate.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644);
@@ -1277,6 +1297,11 @@ TEST_P(EnvPosixTestWithParam, RandomAccessUniqueIDConcurrent) {
1277
1297
 
1278
1298
  // Create the files
1279
1299
  IoctlFriendlyTmpdir ift;
1300
+ if (!ift.is_supported()) {
1301
+ ROCKSDB_GTEST_BYPASS(
1302
+ "FS_IOC_GETVERSION is not supported by the filesystem");
1303
+ return;
1304
+ }
1280
1305
  std::vector<std::string> fnames;
1281
1306
  for (int i = 0; i < 1000; ++i) {
1282
1307
  fnames.push_back(ift.name() + "/" + "testfile" + std::to_string(i));
@@ -1318,6 +1343,11 @@ TEST_P(EnvPosixTestWithParam, DISABLED_RandomAccessUniqueIDDeletes) {
1318
1343
  soptions.use_direct_reads = soptions.use_direct_writes = direct_io_;
1319
1344
 
1320
1345
  IoctlFriendlyTmpdir ift;
1346
+ if (!ift.is_supported()) {
1347
+ ROCKSDB_GTEST_BYPASS(
1348
+ "FS_IOC_GETVERSION is not supported by the filesystem");
1349
+ return;
1350
+ }
1321
1351
  std::string fname = ift.name() + "/" + "testfile";
1322
1352
 
1323
1353
  // Check that after file is deleted we don't get same ID again in a new
@@ -10,6 +10,7 @@
10
10
  #include "env/env_encryption_ctr.h"
11
11
  #include "env/fs_readonly.h"
12
12
  #include "env/mock_env.h"
13
+ #include "logging/env_logger.h"
13
14
  #include "options/db_options.h"
14
15
  #include "rocksdb/convenience.h"
15
16
  #include "rocksdb/utilities/customizable_util.h"
@@ -115,6 +116,25 @@ IOStatus FileSystem::ReuseWritableFile(const std::string& fname,
115
116
  return NewWritableFile(fname, opts, result, dbg);
116
117
  }
117
118
 
119
+ IOStatus FileSystem::NewLogger(const std::string& fname,
120
+ const IOOptions& io_opts,
121
+ std::shared_ptr<Logger>* result,
122
+ IODebugContext* dbg) {
123
+ FileOptions options;
124
+ options.io_options = io_opts;
125
+ // TODO: Tune the buffer size.
126
+ options.writable_file_max_buffer_size = 1024 * 1024;
127
+ std::unique_ptr<FSWritableFile> writable_file;
128
+ const IOStatus status = NewWritableFile(fname, options, &writable_file, dbg);
129
+ if (!status.ok()) {
130
+ return status;
131
+ }
132
+
133
+ *result = std::make_shared<EnvLogger>(std::move(writable_file), fname,
134
+ options, Env::Default());
135
+ return IOStatus::OK();
136
+ }
137
+
118
138
  FileOptions FileSystem::OptimizeForLogRead(
119
139
  const FileOptions& file_options) const {
120
140
  FileOptions optimized_file_options(file_options);
@@ -48,7 +48,6 @@
48
48
 
49
49
  #include "env/composite_env_wrapper.h"
50
50
  #include "env/io_posix.h"
51
- #include "logging/posix_logger.h"
52
51
  #include "monitoring/iostats_context_imp.h"
53
52
  #include "monitoring/thread_status_updater.h"
54
53
  #include "port/lang.h"
@@ -84,8 +83,6 @@ inline mode_t GetDBFileMode(bool allow_non_owner_access) {
84
83
  return allow_non_owner_access ? 0644 : 0600;
85
84
  }
86
85
 
87
- static uint64_t gettid() { return Env::Default()->GetThreadID(); }
88
-
89
86
  // list of pathnames that are locked
90
87
  // Only used for error message.
91
88
  struct LockHoldingInfo {
@@ -555,47 +552,6 @@ class PosixFileSystem : public FileSystem {
555
552
  return IOStatus::OK();
556
553
  }
557
554
 
558
- IOStatus NewLogger(const std::string& fname, const IOOptions& /*opts*/,
559
- std::shared_ptr<Logger>* result,
560
- IODebugContext* /*dbg*/) override {
561
- FILE* f = nullptr;
562
- int fd;
563
- {
564
- IOSTATS_TIMER_GUARD(open_nanos);
565
- fd = open(fname.c_str(),
566
- cloexec_flags(O_WRONLY | O_CREAT | O_TRUNC, nullptr),
567
- GetDBFileMode(allow_non_owner_access_));
568
- if (fd != -1) {
569
- f = fdopen(fd,
570
- "w"
571
- #ifdef __GLIBC_PREREQ
572
- #if __GLIBC_PREREQ(2, 7)
573
- "e" // glibc extension to enable O_CLOEXEC
574
- #endif
575
- #endif
576
- );
577
- }
578
- }
579
- if (fd == -1) {
580
- result->reset();
581
- return status_to_io_status(
582
- IOError("when open a file for new logger", fname, errno));
583
- }
584
- if (f == nullptr) {
585
- close(fd);
586
- result->reset();
587
- return status_to_io_status(
588
- IOError("when fdopen a file for new logger", fname, errno));
589
- } else {
590
- #ifdef ROCKSDB_FALLOCATE_PRESENT
591
- fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 4 * 1024);
592
- #endif
593
- SetFD_CLOEXEC(fd, nullptr);
594
- result->reset(new PosixLogger(f, &gettid, Env::Default()));
595
- return IOStatus::OK();
596
- }
597
- }
598
-
599
555
  IOStatus FileExists(const std::string& fname, const IOOptions& /*opts*/,
600
556
  IODebugContext* /*dbg*/) override {
601
557
  int result = access(fname.c_str(), F_OK);
@@ -883,8 +839,8 @@ class PosixFileSystem : public FileSystem {
883
839
  return IOStatus::OK();
884
840
  }
885
841
 
886
- char the_path[256];
887
- char* ret = getcwd(the_path, 256);
842
+ char the_path[4096];
843
+ char* ret = getcwd(the_path, 4096);
888
844
  if (ret == nullptr) {
889
845
  return IOStatus::IOError(errnoStr(errno).c_str());
890
846
  }
@@ -1678,6 +1678,7 @@ IOStatus PosixDirectory::Close(const IOOptions& /*opts*/,
1678
1678
  IOStatus PosixDirectory::FsyncWithDirOptions(
1679
1679
  const IOOptions& /*opts*/, IODebugContext* /*dbg*/,
1680
1680
  const DirFsyncOptions& dir_fsync_options) {
1681
+ assert(fd_ >= 0); // Check use after close
1681
1682
  IOStatus s = IOStatus::OK();
1682
1683
  #ifndef OS_AIX
1683
1684
  if (is_btrfs_) {