@nxtedition/rocksdb 7.1.4 → 7.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/binding.cc +32 -14
  2. package/deps/rocksdb/iostats.patch +19 -0
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +15 -1
  4. package/deps/rocksdb/rocksdb/cache/cache.cc +4 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +6 -8
  6. package/deps/rocksdb/rocksdb/cache/cache_key.cc +184 -164
  7. package/deps/rocksdb/rocksdb/cache/cache_key.h +38 -29
  8. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +4 -4
  9. package/deps/rocksdb/rocksdb/cache/cache_test.cc +93 -58
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +92 -42
  11. package/deps/rocksdb/rocksdb/cache/clock_cache.h +57 -32
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +114 -37
  13. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +34 -2
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +187 -38
  15. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +3 -1
  16. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +88 -19
  17. package/deps/rocksdb/rocksdb/cache/lru_cache.h +48 -8
  18. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +481 -224
  19. package/deps/rocksdb/rocksdb/crash_test.mk +15 -1
  20. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +2 -2
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +3 -7
  22. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +1 -1
  23. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.cc +3 -5
  24. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +25 -19
  25. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +4 -5
  26. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +2 -3
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +12 -4
  28. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
  29. package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +105 -0
  30. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -15
  31. package/deps/rocksdb/rocksdb/db/column_family_test.cc +17 -4
  32. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +8 -8
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +0 -7
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +5 -0
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +56 -53
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +33 -11
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +45 -11
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +1 -2
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +143 -2
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +43 -18
  41. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +48 -65
  42. package/deps/rocksdb/rocksdb/db/corruption_test.cc +1 -0
  43. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +73 -4
  44. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +239 -190
  45. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +71 -2
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -33
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +18 -35
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -5
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +7 -7
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +15 -8
  51. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +2 -1
  52. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +3 -1
  53. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -0
  54. package/deps/rocksdb/rocksdb/db/db_iter.cc +69 -11
  55. package/deps/rocksdb/rocksdb/db/db_iter.h +16 -0
  56. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +239 -23
  57. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +2 -1
  58. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
  59. package/deps/rocksdb/rocksdb/db/db_test.cc +61 -28
  60. package/deps/rocksdb/rocksdb/db/db_test2.cc +24 -9
  61. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +17 -0
  62. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +61 -0
  63. package/deps/rocksdb/rocksdb/db/db_write_test.cc +130 -0
  64. package/deps/rocksdb/rocksdb/db/event_helpers.cc +2 -1
  65. package/deps/rocksdb/rocksdb/db/experimental.cc +7 -8
  66. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +1 -2
  67. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -7
  68. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -1
  69. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +4 -2
  70. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +7 -1
  71. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -0
  72. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +6 -0
  73. package/deps/rocksdb/rocksdb/db/kv_checksum.h +8 -4
  74. package/deps/rocksdb/rocksdb/db/log_reader.cc +48 -11
  75. package/deps/rocksdb/rocksdb/db/log_reader.h +8 -2
  76. package/deps/rocksdb/rocksdb/db/log_test.cc +10 -1
  77. package/deps/rocksdb/rocksdb/db/log_writer.cc +7 -1
  78. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -4
  79. package/deps/rocksdb/rocksdb/db/memtable.cc +222 -47
  80. package/deps/rocksdb/rocksdb/db/memtable.h +70 -14
  81. package/deps/rocksdb/rocksdb/db/memtable_list.cc +14 -8
  82. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +30 -10
  83. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +5 -5
  84. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +5 -0
  85. package/deps/rocksdb/rocksdb/db/repair.cc +2 -3
  86. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -7
  87. package/deps/rocksdb/rocksdb/db/table_cache.cc +72 -0
  88. package/deps/rocksdb/rocksdb/db/table_cache.h +19 -1
  89. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +10 -15
  90. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +2 -2
  91. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +35 -64
  92. package/deps/rocksdb/rocksdb/db/version_edit.cc +3 -32
  93. package/deps/rocksdb/rocksdb/db/version_edit.h +2 -12
  94. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +10 -23
  95. package/deps/rocksdb/rocksdb/db/version_set.cc +71 -28
  96. package/deps/rocksdb/rocksdb/db/version_set.h +3 -3
  97. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +7 -7
  98. package/deps/rocksdb/rocksdb/db/version_set_test.cc +17 -15
  99. package/deps/rocksdb/rocksdb/db/wal_manager.cc +0 -4
  100. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +2 -1
  101. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +137 -42
  102. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +21 -0
  103. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +1 -0
  104. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
  105. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +4 -4
  106. package/deps/rocksdb/rocksdb/db/write_thread.cc +51 -46
  107. package/deps/rocksdb/rocksdb/db/write_thread.h +0 -4
  108. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +5 -0
  109. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +12 -0
  110. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +8 -0
  111. package/deps/rocksdb/rocksdb/env/env_posix.cc +1 -1
  112. package/deps/rocksdb/rocksdb/env/env_test.cc +38 -8
  113. package/deps/rocksdb/rocksdb/env/file_system.cc +20 -0
  114. package/deps/rocksdb/rocksdb/env/fs_posix.cc +2 -46
  115. package/deps/rocksdb/rocksdb/env/io_posix.cc +1 -0
  116. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +110 -5
  117. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +7 -0
  118. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +29 -1
  119. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +31 -6
  120. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +4 -0
  121. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  122. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +7 -0
  123. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +10 -3
  124. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +3 -1
  125. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +1 -1
  126. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +2 -0
  127. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -0
  128. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +9 -13
  129. package/deps/rocksdb/rocksdb/logging/env_logger.h +39 -13
  130. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +1 -1
  131. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +1 -1
  132. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +1 -1
  133. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +6 -0
  134. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +4 -1
  135. package/deps/rocksdb/rocksdb/options/cf_options.cc +10 -3
  136. package/deps/rocksdb/rocksdb/options/cf_options.h +10 -5
  137. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -1
  138. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
  139. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -2
  140. package/deps/rocksdb/rocksdb/port/util_logger.h +1 -3
  141. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -6
  142. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +1 -0
  143. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +52 -12
  144. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +5 -7
  145. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +9 -1
  146. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +28 -10
  147. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +1 -1
  148. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +5 -2
  149. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +1 -0
  150. package/deps/rocksdb/rocksdb/table/get_context.cc +16 -6
  151. package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
  152. package/deps/rocksdb/rocksdb/table/table_test.cc +2 -1
  153. package/deps/rocksdb/rocksdb/table/unique_id.cc +22 -24
  154. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +2 -1
  155. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +7 -0
  156. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
  157. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +5 -2
  158. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +7 -8
  159. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +6 -6
  160. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -1
  161. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +2 -1
  162. package/deps/rocksdb/rocksdb/util/async_file_reader.h +3 -3
  163. package/deps/rocksdb/rocksdb/util/coro_utils.h +2 -1
  164. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +2 -0
  165. package/deps/rocksdb/rocksdb/util/hash_test.cc +67 -0
  166. package/deps/rocksdb/rocksdb/util/math.h +41 -0
  167. package/deps/rocksdb/rocksdb/util/math128.h +6 -0
  168. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +2 -1
  169. package/deps/rocksdb/rocksdb/util/stderr_logger.h +13 -0
  170. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +55 -46
  171. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +3 -6
  172. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +2 -1
  173. package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +10 -0
  174. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +5 -0
  175. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_lock_manager.h +6 -0
  176. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -2
  177. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +2 -2
  178. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  179. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +2 -2
  180. package/index.js +17 -8
  181. package/package.json +1 -1
  182. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  183. package/prebuilds/darwin-x64/node.napi.node +0 -0
  184. package/prebuilds/linux-x64/node.napi.node +0 -0
  185. package/deps/rocksdb/rocksdb/logging/posix_logger.h +0 -179
@@ -483,6 +483,12 @@ DEFINE_int32(prepopulate_blob_cache, 0,
483
483
  "[Integrated BlobDB] Pre-populate hot/warm blobs in blob cache. 0 "
484
484
  "to disable and 1 to insert during flush.");
485
485
 
486
+ DEFINE_bool(enable_tiered_storage, false, "Set last_level_temperature");
487
+
488
+ DEFINE_int64(preclude_last_level_data_seconds, 0,
489
+ "Preclude data from the last level. Used with tiered storage "
490
+ "feature to preclude new data from comacting to the last level.");
491
+
486
492
  static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) =
487
493
  RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range);
488
494
 
@@ -935,6 +941,12 @@ DEFINE_uint64(batch_protection_bytes_per_key, 0,
935
941
  "specified number of bytes per key. Currently the only supported "
936
942
  "nonzero value is eight.");
937
943
 
944
+ DEFINE_uint32(
945
+ memtable_protection_bytes_per_key, 0,
946
+ "If nonzero, enables integrity protection in memtable entries at the "
947
+ "specified number of bytes per key. Currently the supported "
948
+ "nonzero values are 1, 2, 4 and 8.");
949
+
938
950
  DEFINE_string(file_checksum_impl, "none",
939
951
  "Name of an implementation for file_checksum_gen_factory, or "
940
952
  "\"none\" for null.");
@@ -3016,6 +3016,8 @@ void InitializeOptionsFromFlags(
3016
3016
  options.track_and_verify_wals_in_manifest = true;
3017
3017
  options.verify_sst_unique_id_in_manifest =
3018
3018
  FLAGS_verify_sst_unique_id_in_manifest;
3019
+ options.memtable_protection_bytes_per_key =
3020
+ FLAGS_memtable_protection_bytes_per_key;
3019
3021
 
3020
3022
  // Integrated BlobDB
3021
3023
  options.enable_blob_files = FLAGS_enable_blob_files;
@@ -3063,6 +3065,12 @@ void InitializeOptionsFromFlags(
3063
3065
  options.wal_compression =
3064
3066
  StringToCompressionType(FLAGS_wal_compression.c_str());
3065
3067
 
3068
+ if (FLAGS_enable_tiered_storage) {
3069
+ options.bottommost_temperature = Temperature::kCold;
3070
+ }
3071
+ options.preclude_last_level_data_seconds =
3072
+ FLAGS_preclude_last_level_data_seconds;
3073
+
3066
3074
  switch (FLAGS_rep_factory) {
3067
3075
  case kSkipList:
3068
3076
  // no need to do anything
@@ -55,10 +55,10 @@
55
55
 
56
56
  #include "env/composite_env_wrapper.h"
57
57
  #include "env/io_posix.h"
58
- #include "logging/posix_logger.h"
59
58
  #include "monitoring/iostats_context_imp.h"
60
59
  #include "monitoring/thread_status_updater.h"
61
60
  #include "port/port.h"
61
+ #include "port/sys_time.h"
62
62
  #include "rocksdb/env.h"
63
63
  #include "rocksdb/options.h"
64
64
  #include "rocksdb/slice.h"
@@ -1078,11 +1078,20 @@ class IoctlFriendlyTmpdir {
1078
1078
  }
1079
1079
  }
1080
1080
 
1081
+ // check if it's running test within a docker container, in which case, the
1082
+ // file system inside `overlayfs` may not support FS_IOC_GETVERSION
1083
+ // skip the tests
1084
+ struct stat buffer;
1085
+ if (stat("/.dockerenv", &buffer) == 0) {
1086
+ is_supported_ = false;
1087
+ return;
1088
+ }
1089
+
1081
1090
  fprintf(stderr, "failed to find an ioctl-friendly temporary directory;"
1082
1091
  " specify one via the TEST_IOCTL_FRIENDLY_TMPDIR envvar\n");
1083
1092
  std::abort();
1084
1093
  #endif
1085
- }
1094
+ }
1086
1095
 
1087
1096
  ~IoctlFriendlyTmpdir() {
1088
1097
  rmdir(dir_.c_str());
@@ -1092,8 +1101,12 @@ class IoctlFriendlyTmpdir {
1092
1101
  return dir_;
1093
1102
  }
1094
1103
 
1104
+ bool is_supported() const { return is_supported_; }
1105
+
1095
1106
  private:
1096
1107
  std::string dir_;
1108
+
1109
+ bool is_supported_ = true;
1097
1110
  };
1098
1111
 
1099
1112
  #ifndef ROCKSDB_LITE
@@ -1102,8 +1115,10 @@ TEST_F(EnvPosixTest, PositionedAppend) {
1102
1115
  EnvOptions options;
1103
1116
  options.use_direct_writes = true;
1104
1117
  options.use_mmap_writes = false;
1105
- IoctlFriendlyTmpdir ift;
1106
- ASSERT_OK(env_->NewWritableFile(ift.name() + "/f", &writable_file, options));
1118
+ std::string fname = test::PerThreadDBPath(env_, "positioned_append");
1119
+ SetupSyncPointsToMockDirectIO();
1120
+
1121
+ ASSERT_OK(env_->NewWritableFile(fname, &writable_file, options));
1107
1122
  const size_t kBlockSize = 4096;
1108
1123
  const size_t kDataSize = kPageSize;
1109
1124
  // Write a page worth of 'a'
@@ -1119,7 +1134,7 @@ TEST_F(EnvPosixTest, PositionedAppend) {
1119
1134
 
1120
1135
  // Verify the above
1121
1136
  std::unique_ptr<SequentialFile> seq_file;
1122
- ASSERT_OK(env_->NewSequentialFile(ift.name() + "/f", &seq_file, options));
1137
+ ASSERT_OK(env_->NewSequentialFile(fname, &seq_file, options));
1123
1138
  size_t scratch_len = kPageSize * 2;
1124
1139
  std::unique_ptr<char[]> scratch(new char[scratch_len]);
1125
1140
  Slice result;
@@ -1139,6 +1154,11 @@ TEST_P(EnvPosixTestWithParam, RandomAccessUniqueID) {
1139
1154
  EnvOptions soptions;
1140
1155
  soptions.use_direct_reads = soptions.use_direct_writes = direct_io_;
1141
1156
  IoctlFriendlyTmpdir ift;
1157
+ if (!ift.is_supported()) {
1158
+ ROCKSDB_GTEST_BYPASS(
1159
+ "FS_IOC_GETVERSION is not supported by the filesystem");
1160
+ return;
1161
+ }
1142
1162
  std::string fname = ift.name() + "/testfile";
1143
1163
  std::unique_ptr<WritableFile> wfile;
1144
1164
  ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions));
@@ -1181,13 +1201,13 @@ TEST_P(EnvPosixTestWithParam, RandomAccessUniqueID) {
1181
1201
  #ifdef ROCKSDB_FALLOCATE_PRESENT
1182
1202
  TEST_P(EnvPosixTestWithParam, AllocateTest) {
1183
1203
  if (env_ == Env::Default()) {
1184
- IoctlFriendlyTmpdir ift;
1185
- std::string fname = ift.name() + "/preallocate_testfile";
1186
-
1204
+ SetupSyncPointsToMockDirectIO();
1205
+ std::string fname = test::PerThreadDBPath(env_, "preallocate_testfile");
1187
1206
  // Try fallocate in a file to see whether the target file system supports
1188
1207
  // it.
1189
1208
  // Skip the test if fallocate is not supported.
1190
- std::string fname_test_fallocate = ift.name() + "/preallocate_testfile_2";
1209
+ std::string fname_test_fallocate =
1210
+ test::PerThreadDBPath(env_, "preallocate_testfile_2");
1191
1211
  int fd = -1;
1192
1212
  do {
1193
1213
  fd = open(fname_test_fallocate.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644);
@@ -1277,6 +1297,11 @@ TEST_P(EnvPosixTestWithParam, RandomAccessUniqueIDConcurrent) {
1277
1297
 
1278
1298
  // Create the files
1279
1299
  IoctlFriendlyTmpdir ift;
1300
+ if (!ift.is_supported()) {
1301
+ ROCKSDB_GTEST_BYPASS(
1302
+ "FS_IOC_GETVERSION is not supported by the filesystem");
1303
+ return;
1304
+ }
1280
1305
  std::vector<std::string> fnames;
1281
1306
  for (int i = 0; i < 1000; ++i) {
1282
1307
  fnames.push_back(ift.name() + "/" + "testfile" + std::to_string(i));
@@ -1318,6 +1343,11 @@ TEST_P(EnvPosixTestWithParam, DISABLED_RandomAccessUniqueIDDeletes) {
1318
1343
  soptions.use_direct_reads = soptions.use_direct_writes = direct_io_;
1319
1344
 
1320
1345
  IoctlFriendlyTmpdir ift;
1346
+ if (!ift.is_supported()) {
1347
+ ROCKSDB_GTEST_BYPASS(
1348
+ "FS_IOC_GETVERSION is not supported by the filesystem");
1349
+ return;
1350
+ }
1321
1351
  std::string fname = ift.name() + "/" + "testfile";
1322
1352
 
1323
1353
  // Check that after file is deleted we don't get same ID again in a new
@@ -10,6 +10,7 @@
10
10
  #include "env/env_encryption_ctr.h"
11
11
  #include "env/fs_readonly.h"
12
12
  #include "env/mock_env.h"
13
+ #include "logging/env_logger.h"
13
14
  #include "options/db_options.h"
14
15
  #include "rocksdb/convenience.h"
15
16
  #include "rocksdb/utilities/customizable_util.h"
@@ -115,6 +116,25 @@ IOStatus FileSystem::ReuseWritableFile(const std::string& fname,
115
116
  return NewWritableFile(fname, opts, result, dbg);
116
117
  }
117
118
 
119
+ IOStatus FileSystem::NewLogger(const std::string& fname,
120
+ const IOOptions& io_opts,
121
+ std::shared_ptr<Logger>* result,
122
+ IODebugContext* dbg) {
123
+ FileOptions options;
124
+ options.io_options = io_opts;
125
+ // TODO: Tune the buffer size.
126
+ options.writable_file_max_buffer_size = 1024 * 1024;
127
+ std::unique_ptr<FSWritableFile> writable_file;
128
+ const IOStatus status = NewWritableFile(fname, options, &writable_file, dbg);
129
+ if (!status.ok()) {
130
+ return status;
131
+ }
132
+
133
+ *result = std::make_shared<EnvLogger>(std::move(writable_file), fname,
134
+ options, Env::Default());
135
+ return IOStatus::OK();
136
+ }
137
+
118
138
  FileOptions FileSystem::OptimizeForLogRead(
119
139
  const FileOptions& file_options) const {
120
140
  FileOptions optimized_file_options(file_options);
@@ -48,7 +48,6 @@
48
48
 
49
49
  #include "env/composite_env_wrapper.h"
50
50
  #include "env/io_posix.h"
51
- #include "logging/posix_logger.h"
52
51
  #include "monitoring/iostats_context_imp.h"
53
52
  #include "monitoring/thread_status_updater.h"
54
53
  #include "port/lang.h"
@@ -84,8 +83,6 @@ inline mode_t GetDBFileMode(bool allow_non_owner_access) {
84
83
  return allow_non_owner_access ? 0644 : 0600;
85
84
  }
86
85
 
87
- static uint64_t gettid() { return Env::Default()->GetThreadID(); }
88
-
89
86
  // list of pathnames that are locked
90
87
  // Only used for error message.
91
88
  struct LockHoldingInfo {
@@ -555,47 +552,6 @@ class PosixFileSystem : public FileSystem {
555
552
  return IOStatus::OK();
556
553
  }
557
554
 
558
- IOStatus NewLogger(const std::string& fname, const IOOptions& /*opts*/,
559
- std::shared_ptr<Logger>* result,
560
- IODebugContext* /*dbg*/) override {
561
- FILE* f = nullptr;
562
- int fd;
563
- {
564
- IOSTATS_TIMER_GUARD(open_nanos);
565
- fd = open(fname.c_str(),
566
- cloexec_flags(O_WRONLY | O_CREAT | O_TRUNC, nullptr),
567
- GetDBFileMode(allow_non_owner_access_));
568
- if (fd != -1) {
569
- f = fdopen(fd,
570
- "w"
571
- #ifdef __GLIBC_PREREQ
572
- #if __GLIBC_PREREQ(2, 7)
573
- "e" // glibc extension to enable O_CLOEXEC
574
- #endif
575
- #endif
576
- );
577
- }
578
- }
579
- if (fd == -1) {
580
- result->reset();
581
- return status_to_io_status(
582
- IOError("when open a file for new logger", fname, errno));
583
- }
584
- if (f == nullptr) {
585
- close(fd);
586
- result->reset();
587
- return status_to_io_status(
588
- IOError("when fdopen a file for new logger", fname, errno));
589
- } else {
590
- #ifdef ROCKSDB_FALLOCATE_PRESENT
591
- fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 4 * 1024);
592
- #endif
593
- SetFD_CLOEXEC(fd, nullptr);
594
- result->reset(new PosixLogger(f, &gettid, Env::Default()));
595
- return IOStatus::OK();
596
- }
597
- }
598
-
599
555
  IOStatus FileExists(const std::string& fname, const IOOptions& /*opts*/,
600
556
  IODebugContext* /*dbg*/) override {
601
557
  int result = access(fname.c_str(), F_OK);
@@ -883,8 +839,8 @@ class PosixFileSystem : public FileSystem {
883
839
  return IOStatus::OK();
884
840
  }
885
841
 
886
- char the_path[256];
887
- char* ret = getcwd(the_path, 256);
842
+ char the_path[4096];
843
+ char* ret = getcwd(the_path, 4096);
888
844
  if (ret == nullptr) {
889
845
  return IOStatus::IOError(errnoStr(errno).c_str());
890
846
  }
@@ -1678,6 +1678,7 @@ IOStatus PosixDirectory::Close(const IOOptions& /*opts*/,
1678
1678
  IOStatus PosixDirectory::FsyncWithDirOptions(
1679
1679
  const IOOptions& /*opts*/, IODebugContext* /*dbg*/,
1680
1680
  const DirFsyncOptions& dir_fsync_options) {
1681
+ assert(fd_ >= 0); // Check use after close
1681
1682
  IOStatus s = IOStatus::OK();
1682
1683
  #ifndef OS_AIX
1683
1684
  if (is_btrfs_) {
@@ -16,6 +16,7 @@
16
16
  #include "monitoring/histogram.h"
17
17
  #include "monitoring/iostats_context_imp.h"
18
18
  #include "port/port.h"
19
+ #include "rocksdb/io_status.h"
19
20
  #include "rocksdb/system_clock.h"
20
21
  #include "test_util/sync_point.h"
21
22
  #include "util/crc32c.h"
@@ -23,6 +24,13 @@
23
24
  #include "util/rate_limiter.h"
24
25
 
25
26
  namespace ROCKSDB_NAMESPACE {
27
+ namespace {
28
+ IOStatus AssertFalseAndGetStatusForPrevError() {
29
+ assert(false);
30
+ return IOStatus::IOError("Writer has previous error.");
31
+ }
32
+ } // namespace
33
+
26
34
  IOStatus WritableFileWriter::Create(const std::shared_ptr<FileSystem>& fs,
27
35
  const std::string& fname,
28
36
  const FileOptions& file_opts,
@@ -43,6 +51,10 @@ IOStatus WritableFileWriter::Create(const std::shared_ptr<FileSystem>& fs,
43
51
 
44
52
  IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
45
53
  Env::IOPriority op_rate_limiter_priority) {
54
+ if (seen_error_) {
55
+ return AssertFalseAndGetStatusForPrevError();
56
+ }
57
+
46
58
  const char* src = data.data();
47
59
  size_t left = data.size();
48
60
  IOStatus s;
@@ -85,6 +97,7 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
85
97
  if (buf_.CurrentSize() > 0) {
86
98
  s = Flush(op_rate_limiter_priority);
87
99
  if (!s.ok()) {
100
+ seen_error_ = true;
88
101
  return s;
89
102
  }
90
103
  }
@@ -165,12 +178,17 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
165
178
  if (s.ok()) {
166
179
  uint64_t cur_size = filesize_.load(std::memory_order_acquire);
167
180
  filesize_.store(cur_size + data.size(), std::memory_order_release);
181
+ } else {
182
+ seen_error_ = true;
168
183
  }
169
184
  return s;
170
185
  }
171
186
 
172
187
  IOStatus WritableFileWriter::Pad(const size_t pad_bytes,
173
188
  Env::IOPriority op_rate_limiter_priority) {
189
+ if (seen_error_) {
190
+ return AssertFalseAndGetStatusForPrevError();
191
+ }
174
192
  assert(pad_bytes < kDefaultPageSize);
175
193
  size_t left = pad_bytes;
176
194
  size_t cap = buf_.Capacity() - buf_.CurrentSize();
@@ -186,6 +204,7 @@ IOStatus WritableFileWriter::Pad(const size_t pad_bytes,
186
204
  if (left > 0) {
187
205
  IOStatus s = Flush(op_rate_limiter_priority);
188
206
  if (!s.ok()) {
207
+ seen_error_ = true;
189
208
  return s;
190
209
  }
191
210
  }
@@ -203,17 +222,31 @@ IOStatus WritableFileWriter::Pad(const size_t pad_bytes,
203
222
  }
204
223
 
205
224
  IOStatus WritableFileWriter::Close() {
225
+ if (seen_error_) {
226
+ IOStatus interim;
227
+ if (writable_file_.get() != nullptr) {
228
+ interim = writable_file_->Close(IOOptions(), nullptr);
229
+ writable_file_.reset();
230
+ }
231
+ if (interim.ok()) {
232
+ return IOStatus::IOError(
233
+ "File is closed but data not flushed as writer has previous error.");
234
+ } else {
235
+ return interim;
236
+ }
237
+ }
238
+
206
239
  // Do not quit immediately on failure the file MUST be closed
207
- IOStatus s;
208
240
 
209
241
  // Possible to close it twice now as we MUST close
210
242
  // in __dtor, simply flushing is not enough
211
243
  // Windows when pre-allocating does not fill with zeros
212
244
  // also with unbuffered access we also set the end of data.
213
245
  if (writable_file_.get() == nullptr) {
214
- return s;
246
+ return IOStatus::OK();
215
247
  }
216
248
 
249
+ IOStatus s;
217
250
  s = Flush(); // flush cache to OS
218
251
 
219
252
  IOStatus interim;
@@ -294,9 +327,13 @@ IOStatus WritableFileWriter::Close() {
294
327
  writable_file_.reset();
295
328
  TEST_KILL_RANDOM("WritableFileWriter::Close:1");
296
329
 
297
- if (s.ok() && checksum_generator_ != nullptr && !checksum_finalized_) {
298
- checksum_generator_->Finalize();
299
- checksum_finalized_ = true;
330
+ if (s.ok()) {
331
+ if (checksum_generator_ != nullptr && !checksum_finalized_) {
332
+ checksum_generator_->Finalize();
333
+ checksum_finalized_ = true;
334
+ }
335
+ } else {
336
+ seen_error_ = true;
300
337
  }
301
338
 
302
339
  return s;
@@ -305,6 +342,10 @@ IOStatus WritableFileWriter::Close() {
305
342
  // write out the cached data to the OS cache or storage if direct I/O
306
343
  // enabled
307
344
  IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
345
+ if (seen_error_) {
346
+ return AssertFalseAndGetStatusForPrevError();
347
+ }
348
+
308
349
  IOStatus s;
309
350
  TEST_KILL_RANDOM_WITH_WEIGHT("WritableFileWriter::Flush:0", REDUCE_ODDS2);
310
351
 
@@ -329,6 +370,7 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
329
370
  }
330
371
  }
331
372
  if (!s.ok()) {
373
+ seen_error_ = true;
332
374
  return s;
333
375
  }
334
376
  }
@@ -357,6 +399,7 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
357
399
  }
358
400
 
359
401
  if (!s.ok()) {
402
+ seen_error_ = true;
360
403
  return s;
361
404
  }
362
405
 
@@ -383,6 +426,9 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
383
426
  if (offset_sync_to > 0 &&
384
427
  offset_sync_to - last_sync_size_ >= bytes_per_sync_) {
385
428
  s = RangeSync(last_sync_size_, offset_sync_to - last_sync_size_);
429
+ if (!s.ok()) {
430
+ seen_error_ = true;
431
+ }
386
432
  last_sync_size_ = offset_sync_to;
387
433
  }
388
434
  }
@@ -409,14 +455,20 @@ const char* WritableFileWriter::GetFileChecksumFuncName() const {
409
455
  }
410
456
 
411
457
  IOStatus WritableFileWriter::Sync(bool use_fsync) {
458
+ if (seen_error_) {
459
+ return AssertFalseAndGetStatusForPrevError();
460
+ }
461
+
412
462
  IOStatus s = Flush();
413
463
  if (!s.ok()) {
464
+ seen_error_ = true;
414
465
  return s;
415
466
  }
416
467
  TEST_KILL_RANDOM("WritableFileWriter::Sync:0");
417
468
  if (!use_direct_io() && pending_sync_) {
418
469
  s = SyncInternal(use_fsync);
419
470
  if (!s.ok()) {
471
+ seen_error_ = true;
420
472
  return s;
421
473
  }
422
474
  }
@@ -426,6 +478,10 @@ IOStatus WritableFileWriter::Sync(bool use_fsync) {
426
478
  }
427
479
 
428
480
  IOStatus WritableFileWriter::SyncWithoutFlush(bool use_fsync) {
481
+ if (seen_error_) {
482
+ return AssertFalseAndGetStatusForPrevError();
483
+ }
484
+
429
485
  if (!writable_file_->IsSyncThreadSafe()) {
430
486
  return IOStatus::NotSupported(
431
487
  "Can't WritableFileWriter::SyncWithoutFlush() because "
@@ -434,10 +490,17 @@ IOStatus WritableFileWriter::SyncWithoutFlush(bool use_fsync) {
434
490
  TEST_SYNC_POINT("WritableFileWriter::SyncWithoutFlush:1");
435
491
  IOStatus s = SyncInternal(use_fsync);
436
492
  TEST_SYNC_POINT("WritableFileWriter::SyncWithoutFlush:2");
493
+ if (!s.ok()) {
494
+ seen_error_ = true;
495
+ }
437
496
  return s;
438
497
  }
439
498
 
440
499
  IOStatus WritableFileWriter::SyncInternal(bool use_fsync) {
500
+ if (seen_error_) {
501
+ return AssertFalseAndGetStatusForPrevError();
502
+ }
503
+
441
504
  IOStatus s;
442
505
  IOSTATS_TIMER_GUARD(fsync_nanos);
443
506
  TEST_SYNC_POINT("WritableFileWriter::SyncInternal:0");
@@ -473,10 +536,17 @@ IOStatus WritableFileWriter::SyncInternal(bool use_fsync) {
473
536
  }
474
537
  #endif
475
538
  SetPerfLevel(prev_perf_level);
539
+ if (!s.ok()) {
540
+ seen_error_ = true;
541
+ }
476
542
  return s;
477
543
  }
478
544
 
479
545
  IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) {
546
+ if (seen_error_) {
547
+ return AssertFalseAndGetStatusForPrevError();
548
+ }
549
+
480
550
  IOSTATS_TIMER_GUARD(range_sync_nanos);
481
551
  TEST_SYNC_POINT("WritableFileWriter::RangeSync:0");
482
552
  #ifndef ROCKSDB_LITE
@@ -488,6 +558,9 @@ IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) {
488
558
  IOOptions io_options;
489
559
  io_options.rate_limiter_priority = writable_file_->GetIOPriority();
490
560
  IOStatus s = writable_file_->RangeSync(offset, nbytes, io_options, nullptr);
561
+ if (!s.ok()) {
562
+ seen_error_ = true;
563
+ }
491
564
  #ifndef ROCKSDB_LITE
492
565
  if (ShouldNotifyListeners()) {
493
566
  auto finish_ts = std::chrono::steady_clock::now();
@@ -505,6 +578,10 @@ IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) {
505
578
  // limiter if available
506
579
  IOStatus WritableFileWriter::WriteBuffered(
507
580
  const char* data, size_t size, Env::IOPriority op_rate_limiter_priority) {
581
+ if (seen_error_) {
582
+ return AssertFalseAndGetStatusForPrevError();
583
+ }
584
+
508
585
  IOStatus s;
509
586
  assert(!use_direct_io());
510
587
  const char* src = data;
@@ -576,6 +653,7 @@ IOStatus WritableFileWriter::WriteBuffered(
576
653
  }
577
654
  #endif
578
655
  if (!s.ok()) {
656
+ seen_error_ = true;
579
657
  return s;
580
658
  }
581
659
  }
@@ -590,11 +668,18 @@ IOStatus WritableFileWriter::WriteBuffered(
590
668
  }
591
669
  buf_.Size(0);
592
670
  buffered_data_crc32c_checksum_ = 0;
671
+ if (!s.ok()) {
672
+ seen_error_ = true;
673
+ }
593
674
  return s;
594
675
  }
595
676
 
596
677
  IOStatus WritableFileWriter::WriteBufferedWithChecksum(
597
678
  const char* data, size_t size, Env::IOPriority op_rate_limiter_priority) {
679
+ if (seen_error_) {
680
+ return AssertFalseAndGetStatusForPrevError();
681
+ }
682
+
598
683
  IOStatus s;
599
684
  assert(!use_direct_io());
600
685
  assert(perform_data_verification_ && buffered_data_with_checksum_);
@@ -666,6 +751,7 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum(
666
751
  // and let caller determine error handling.
667
752
  buf_.Size(0);
668
753
  buffered_data_crc32c_checksum_ = 0;
754
+ seen_error_ = true;
669
755
  return s;
670
756
  }
671
757
  }
@@ -679,6 +765,9 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum(
679
765
  buffered_data_crc32c_checksum_ = 0;
680
766
  uint64_t cur_size = flushed_size_.load(std::memory_order_acquire);
681
767
  flushed_size_.store(cur_size + left, std::memory_order_release);
768
+ if (!s.ok()) {
769
+ seen_error_ = true;
770
+ }
682
771
  return s;
683
772
  }
684
773
 
@@ -712,6 +801,12 @@ void WritableFileWriter::Crc32cHandoffChecksumCalculation(const char* data,
712
801
  #ifndef ROCKSDB_LITE
713
802
  IOStatus WritableFileWriter::WriteDirect(
714
803
  Env::IOPriority op_rate_limiter_priority) {
804
+ if (seen_error_) {
805
+ assert(false);
806
+
807
+ return IOStatus::IOError("Writer has previous error.");
808
+ }
809
+
715
810
  assert(use_direct_io());
716
811
  IOStatus s;
717
812
  const size_t alignment = buf_.Alignment();
@@ -778,6 +873,7 @@ IOStatus WritableFileWriter::WriteDirect(
778
873
  }
779
874
  if (!s.ok()) {
780
875
  buf_.Size(file_advance + leftover_tail);
876
+ seen_error_ = true;
781
877
  return s;
782
878
  }
783
879
  }
@@ -801,12 +897,18 @@ IOStatus WritableFileWriter::WriteDirect(
801
897
  // is a multiple of whole pages otherwise filesize_ is leftover_tail
802
898
  // behind
803
899
  next_write_offset_ += file_advance;
900
+ } else {
901
+ seen_error_ = true;
804
902
  }
805
903
  return s;
806
904
  }
807
905
 
808
906
  IOStatus WritableFileWriter::WriteDirectWithChecksum(
809
907
  Env::IOPriority op_rate_limiter_priority) {
908
+ if (seen_error_) {
909
+ return AssertFalseAndGetStatusForPrevError();
910
+ }
911
+
810
912
  assert(use_direct_io());
811
913
  assert(perform_data_verification_ && buffered_data_with_checksum_);
812
914
  IOStatus s;
@@ -884,6 +986,7 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
884
986
  buf_.Size(file_advance + leftover_tail);
885
987
  buffered_data_crc32c_checksum_ =
886
988
  crc32c::Value(buf_.BufferStart(), buf_.CurrentSize());
989
+ seen_error_ = true;
887
990
  return s;
888
991
  }
889
992
  }
@@ -907,6 +1010,8 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
907
1010
  // is a multiple of whole pages otherwise filesize_ is leftover_tail
908
1011
  // behind
909
1012
  next_write_offset_ += file_advance;
1013
+ } else {
1014
+ seen_error_ = true;
910
1015
  }
911
1016
  return s;
912
1017
  }
@@ -151,6 +151,7 @@ class WritableFileWriter {
151
151
  uint64_t next_write_offset_;
152
152
  #endif // ROCKSDB_LITE
153
153
  bool pending_sync_;
154
+ bool seen_error_;
154
155
  uint64_t last_sync_size_;
155
156
  uint64_t bytes_per_sync_;
156
157
  RateLimiter* rate_limiter_;
@@ -186,6 +187,7 @@ class WritableFileWriter {
186
187
  next_write_offset_(0),
187
188
  #endif // ROCKSDB_LITE
188
189
  pending_sync_(false),
190
+ seen_error_(false),
189
191
  last_sync_size_(0),
190
192
  bytes_per_sync_(options.bytes_per_sync),
191
193
  rate_limiter_(options.rate_limiter),
@@ -288,6 +290,11 @@ class WritableFileWriter {
288
290
 
289
291
  const char* GetFileChecksumFuncName() const;
290
292
 
293
+ bool seen_error() const { return seen_error_; }
294
+ // For options of relaxed consistency, users might hope to continue
295
+ // operating on the file after an error happens.
296
+ void reset_seen_error() { seen_error_ = false; }
297
+
291
298
  private:
292
299
  // Decide the Rate Limiter priority.
293
300
  static Env::IOPriority DecideRateLimiterPriority(