@nxtedition/rocksdb 13.5.13 → 14.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. package/binding.cc +33 -2
  2. package/binding.gyp +2 -2
  3. package/chained-batch.js +9 -16
  4. package/deps/rocksdb/rocksdb/BUCK +18 -1
  5. package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
  6. package/deps/rocksdb/rocksdb/Makefile +20 -9
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +90 -13
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -75
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.h +44 -36
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +184 -148
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +5 -11
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +116 -47
  13. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +3 -6
  15. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -1
  16. package/deps/rocksdb/rocksdb/db/builder.cc +4 -2
  17. package/deps/rocksdb/rocksdb/db/c.cc +207 -0
  18. package/deps/rocksdb/rocksdb/db/c_test.c +72 -0
  19. package/deps/rocksdb/rocksdb/db/column_family.cc +3 -2
  20. package/deps/rocksdb/rocksdb/db/column_family.h +5 -0
  21. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +2 -0
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +51 -38
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +29 -12
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +5 -10
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +566 -366
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +131 -4
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +1 -0
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +4 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +13 -14
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +12 -7
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -10
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +97 -76
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +11 -14
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +1 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +16 -3
  39. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +1 -0
  40. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +448 -1
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +22 -20
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +4 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +5 -5
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +7 -3
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -1
  46. package/deps/rocksdb/rocksdb/db/db_iter.cc +104 -0
  47. package/deps/rocksdb/rocksdb/db/db_iter.h +4 -11
  48. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +331 -58
  49. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +129 -0
  50. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +64 -0
  51. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +40 -0
  52. package/deps/rocksdb/rocksdb/db/db_test2.cc +25 -15
  53. package/deps/rocksdb/rocksdb/db/db_test_util.cc +42 -24
  54. package/deps/rocksdb/rocksdb/db/db_test_util.h +29 -14
  55. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +69 -36
  56. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +0 -1
  57. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  58. package/deps/rocksdb/rocksdb/db/experimental.cc +5 -4
  59. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +8 -1
  60. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +275 -79
  61. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +23 -5
  62. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +591 -175
  63. package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -4
  64. package/deps/rocksdb/rocksdb/db/log_reader.cc +5 -2
  65. package/deps/rocksdb/rocksdb/db/memtable.cc +84 -35
  66. package/deps/rocksdb/rocksdb/db/memtable.h +39 -34
  67. package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -0
  68. package/deps/rocksdb/rocksdb/db/merge_operator.cc +1 -1
  69. package/deps/rocksdb/rocksdb/db/multi_scan.cc +11 -5
  70. package/deps/rocksdb/rocksdb/db/version_edit.cc +1 -1
  71. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  72. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +34 -14
  73. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +28 -5
  74. package/deps/rocksdb/rocksdb/db/version_set.cc +159 -14
  75. package/deps/rocksdb/rocksdb/db/version_set.h +2 -0
  76. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -1
  77. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +60 -0
  78. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +16 -1
  79. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +75 -10
  80. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.cc +28 -0
  81. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +2 -0
  82. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +50 -2
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +57 -0
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +0 -4
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +266 -35
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -6
  89. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +18 -2
  90. package/deps/rocksdb/rocksdb/env/env.cc +12 -0
  91. package/deps/rocksdb/rocksdb/env/env_test.cc +18 -0
  92. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +2 -0
  93. package/deps/rocksdb/rocksdb/env/fs_posix.cc +9 -5
  94. package/deps/rocksdb/rocksdb/env/io_posix.cc +4 -2
  95. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +19 -0
  96. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -31
  97. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +42 -9
  98. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +93 -0
  99. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +43 -49
  100. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +4 -3
  101. package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +8 -6
  102. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +487 -0
  103. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +11 -12
  104. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +135 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -0
  106. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +12 -0
  107. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -1
  108. package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +8 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +12 -8
  110. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +3 -0
  111. package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +19 -9
  112. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +219 -24
  113. package/deps/rocksdb/rocksdb/include/rocksdb/point_lock_bench_tool.h +14 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +2 -2
  115. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +1 -1
  116. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +7 -0
  117. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +16 -0
  118. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +16 -4
  119. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +13 -0
  120. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +4 -0
  121. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +0 -2
  122. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +45 -0
  123. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +1 -1
  124. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +1 -1
  125. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +6 -1
  126. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
  127. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  128. package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +3 -3
  129. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +77 -51
  130. package/deps/rocksdb/rocksdb/memtable/skiplist.h +10 -13
  131. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +16 -7
  132. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +9 -4
  133. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +2 -0
  134. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
  135. package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -1
  136. package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
  137. package/deps/rocksdb/rocksdb/options/options.cc +2 -0
  138. package/deps/rocksdb/rocksdb/options/options_helper.cc +9 -8
  139. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -5
  140. package/deps/rocksdb/rocksdb/port/mmap.cc +1 -1
  141. package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +51 -0
  142. package/deps/rocksdb/rocksdb/port/win/xpress_win.h +4 -0
  143. package/deps/rocksdb/rocksdb/src.mk +8 -2
  144. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1125 -765
  145. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +35 -24
  146. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +29 -4
  147. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +732 -256
  148. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +225 -16
  149. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -26
  150. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -1
  151. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +2 -75
  152. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +433 -141
  153. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +2 -0
  154. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +17 -10
  155. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy_impl.h +20 -0
  156. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +112 -85
  157. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +191 -36
  158. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +2 -2
  159. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  160. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +108 -31
  161. package/deps/rocksdb/rocksdb/table/external_table.cc +7 -3
  162. package/deps/rocksdb/rocksdb/table/format.cc +6 -12
  163. package/deps/rocksdb/rocksdb/table/format.h +10 -0
  164. package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
  165. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +1 -1
  166. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -1
  167. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +5 -0
  168. package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -1
  169. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +118 -46
  170. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +9 -8
  171. package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
  172. package/deps/rocksdb/rocksdb/table/table_properties.cc +16 -0
  173. package/deps/rocksdb/rocksdb/table/table_test.cc +1540 -155
  174. package/deps/rocksdb/rocksdb/test_util/testutil.h +21 -5
  175. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -5
  176. package/deps/rocksdb/rocksdb/tools/ldb.cc +1 -2
  177. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +2 -0
  178. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -3
  179. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +133 -165
  180. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +173 -64
  181. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +69 -0
  182. package/deps/rocksdb/rocksdb/util/atomic.h +6 -0
  183. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +29 -20
  184. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +10 -6
  185. package/deps/rocksdb/rocksdb/util/bit_fields.h +338 -0
  186. package/deps/rocksdb/rocksdb/util/coding.h +3 -3
  187. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -2
  188. package/deps/rocksdb/rocksdb/util/compression.cc +777 -82
  189. package/deps/rocksdb/rocksdb/util/compression.h +5 -0
  190. package/deps/rocksdb/rocksdb/util/compression_test.cc +5 -3
  191. package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +2 -2
  192. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +15 -14
  193. package/deps/rocksdb/rocksdb/util/interval_test.cc +102 -0
  194. package/deps/rocksdb/rocksdb/util/semaphore.h +164 -0
  195. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +10 -6
  196. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -2
  197. package/deps/rocksdb/rocksdb/util/slice_test.cc +136 -0
  198. package/deps/rocksdb/rocksdb/util/status.cc +1 -0
  199. package/deps/rocksdb/rocksdb/util/string_util.cc +2 -16
  200. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +1 -1
  201. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
  202. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +7 -4
  203. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +35 -14
  204. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +2 -0
  205. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +5 -2
  206. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/any_lock_manager_test.h +244 -0
  207. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench.cc +18 -0
  208. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench_tool.cc +159 -0
  209. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +1244 -161
  210. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +66 -12
  211. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_stress_test.cc +103 -0
  212. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +1275 -8
  213. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +40 -262
  214. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test_common.h +78 -0
  215. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_validation_test_runner.h +469 -0
  216. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -6
  217. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +4 -0
  218. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +9 -1
  219. package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +18 -9
  220. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +2 -0
  221. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +2 -1
  222. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +72 -44
  223. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +92 -15
  224. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +6 -20
  225. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +143 -112
  226. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +23 -16
  227. package/index.js +3 -3
  228. package/package.json +1 -1
  229. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  230. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  231. package/util.h +38 -12
  232. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc +0 -17
@@ -16,6 +16,31 @@
16
16
  #include "util/string_util.h"
17
17
 
18
18
  namespace ROCKSDB_NAMESPACE {
19
+ namespace {
20
+ // Format of values in CompressedSecondaryCache:
21
+ // If enable_custom_split_merge:
22
+ // * A chain of CacheValueChunk representing the sequence of bytes for a tagged
23
+ // value. The overall length of the tagged value is determined by the chain
24
+ // of CacheValueChunks.
25
+ // If !enable_custom_split_merge:
26
+ // * A LengthPrefixedSlice (starts with varint64 size) of a tagged value.
27
+ //
28
+ // A tagged value has a 2-byte header before the "saved" or compressed block
29
+ // data:
30
+ // * 1 byte for "source" CacheTier indicating which tier is responsible for
31
+ // compression/decompression.
32
+ // * 1 byte for compression type which is generated/used by
33
+ // CompressedSecondaryCache iff source == CacheTier::kVolatileCompressedTier
34
+ // (original entry passed in was uncompressed). Otherwise, the compression
35
+ // type is preserved from the entry passed in.
36
+ constexpr uint32_t kTagSize = 2;
37
+
38
+ // Size of tag + varint size prefix when applicable
39
+ uint32_t GetHeaderSize(size_t data_size, bool enable_split_merge) {
40
+ return (enable_split_merge ? 0 : VarintLength(kTagSize + data_size)) +
41
+ kTagSize;
42
+ }
43
+ } // namespace
19
44
 
20
45
  CompressedSecondaryCache::CompressedSecondaryCache(
21
46
  const CompressedSecondaryCacheOptions& opts)
@@ -40,13 +65,9 @@ std::unique_ptr<SecondaryCacheResultHandle> CompressedSecondaryCache::Lookup(
40
65
  Cache::CreateContext* create_context, bool /*wait*/, bool advise_erase,
41
66
  Statistics* stats, bool& kept_in_sec_cache) {
42
67
  assert(helper);
43
- // This is a minor optimization. Its ok to skip it in TSAN in order to
44
- // avoid a false positive.
45
- #ifndef __SANITIZE_THREAD__
46
- if (disable_cache_) {
68
+ if (disable_cache_.LoadRelaxed()) {
47
69
  return nullptr;
48
70
  }
49
- #endif
50
71
 
51
72
  std::unique_ptr<SecondaryCacheResultHandle> handle;
52
73
  kept_in_sec_cache = false;
@@ -62,75 +83,58 @@ std::unique_ptr<SecondaryCacheResultHandle> CompressedSecondaryCache::Lookup(
62
83
  return nullptr;
63
84
  }
64
85
 
65
- CacheAllocationPtr* ptr{nullptr};
66
- CacheAllocationPtr merged_value;
67
- size_t handle_value_charge{0};
68
- const char* data_ptr = nullptr;
69
- CacheTier source = CacheTier::kVolatileCompressedTier;
70
- CompressionType type = cache_options_.compression_type;
86
+ std::string merged_value;
87
+ Slice tagged_data;
71
88
  if (cache_options_.enable_custom_split_merge) {
72
89
  CacheValueChunk* value_chunk_ptr =
73
- reinterpret_cast<CacheValueChunk*>(handle_value);
74
- merged_value = MergeChunksIntoValue(value_chunk_ptr, handle_value_charge);
75
- ptr = &merged_value;
76
- data_ptr = ptr->get();
90
+ static_cast<CacheValueChunk*>(handle_value);
91
+ merged_value = MergeChunksIntoValue(value_chunk_ptr);
92
+ tagged_data = Slice(merged_value);
77
93
  } else {
78
- uint32_t type_32 = static_cast<uint32_t>(type);
79
- uint32_t source_32 = static_cast<uint32_t>(source);
80
- ptr = reinterpret_cast<CacheAllocationPtr*>(handle_value);
81
- handle_value_charge = cache_->GetCharge(lru_handle);
82
- data_ptr = ptr->get();
83
- const char* limit = ptr->get() + handle_value_charge;
84
- data_ptr =
85
- GetVarint32Ptr(data_ptr, limit, static_cast<uint32_t*>(&type_32));
86
- type = static_cast<CompressionType>(type_32);
87
- data_ptr =
88
- GetVarint32Ptr(data_ptr, limit, static_cast<uint32_t*>(&source_32));
89
- source = static_cast<CacheTier>(source_32);
90
- uint64_t data_size = 0;
91
- data_ptr =
92
- GetVarint64Ptr(data_ptr, limit, static_cast<uint64_t*>(&data_size));
93
- assert(handle_value_charge > data_size);
94
- handle_value_charge = data_size;
94
+ tagged_data = GetLengthPrefixedSlice(static_cast<char*>(handle_value));
95
95
  }
96
- MemoryAllocator* allocator = cache_options_.memory_allocator.get();
97
96
 
98
- Status s;
99
- Cache::ObjectPtr value{nullptr};
100
- size_t charge{0};
97
+ auto source = lossless_cast<CacheTier>(tagged_data[0]);
98
+ auto type = lossless_cast<CompressionType>(tagged_data[1]);
99
+
100
+ std::unique_ptr<char[]> uncompressed;
101
+ Slice saved(tagged_data.data() + kTagSize, tagged_data.size() - kTagSize);
101
102
  if (source == CacheTier::kVolatileCompressedTier) {
102
- if (cache_options_.compression_type == kNoCompression ||
103
- cache_options_.do_not_compress_roles.Contains(helper->role)) {
104
- s = helper->create_cb(Slice(data_ptr, handle_value_charge),
105
- kNoCompression, CacheTier::kVolatileTier,
106
- create_context, allocator, &value, &charge);
107
- } else {
108
- // TODO: can we work some magic with create_cb, which might be based on
109
- // custom compression, to decompress without an extra copy in create_cb?
103
+ if (type != kNoCompression) {
104
+ // TODO: can we do something to avoid yet another allocation?
110
105
  Decompressor::Args args;
111
- args.compressed_data = Slice(data_ptr, handle_value_charge);
112
- args.compression_type = cache_options_.compression_type;
113
- s = decompressor_->ExtractUncompressedSize(args);
114
- assert(s.ok());
106
+ args.compressed_data = saved;
107
+ args.compression_type = type;
108
+ Status s = decompressor_->ExtractUncompressedSize(args);
109
+ assert(s.ok()); // in-memory data
115
110
  if (s.ok()) {
116
- auto uncompressed = std::make_unique<char[]>(args.uncompressed_size);
111
+ uncompressed = std::make_unique<char[]>(args.uncompressed_size);
117
112
  s = decompressor_->DecompressBlock(args, uncompressed.get());
118
- assert(s.ok());
119
- if (s.ok()) {
120
- s = helper->create_cb(
121
- Slice(uncompressed.get(), args.uncompressed_size), kNoCompression,
122
- CacheTier::kVolatileTier, create_context, allocator, &value,
123
- &charge);
124
- }
113
+ assert(s.ok()); // in-memory data
125
114
  }
115
+ if (!s.ok()) {
116
+ cache_->Release(lru_handle, /*erase_if_last_ref=*/true);
117
+ return nullptr;
118
+ }
119
+ saved = Slice(uncompressed.get(), args.uncompressed_size);
120
+ type = kNoCompression;
121
+ // Free temporary compressed data as early as we can. This could matter
122
+ // for unusually large blocks because we also have
123
+ // * Another compressed copy above (from lru_cache).
124
+ // * The uncompressed copy in `uncompressed`.
125
+ // * Another uncompressed copy in `result_value` below.
126
+ // Let's try to max out at 3 copies instead of 4.
127
+ merged_value = std::string();
126
128
  }
127
- } else {
128
- // The item was not compressed by us. Let the helper create_cb
129
- // uncompress it
130
- s = helper->create_cb(Slice(data_ptr, handle_value_charge), type, source,
131
- create_context, allocator, &value, &charge);
129
+ // Reduced as if it came from primary cache
130
+ source = CacheTier::kVolatileTier;
132
131
  }
133
132
 
133
+ Cache::ObjectPtr result_value = nullptr;
134
+ size_t result_charge = 0;
135
+ Status s = helper->create_cb(saved, type, source, create_context,
136
+ cache_options_.memory_allocator.get(),
137
+ &result_value, &result_charge);
134
138
  if (!s.ok()) {
135
139
  cache_->Release(lru_handle, /*erase_if_last_ref=*/true);
136
140
  return nullptr;
@@ -148,7 +152,8 @@ std::unique_ptr<SecondaryCacheResultHandle> CompressedSecondaryCache::Lookup(
148
152
  kept_in_sec_cache = true;
149
153
  cache_->Release(lru_handle, /*erase_if_last_ref=*/false);
150
154
  }
151
- handle.reset(new CompressedSecondaryCacheResultHandle(value, charge));
155
+ handle.reset(
156
+ new CompressedSecondaryCacheResultHandle(result_value, result_charge));
152
157
  RecordTick(stats, COMPRESSED_SECONDARY_CACHE_HITS);
153
158
  return handle;
154
159
  }
@@ -171,85 +176,111 @@ bool CompressedSecondaryCache::MaybeInsertDummy(const Slice& key) {
171
176
 
172
177
  Status CompressedSecondaryCache::InsertInternal(
173
178
  const Slice& key, Cache::ObjectPtr value,
174
- const Cache::CacheItemHelper* helper, CompressionType type,
179
+ const Cache::CacheItemHelper* helper, CompressionType from_type,
175
180
  CacheTier source) {
176
- if (source != CacheTier::kVolatileCompressedTier &&
177
- cache_options_.enable_custom_split_merge) {
178
- // We don't support custom split/merge for the tiered case
179
- return Status::OK();
180
- }
181
-
182
- auto internal_helper = GetHelper(cache_options_.enable_custom_split_merge);
183
- char header[20];
184
- char* payload = header;
185
- payload = EncodeVarint32(payload, static_cast<uint32_t>(type));
186
- payload = EncodeVarint32(payload, static_cast<uint32_t>(source));
187
- size_t data_size = (*helper->size_cb)(value);
188
- char* data_size_ptr = payload;
189
- payload = EncodeVarint64(payload, data_size);
190
-
191
- size_t header_size = payload - header;
192
- size_t total_size = data_size + header_size;
193
- CacheAllocationPtr ptr =
194
- AllocateBlock(total_size, cache_options_.memory_allocator.get());
195
- char* data_ptr = ptr.get() + header_size;
196
-
197
- Status s = (*helper->saveto_cb)(value, 0, data_size, data_ptr);
181
+ bool enable_split_merge = cache_options_.enable_custom_split_merge;
182
+ const Cache::CacheItemHelper* internal_helper = GetHelper(enable_split_merge);
183
+
184
+ // TODO: variant of size_cb that also returns a pointer to the data if
185
+ // already available. Saves an allocation if we keep the compressed version.
186
+ const size_t data_size_original = (*helper->size_cb)(value);
187
+
188
+ // Allocate enough memory for header/tag + original data because (a) we might
189
+ // not be attempting compression at all, and (b) we might keep the original if
190
+ // compression is insufficient. But we don't need the length prefix with
191
+ // enable_split_merge. TODO: be smarter with CacheValueChunk to save an
192
+ // allocation in the enable_split_merge case.
193
+ size_t header_size = GetHeaderSize(data_size_original, enable_split_merge);
194
+ CacheAllocationPtr allocation = AllocateBlock(
195
+ header_size + data_size_original, cache_options_.memory_allocator.get());
196
+ char* data_ptr = allocation.get() + header_size;
197
+ Slice tagged_data(data_ptr - kTagSize, data_size_original + kTagSize);
198
+ assert(tagged_data.data() >= allocation.get());
199
+
200
+ Status s = (*helper->saveto_cb)(value, 0, data_size_original, data_ptr);
198
201
  if (!s.ok()) {
199
202
  return s;
200
203
  }
201
- Slice val(data_ptr, data_size);
202
204
 
203
- std::string compressed_val;
204
- if (cache_options_.compression_type != kNoCompression &&
205
- type == kNoCompression &&
205
+ std::unique_ptr<char[]> tagged_compressed_data;
206
+ CompressionType to_type = kNoCompression;
207
+ if (compressor_ && from_type == kNoCompression &&
206
208
  !cache_options_.do_not_compress_roles.Contains(helper->role)) {
207
- PERF_COUNTER_ADD(compressed_sec_cache_uncompressed_bytes, data_size);
208
-
209
- CompressionType to_type = kNoCompression;
210
- s = compressor_->CompressBlock(val, &compressed_val, &to_type,
209
+ assert(source == CacheTier::kVolatileCompressedTier);
210
+
211
+ // TODO: consider malloc sizes for max acceptable compressed size
212
+ // Or maybe max_compressed_bytes_per_kb
213
+ size_t data_size_compressed = data_size_original - 1;
214
+ tagged_compressed_data =
215
+ std::make_unique<char[]>(data_size_compressed + kTagSize);
216
+ s = compressor_->CompressBlock(Slice(data_ptr, data_size_original),
217
+ tagged_compressed_data.get() + kTagSize,
218
+ &data_size_compressed, &to_type,
211
219
  nullptr /*working_area*/);
212
220
  if (!s.ok()) {
213
221
  return s;
214
222
  }
215
- // TODO: allow values not compressed when there's no size savings?
216
- assert(to_type == cache_options_.compression_type);
217
- if (to_type != cache_options_.compression_type) {
218
- return Status::Corruption("Failed to compress value.");
219
- }
220
-
221
- val = Slice(compressed_val);
222
- data_size = compressed_val.size();
223
- payload = EncodeVarint64(data_size_ptr, data_size);
224
- header_size = payload - header;
225
- total_size = header_size + data_size;
226
- PERF_COUNTER_ADD(compressed_sec_cache_compressed_bytes, data_size);
227
-
228
- if (!cache_options_.enable_custom_split_merge) {
229
- ptr = AllocateBlock(total_size, cache_options_.memory_allocator.get());
230
- data_ptr = ptr.get() + header_size;
231
- memcpy(data_ptr, compressed_val.data(), data_size);
223
+ PERF_COUNTER_ADD(compressed_sec_cache_uncompressed_bytes,
224
+ data_size_original);
225
+ if (to_type == kNoCompression) {
226
+ // Compression rejected or otherwise aborted/failed
227
+ to_type = kNoCompression;
228
+ tagged_compressed_data.reset();
229
+ // TODO: consider separate counters for rejected compressions
230
+ PERF_COUNTER_ADD(compressed_sec_cache_compressed_bytes,
231
+ data_size_original);
232
+ } else {
233
+ PERF_COUNTER_ADD(compressed_sec_cache_compressed_bytes,
234
+ data_size_compressed);
235
+ if (enable_split_merge) {
236
+ // Only need tagged_data for copying into CacheValueChunks.
237
+ tagged_data = Slice(tagged_compressed_data.get(),
238
+ data_size_compressed + kTagSize);
239
+ allocation.reset();
240
+ } else {
241
+ // Replace allocation with compressed version, copied from string
242
+ header_size = GetHeaderSize(data_size_compressed, enable_split_merge);
243
+ allocation = AllocateBlock(header_size + data_size_compressed,
244
+ cache_options_.memory_allocator.get());
245
+ data_ptr = allocation.get() + header_size;
246
+ // Ignore unpopulated tag on tagged_compressed_data; will only be
247
+ // populated on the new allocation.
248
+ std::memcpy(data_ptr, tagged_compressed_data.get() + kTagSize,
249
+ data_size_compressed);
250
+ tagged_data =
251
+ Slice(data_ptr - kTagSize, data_size_compressed + kTagSize);
252
+ assert(tagged_data.data() >= allocation.get());
253
+ }
232
254
  }
233
255
  }
234
256
 
235
257
  PERF_COUNTER_ADD(compressed_sec_cache_insert_real_count, 1);
236
- if (cache_options_.enable_custom_split_merge) {
258
+
259
+ // Save the tag fields
260
+ const_cast<char*>(tagged_data.data())[0] = lossless_cast<char>(source);
261
+ const_cast<char*>(tagged_data.data())[1] = lossless_cast<char>(
262
+ source == CacheTier::kVolatileCompressedTier ? to_type : from_type);
263
+
264
+ if (enable_split_merge) {
237
265
  size_t split_charge{0};
238
- CacheValueChunk* value_chunks_head = SplitValueIntoChunks(
239
- val, cache_options_.compression_type, split_charge);
240
- return cache_->Insert(key, value_chunks_head, internal_helper,
241
- split_charge);
266
+ CacheValueChunk* value_chunks_head =
267
+ SplitValueIntoChunks(tagged_data, split_charge);
268
+ s = cache_->Insert(key, value_chunks_head, internal_helper, split_charge);
269
+ assert(s.ok()); // LRUCache::Insert() with handle==nullptr always OK
242
270
  } else {
271
+ // Save the size prefix
272
+ char* ptr = allocation.get();
273
+ ptr = EncodeVarint64(ptr, tagged_data.size());
274
+ assert(ptr == tagged_data.data());
243
275
  #ifdef ROCKSDB_MALLOC_USABLE_SIZE
244
- size_t charge = malloc_usable_size(ptr.get());
276
+ size_t charge = malloc_usable_size(allocation.get());
245
277
  #else
246
- size_t charge = total_size;
278
+ size_t charge = tagged_data.size();
247
279
  #endif
248
- std::memcpy(ptr.get(), header, header_size);
249
- CacheAllocationPtr* buf = new CacheAllocationPtr(std::move(ptr));
250
- charge += sizeof(CacheAllocationPtr);
251
- return cache_->Insert(key, buf, internal_helper, charge);
280
+ s = cache_->Insert(key, allocation.release(), internal_helper, charge);
281
+ assert(s.ok()); // LRUCache::Insert() with handle==nullptr always OK
252
282
  }
283
+ return Status::OK();
253
284
  }
254
285
 
255
286
  Status CompressedSecondaryCache::Insert(const Slice& key,
@@ -271,7 +302,17 @@ Status CompressedSecondaryCache::Insert(const Slice& key,
271
302
  Status CompressedSecondaryCache::InsertSaved(
272
303
  const Slice& key, const Slice& saved, CompressionType type = kNoCompression,
273
304
  CacheTier source = CacheTier::kVolatileTier) {
305
+ if (source == CacheTier::kVolatileCompressedTier) {
306
+ // Unexpected, would violate InsertInternal preconditions
307
+ assert(source != CacheTier::kVolatileCompressedTier);
308
+ return Status::OK();
309
+ }
274
310
  if (type == kNoCompression) {
311
+ // Not currently supported (why?)
312
+ return Status::OK();
313
+ }
314
+ if (cache_options_.enable_custom_split_merge) {
315
+ // We don't support custom split/merge for the tiered case (why?)
275
316
  return Status::OK();
276
317
  }
277
318
 
@@ -291,7 +332,7 @@ Status CompressedSecondaryCache::SetCapacity(size_t capacity) {
291
332
  MutexLock l(&capacity_mutex_);
292
333
  cache_options_.capacity = capacity;
293
334
  cache_->SetCapacity(capacity);
294
- disable_cache_ = capacity == 0;
335
+ disable_cache_.StoreRelaxed(capacity == 0);
295
336
  return Status::OK();
296
337
  }
297
338
 
@@ -321,9 +362,14 @@ std::string CompressedSecondaryCache::GetPrintableOptions() const {
321
362
  return ret;
322
363
  }
323
364
 
365
+ // FIXME: this could use a lot of attention, including:
366
+ // * Use allocator
367
+ // * We shouldn't be worse than non-split; be more pro-actively aware of
368
+ // internal fragmentation
369
+ // * Consider a unified object/chunk structure that may or may not split
370
+ // * Optimize size overhead of chunks
324
371
  CompressedSecondaryCache::CacheValueChunk*
325
372
  CompressedSecondaryCache::SplitValueIntoChunks(const Slice& value,
326
- CompressionType compression_type,
327
373
  size_t& charge) {
328
374
  assert(!value.empty());
329
375
  const char* src_ptr = value.data();
@@ -344,15 +390,14 @@ CompressedSecondaryCache::SplitValueIntoChunks(const Slice& value,
344
390
  // size, or there is no compression.
345
391
  if (upper == malloc_bin_sizes_.begin() ||
346
392
  upper == malloc_bin_sizes_.end() ||
347
- *upper - predicted_chunk_size < malloc_bin_sizes_.front() ||
348
- compression_type == kNoCompression) {
393
+ *upper - predicted_chunk_size < malloc_bin_sizes_.front()) {
349
394
  tmp_size = predicted_chunk_size;
350
395
  } else {
351
396
  tmp_size = *(--upper);
352
397
  }
353
398
 
354
399
  CacheValueChunk* new_chunk =
355
- reinterpret_cast<CacheValueChunk*>(new char[tmp_size]);
400
+ static_cast<CacheValueChunk*>(static_cast<void*>(new char[tmp_size]));
356
401
  current_chunk->next = new_chunk;
357
402
  current_chunk = current_chunk->next;
358
403
  actual_chunk_size = tmp_size - sizeof(CacheValueChunk) + 1;
@@ -367,28 +412,24 @@ CompressedSecondaryCache::SplitValueIntoChunks(const Slice& value,
367
412
  return dummy_head.next;
368
413
  }
369
414
 
370
- CacheAllocationPtr CompressedSecondaryCache::MergeChunksIntoValue(
371
- const void* chunks_head, size_t& charge) {
372
- const CacheValueChunk* head =
373
- reinterpret_cast<const CacheValueChunk*>(chunks_head);
415
+ std::string CompressedSecondaryCache::MergeChunksIntoValue(
416
+ const CacheValueChunk* head) {
374
417
  const CacheValueChunk* current_chunk = head;
375
- charge = 0;
418
+ size_t total_size = 0;
376
419
  while (current_chunk != nullptr) {
377
- charge += current_chunk->size;
420
+ total_size += current_chunk->size;
378
421
  current_chunk = current_chunk->next;
379
422
  }
380
423
 
381
- CacheAllocationPtr ptr =
382
- AllocateBlock(charge, cache_options_.memory_allocator.get());
424
+ std::string result;
425
+ result.reserve(total_size);
383
426
  current_chunk = head;
384
- size_t pos{0};
385
427
  while (current_chunk != nullptr) {
386
- memcpy(ptr.get() + pos, current_chunk->data, current_chunk->size);
387
- pos += current_chunk->size;
428
+ result.append(current_chunk->data, current_chunk->size);
388
429
  current_chunk = current_chunk->next;
389
430
  }
390
-
391
- return ptr;
431
+ assert(result.size() == total_size);
432
+ return result;
392
433
  }
393
434
 
394
435
  const Cache::CacheItemHelper* CompressedSecondaryCache::GetHelper(
@@ -402,16 +443,16 @@ const Cache::CacheItemHelper* CompressedSecondaryCache::GetHelper(
402
443
  CacheValueChunk* tmp_chunk = chunks_head;
403
444
  chunks_head = chunks_head->next;
404
445
  tmp_chunk->Free();
405
- obj = nullptr;
406
446
  }
407
447
  }};
408
448
  return &kHelper;
409
449
  } else {
410
450
  static const Cache::CacheItemHelper kHelper{
411
451
  CacheEntryRole::kMisc,
412
- [](Cache::ObjectPtr obj, MemoryAllocator* /*alloc*/) {
413
- delete static_cast<CacheAllocationPtr*>(obj);
414
- obj = nullptr;
452
+ [](Cache::ObjectPtr obj, MemoryAllocator* alloc) {
453
+ if (obj != nullptr) {
454
+ CacheAllocationDeleter{alloc}(static_cast<char*>(obj));
455
+ }
415
456
  }};
416
457
  return &kHelper;
417
458
  }
@@ -422,12 +463,7 @@ size_t CompressedSecondaryCache::TEST_GetCharge(const Slice& key) {
422
463
  if (lru_handle == nullptr) {
423
464
  return 0;
424
465
  }
425
-
426
466
  size_t charge = cache_->GetCharge(lru_handle);
427
- if (cache_->Value(lru_handle) != nullptr &&
428
- !cache_options_.enable_custom_split_merge) {
429
- charge -= 10;
430
- }
431
467
  cache_->Release(lru_handle, /*erase_if_last_ref=*/false);
432
468
  return charge;
433
469
  }
@@ -10,13 +10,12 @@
10
10
  #include <memory>
11
11
 
12
12
  #include "cache/cache_reservation_manager.h"
13
- #include "cache/lru_cache.h"
14
13
  #include "memory/memory_allocator_impl.h"
14
+ #include "rocksdb/advanced_compression.h"
15
15
  #include "rocksdb/secondary_cache.h"
16
16
  #include "rocksdb/slice.h"
17
17
  #include "rocksdb/status.h"
18
- #include "util/compression.h"
19
- #include "util/mutexlock.h"
18
+ #include "util/atomic.h"
20
19
 
21
20
  namespace ROCKSDB_NAMESPACE {
22
21
 
@@ -124,14 +123,9 @@ class CompressedSecondaryCache : public SecondaryCache {
124
123
  // Split value into chunks to better fit into jemalloc bins. The chunks
125
124
  // are stored in CacheValueChunk and extra charge is needed for each chunk,
126
125
  // so the cache charge is recalculated here.
127
- CacheValueChunk* SplitValueIntoChunks(const Slice& value,
128
- CompressionType compression_type,
129
- size_t& charge);
126
+ CacheValueChunk* SplitValueIntoChunks(const Slice& value, size_t& charge);
130
127
 
131
- // After merging chunks, the extra charge for each chunk is removed, so
132
- // the charge is recalculated.
133
- CacheAllocationPtr MergeChunksIntoValue(const void* chunks_head,
134
- size_t& charge);
128
+ std::string MergeChunksIntoValue(const CacheValueChunk* head);
135
129
 
136
130
  bool MaybeInsertDummy(const Slice& key);
137
131
 
@@ -149,7 +143,7 @@ class CompressedSecondaryCache : public SecondaryCache {
149
143
  std::shared_ptr<Decompressor> decompressor_;
150
144
  mutable port::Mutex capacity_mutex_;
151
145
  std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr_;
152
- bool disable_cache_;
146
+ RelaxedAtomic<bool> disable_cache_;
153
147
  };
154
148
 
155
149
  } // namespace ROCKSDB_NAMESPACE