@nxtedition/rocksdb 13.1.4 → 13.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/binding.cc +43 -16
  2. package/deps/rocksdb/rocksdb/{TARGETS → BUCK} +27 -0
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -1
  4. package/deps/rocksdb/rocksdb/Makefile +2 -2
  5. package/deps/rocksdb/rocksdb/cache/cache.cc +3 -1
  6. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +2 -0
  7. package/deps/rocksdb/rocksdb/db/attribute_group_iterator_impl.h +34 -9
  8. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +7 -6
  9. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +5 -1
  10. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +22 -14
  11. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
  12. package/deps/rocksdb/rocksdb/db/builder.cc +13 -24
  13. package/deps/rocksdb/rocksdb/db/coalescing_iterator.h +35 -10
  14. package/deps/rocksdb/rocksdb/db/column_family.cc +21 -10
  15. package/deps/rocksdb/rocksdb/db/column_family.h +15 -8
  16. package/deps/rocksdb/rocksdb/db/column_family_test.cc +98 -7
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +126 -16
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +51 -5
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +2 -2
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -8
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +24 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +52 -22
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -7
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +36 -9
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +6 -0
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +30 -17
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +26 -23
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +43 -33
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +6 -5
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +19 -9
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +6 -5
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +632 -411
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +171 -51
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +7 -5
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +37 -10
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +51 -11
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +10 -3
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +350 -154
  39. package/deps/rocksdb/rocksdb/db/convenience.cc +1 -1
  40. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +62 -27
  41. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +68 -1
  42. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +91 -0
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +134 -70
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +71 -23
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +43 -16
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +47 -33
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +27 -19
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +38 -25
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -3
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +7 -4
  51. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +258 -42
  52. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +161 -9
  53. package/deps/rocksdb/rocksdb/db/db_iter.cc +118 -86
  54. package/deps/rocksdb/rocksdb/db/db_iter.h +44 -17
  55. package/deps/rocksdb/rocksdb/db/db_options_test.cc +27 -6
  56. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -16
  57. package/deps/rocksdb/rocksdb/db/db_test2.cc +60 -15
  58. package/deps/rocksdb/rocksdb/db/db_test_util.cc +97 -44
  59. package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -1
  60. package/deps/rocksdb/rocksdb/db/dbformat.cc +15 -5
  61. package/deps/rocksdb/rocksdb/db/dbformat.h +137 -55
  62. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  63. package/deps/rocksdb/rocksdb/db/experimental.cc +54 -0
  64. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +663 -8
  65. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +152 -91
  66. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +134 -11
  67. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +55 -9
  68. package/deps/rocksdb/rocksdb/db/flush_job.cc +52 -29
  69. package/deps/rocksdb/rocksdb/db/flush_job.h +5 -3
  70. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +18 -12
  71. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +23 -29
  72. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +3 -2
  73. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +2 -0
  74. package/deps/rocksdb/rocksdb/db/internal_stats.cc +9 -6
  75. package/deps/rocksdb/rocksdb/db/internal_stats.h +54 -0
  76. package/deps/rocksdb/rocksdb/db/job_context.h +1 -1
  77. package/deps/rocksdb/rocksdb/db/log_reader.cc +6 -7
  78. package/deps/rocksdb/rocksdb/db/manifest_ops.cc +47 -0
  79. package/deps/rocksdb/rocksdb/db/manifest_ops.h +20 -0
  80. package/deps/rocksdb/rocksdb/db/memtable.cc +165 -64
  81. package/deps/rocksdb/rocksdb/db/memtable.h +422 -243
  82. package/deps/rocksdb/rocksdb/db/memtable_list.cc +99 -68
  83. package/deps/rocksdb/rocksdb/db/memtable_list.h +63 -38
  84. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +28 -25
  85. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +118 -60
  86. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_test.cc +344 -89
  87. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +2 -3
  88. package/deps/rocksdb/rocksdb/db/repair.cc +15 -14
  89. package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -13
  90. package/deps/rocksdb/rocksdb/db/snapshot_checker.h +7 -0
  91. package/deps/rocksdb/rocksdb/db/table_cache.cc +62 -65
  92. package/deps/rocksdb/rocksdb/db/table_cache.h +70 -76
  93. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +5 -6
  94. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
  95. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +8 -7
  96. package/deps/rocksdb/rocksdb/db/version_builder.cc +17 -19
  97. package/deps/rocksdb/rocksdb/db/version_builder.h +13 -12
  98. package/deps/rocksdb/rocksdb/db/version_edit.h +30 -0
  99. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +3 -5
  100. package/deps/rocksdb/rocksdb/db/version_set.cc +89 -129
  101. package/deps/rocksdb/rocksdb/db/version_set.h +12 -4
  102. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -2
  103. package/deps/rocksdb/rocksdb/db/version_set_test.cc +12 -8
  104. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +0 -15
  105. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -2
  106. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +9 -7
  107. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +0 -8
  108. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +28 -2
  109. package/deps/rocksdb/rocksdb/db/write_batch.cc +32 -10
  110. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +9 -0
  111. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
  112. package/deps/rocksdb/rocksdb/db/write_thread.cc +3 -1
  113. package/deps/rocksdb/rocksdb/db/write_thread.h +6 -2
  114. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +15 -0
  115. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +7 -0
  116. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  117. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +18 -2
  118. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +100 -22
  119. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -4
  120. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +34 -8
  121. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +223 -78
  122. package/deps/rocksdb/rocksdb/env/file_system.cc +6 -1
  123. package/deps/rocksdb/rocksdb/env/fs_posix.cc +53 -0
  124. package/deps/rocksdb/rocksdb/env/io_posix.cc +63 -17
  125. package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
  126. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +132 -48
  127. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +92 -24
  128. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +727 -109
  129. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +3 -4
  130. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
  131. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
  132. package/deps/rocksdb/rocksdb/include/rocksdb/attribute_groups.h +20 -1
  133. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +9 -0
  134. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +9 -5
  135. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +2 -0
  136. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +10 -2
  137. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -0
  138. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +7 -0
  139. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +34 -37
  140. package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +21 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +56 -28
  142. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -0
  143. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +36 -28
  144. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +11 -0
  145. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
  146. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +84 -60
  147. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +102 -0
  148. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +89 -2
  149. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +32 -0
  150. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +30 -1
  151. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +23 -2
  152. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  153. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -0
  154. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +79 -21
  155. package/deps/rocksdb/rocksdb/memtable/skiplist.h +41 -18
  156. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +1 -5
  157. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +169 -0
  158. package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +400 -0
  159. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -0
  160. package/deps/rocksdb/rocksdb/options/cf_options.cc +137 -82
  161. package/deps/rocksdb/rocksdb/options/cf_options.h +18 -6
  162. package/deps/rocksdb/rocksdb/options/configurable.cc +31 -17
  163. package/deps/rocksdb/rocksdb/options/configurable_helper.h +7 -6
  164. package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -8
  165. package/deps/rocksdb/rocksdb/options/options_parser.cc +74 -54
  166. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +89 -0
  167. package/deps/rocksdb/rocksdb/options/options_test.cc +112 -26
  168. package/deps/rocksdb/rocksdb/port/port.h +5 -9
  169. package/deps/rocksdb/rocksdb/src.mk +8 -0
  170. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +4 -0
  171. package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -7
  172. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -0
  173. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +62 -80
  174. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +13 -3
  175. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +16 -5
  176. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +38 -7
  177. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -4
  178. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -1
  179. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +4 -1
  180. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +204 -1
  181. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -3
  182. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
  183. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +4 -0
  184. package/deps/rocksdb/rocksdb/table/format.cc +3 -3
  185. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +4 -1
  186. package/deps/rocksdb/rocksdb/table/mock_table.cc +0 -50
  187. package/deps/rocksdb/rocksdb/table/mock_table.h +53 -0
  188. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +4 -0
  189. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  190. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -5
  191. package/deps/rocksdb/rocksdb/table/table_builder.h +3 -1
  192. package/deps/rocksdb/rocksdb/table/table_properties.cc +181 -0
  193. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +5 -5
  194. package/deps/rocksdb/rocksdb/table/table_test.cc +71 -64
  195. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +45 -45
  196. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +35 -35
  197. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +43 -43
  198. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
  199. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +1 -0
  200. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +1 -1
  201. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +13 -0
  202. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +24 -5
  203. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +7 -0
  204. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +0 -52
  205. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +1 -10
  206. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +92 -0
  207. package/deps/rocksdb/rocksdb/util/thread_operation.h +1 -0
  208. package/deps/rocksdb/rocksdb/util/udt_util.cc +50 -4
  209. package/deps/rocksdb/rocksdb/util/udt_util.h +24 -11
  210. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +26 -13
  211. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -16
  212. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +2 -0
  213. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +214 -0
  214. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.h +60 -0
  215. package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +124 -0
  216. package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +441 -0
  217. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +34 -3
  218. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +7 -2
  219. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +437 -0
  220. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +34 -11
  221. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +14 -7
  222. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +7 -1
  223. package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +17 -0
  224. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +69 -0
  225. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +20 -0
  226. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1290 -0
  227. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +324 -0
  228. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -1
  229. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +8 -1
  230. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -12
  231. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +32 -3
  232. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +33 -2
  233. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +721 -9
  234. package/deps/rocksdb/rocksdb.gyp +2 -0
  235. package/package.json +1 -1
  236. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  237. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
@@ -178,11 +178,9 @@ std::string ParsedInternalKey::DebugString(bool log_err_key, bool hex,
178
178
  result += "<redacted>";
179
179
  }
180
180
 
181
- char buf[50];
182
- snprintf(buf, sizeof(buf), "' seq:%" PRIu64 ", type:%d", sequence,
183
- static_cast<int>(type));
181
+ result += "' seq:" + std::to_string(sequence);
182
+ result += ", type:" + std::to_string(type);
184
183
 
185
- result += buf;
186
184
  return result;
187
185
  }
188
186
 
@@ -272,11 +270,23 @@ LookupKey::LookupKey(const Slice& _user_key, SequenceNumber s,
272
270
 
273
271
  void IterKey::EnlargeBuffer(size_t key_size) {
274
272
  // If size is smaller than buffer size, continue using current buffer,
275
- // or the static allocated one, as default
273
+ // or the inline one, as default
276
274
  assert(key_size > buf_size_);
277
275
  // Need to enlarge the buffer.
278
276
  ResetBuffer();
279
277
  buf_ = new char[key_size];
280
278
  buf_size_ = key_size;
281
279
  }
280
+
281
// Grows secondary_buf_ so it can hold at least key_size bytes. Nothing happens
// when the current capacity already suffices; otherwise the old storage is
// released through ResetSecondaryBuffer() and replaced by a heap array of
// exactly key_size bytes. Existing buffer contents are not copied over.
+ void IterKey::EnlargeSecondaryBufferIfNeeded(size_t key_size) {
282
+ // If the requested size fits in the current buffer (the inline one by
283
+ // default, or an earlier heap allocation), keep using it.
284
+ if (key_size <= secondary_buf_size_) {
285
+ return;
286
+ }
287
+ // Too small: drop the current secondary buffer and allocate a larger one.
288
+ ResetSecondaryBuffer();
289
+ secondary_buf_ = new char[key_size];
290
+ secondary_buf_size_ = key_size;
291
+ }
282
292
  } // namespace ROCKSDB_NAMESPACE
@@ -10,6 +10,7 @@
10
10
  #pragma once
11
11
  #include <stdio.h>
12
12
 
13
+ #include <array>
13
14
  #include <memory>
14
15
  #include <optional>
15
16
  #include <string>
@@ -562,18 +563,28 @@ inline uint64_t GetInternalKeySeqno(const Slice& internal_key) {
562
563
  // allocation for smaller keys.
563
564
  // 3. It tracks user key or internal key, and allow conversion between them.
564
565
  class IterKey {
566
+ static constexpr size_t kInlineBufferSize = 39;
567
+ // This is only used by user-defined timestamps in MemTable only feature,
568
+ // which only supports uint64_t timestamps.
569
+ static constexpr char kTsMin[] = "\x00\x00\x00\x00\x00\x00\x00\x00";
570
+
565
571
  public:
566
572
  IterKey()
567
573
  : buf_(space_),
568
574
  key_(buf_),
569
575
  key_size_(0),
570
- buf_size_(sizeof(space_)),
571
- is_user_key_(true) {}
576
+ buf_size_(kInlineBufferSize),
577
+ is_user_key_(true),
578
+ secondary_buf_(space_for_secondary_buf_),
579
+ secondary_buf_size_(kInlineBufferSize) {}
572
580
  // No copying allowed
573
581
  IterKey(const IterKey&) = delete;
574
582
  void operator=(const IterKey&) = delete;
575
583
 
576
- ~IterKey() { ResetBuffer(); }
584
+ ~IterKey() {
585
+ ResetBuffer();
586
+ ResetSecondaryBuffer();
587
+ }
577
588
 
578
589
  // The bool will be picked up by the next calls to SetKey
579
590
  void SetIsUserKey(bool is_user_key) { is_user_key_ = is_user_key; }
@@ -641,13 +652,15 @@ class IterKey {
641
652
  const char* non_shared_data,
642
653
  const size_t non_shared_len,
643
654
  const size_t ts_sz) {
644
- std::string kTsMin(ts_sz, static_cast<unsigned char>(0));
645
- std::string key_with_ts;
646
- std::vector<Slice> key_parts_with_ts;
655
+ // This function is only used by the UDT in memtable feature, which only
656
+ // supports built-in comparators with uint64 timestamps.
657
+ assert(ts_sz == sizeof(uint64_t));
658
+ size_t next_key_slice_index = 0;
647
659
  if (IsUserKey()) {
648
- key_parts_with_ts = {Slice(key_, shared_len),
649
- Slice(non_shared_data, non_shared_len),
650
- Slice(kTsMin)};
660
+ key_slices_[next_key_slice_index++] = Slice(key_, shared_len);
661
+ key_slices_[next_key_slice_index++] =
662
+ Slice(non_shared_data, non_shared_len);
663
+ key_slices_[next_key_slice_index++] = Slice(kTsMin, ts_sz);
651
664
  } else {
652
665
  assert(shared_len + non_shared_len >= kNumInternalBytes);
653
666
  // Invariant: shared_user_key_len + shared_internal_bytes_len = shared_len
@@ -664,30 +677,46 @@ class IterKey {
664
677
 
665
678
  // One Slice among the three Slices will get split into two Slices, plus
666
679
  // a timestamp slice.
667
- key_parts_with_ts.reserve(5);
668
680
  bool ts_added = false;
669
681
  // Add slice parts and find the right location to add the min timestamp.
670
682
  MaybeAddKeyPartsWithTimestamp(
671
683
  key_, shared_user_key_len,
672
684
  shared_internal_bytes_len + non_shared_len < kNumInternalBytes,
673
- shared_len + non_shared_len - kNumInternalBytes, kTsMin,
674
- key_parts_with_ts, &ts_added);
685
+ shared_len + non_shared_len - kNumInternalBytes, ts_sz,
686
+ &next_key_slice_index, &ts_added);
675
687
  MaybeAddKeyPartsWithTimestamp(
676
688
  key_ + user_key_len, shared_internal_bytes_len,
677
689
  non_shared_len < kNumInternalBytes,
678
- shared_internal_bytes_len + non_shared_len - kNumInternalBytes,
679
- kTsMin, key_parts_with_ts, &ts_added);
690
+ shared_internal_bytes_len + non_shared_len - kNumInternalBytes, ts_sz,
691
+ &next_key_slice_index, &ts_added);
680
692
  MaybeAddKeyPartsWithTimestamp(non_shared_data, non_shared_len,
681
693
  non_shared_len >= kNumInternalBytes,
682
- non_shared_len - kNumInternalBytes, kTsMin,
683
- key_parts_with_ts, &ts_added);
694
+ non_shared_len - kNumInternalBytes, ts_sz,
695
+ &next_key_slice_index, &ts_added);
684
696
  assert(ts_added);
685
697
  }
698
+ SetKeyImpl(next_key_slice_index,
699
+ /* total_bytes= */ shared_len + non_shared_len + ts_sz);
700
+ }
686
701
 
687
- Slice new_key(SliceParts(&key_parts_with_ts.front(),
688
- static_cast<int>(key_parts_with_ts.size())),
689
- &key_with_ts);
690
- SetKey(new_key);
702
// Stores `key` with a minimum timestamp of ts_sz bytes (taken from kTsMin)
// padded in, and returns a Slice over the stored result.
//  - user key:     key + min timestamp
//  - internal key: user-key part + min timestamp + the trailing
//                  kNumInternalBytes footer
// The pieces are staged in key_slices_ and assembled by SetKeyImpl().
+ Slice SetKeyWithPaddedMinTimestamp(const Slice& key, size_t ts_sz) {
703
+ // This function is only used by the UDT in memtable feature, which only
704
+ // supports built-in comparators with uint64 timestamps.
705
+ assert(ts_sz == sizeof(uint64_t));
706
+ size_t num_key_slices = 0;
707
+ if (is_user_key_) {
708
+ key_slices_[0] = key;
709
+ key_slices_[1] = Slice(kTsMin, ts_sz);
710
+ num_key_slices = 2;
711
+ } else {
712
+ assert(key.size() >= kNumInternalBytes);
713
+ size_t user_key_size = key.size() - kNumInternalBytes;  // bytes before the footer
714
+ key_slices_[0] = Slice(key.data(), user_key_size);
715
+ key_slices_[1] = Slice(kTsMin, ts_sz);
716
+ key_slices_[2] = Slice(key.data() + user_key_size, kNumInternalBytes);
717
+ num_key_slices = 3;
718
+ }
719
+ return SetKeyImpl(num_key_slices, key.size() + ts_sz);
691
720
  }
692
721
 
693
722
  Slice SetKey(const Slice& key, bool copy = true) {
@@ -718,15 +747,6 @@ class IterKey {
718
747
  return Slice(key_, key_n);
719
748
  }
720
749
 
721
- // Copy the key into IterKey own buf_
722
- void OwnKey() {
723
- assert(IsKeyPinned() == true);
724
-
725
- Reserve(key_size_);
726
- memcpy(buf_, key_, key_size_);
727
- key_ = buf_;
728
- }
729
-
730
750
  // Update the sequence number in the internal key. Guarantees not to
731
751
  // invalidate slices to the key (and the user key).
732
752
  void UpdateInternalKey(uint64_t seq, ValueType t, const Slice* ts = nullptr) {
@@ -738,10 +758,15 @@ class IterKey {
738
758
  ts->size());
739
759
  }
740
760
  uint64_t newval = (seq << 8) | t;
741
- EncodeFixed64(&buf_[key_size_ - kNumInternalBytes], newval);
761
+ if (key_ == buf_) {
762
+ EncodeFixed64(&buf_[key_size_ - kNumInternalBytes], newval);
763
+ } else {
764
+ assert(key_ == secondary_buf_);
765
+ EncodeFixed64(&secondary_buf_[key_size_ - kNumInternalBytes], newval);
766
+ }
742
767
  }
743
768
 
744
- bool IsKeyPinned() const { return (key_ != buf_); }
769
+ bool IsKeyPinned() const { return key_ != buf_ && key_ != secondary_buf_; }
745
770
 
746
771
  // If `ts` is provided, user_key should not contain timestamp,
747
772
  // and `ts` is appended after user_key.
@@ -806,8 +831,24 @@ class IterKey {
806
831
  const char* key_;
807
832
  size_t key_size_;
808
833
  size_t buf_size_;
809
- char space_[39]; // Avoid allocation for short keys
834
+ char space_[kInlineBufferSize]; // Avoid allocation for short keys
810
835
  bool is_user_key_;
836
+ // Below variables are only used by user-defined timestamps in MemTable only
837
+ // feature for iterating keys in an index block or a data block.
838
+ //
839
+ // We will alternate between buf_ and secondary_buf_ to hold the key. key_
840
+ // will be updated accordingly to point to the right one. This is to avoid
841
+ // an extra copy when we need to copy some shared bytes from previous key
842
+ // (delta encoding), and we need to pad a min timestamp at the right location.
843
+ char space_for_secondary_buf_[kInlineBufferSize]; // Avoid allocation for
844
+ // short keys
845
+ char* secondary_buf_;
846
+ size_t secondary_buf_size_;
847
+ // Used to track the pieces that together make the whole key. We then copy
848
+ // these pieces in order either into buf_ or secondary_buf_ depending on where
849
+ // the previous key is held.
850
+ std::array<Slice, 5> key_slices_;
851
+ // End of variables used by user-defined timestamps in MemTable only feature.
811
852
 
812
853
  Slice SetKeyImpl(const Slice& key, bool copy) {
813
854
  size_t size = key.size();
@@ -824,18 +865,64 @@ class IterKey {
824
865
  return Slice(key_, key_size_);
825
866
  }
826
867
 
868
// Concatenates the first num_key_slices entries of key_slices_ into one of the
// two owned buffers, points key_ at it and returns a Slice over the result.
// total_bytes must equal the summed size of those slices (checked in debug
// builds). The destination is always the buffer that does NOT currently hold
// key_, so slices that reference the previous key remain readable while they
// are being copied.
+ Slice SetKeyImpl(size_t num_key_slices, size_t total_bytes) {
869
+ assert(num_key_slices <= 5);
870
+ char* buf_start = nullptr;
871
+ if (key_ == buf_) {
872
+ // If the previous key is in buf_, we copy key_slices_ in order into
873
+ // secondary_buf_.
874
+ EnlargeSecondaryBufferIfNeeded(total_bytes);
875
+ buf_start = secondary_buf_;
876
+ key_ = secondary_buf_;
877
+ } else {
878
+ // Previous key is in secondary_buf_ or not owned by this object: copy
879
+ // key_slices_ in order into buf_.
880
+ buf_start = buf_;
881
+ key_ = buf_;
882
+ }
883
+ #ifndef NDEBUG
884
+ size_t actual_total_bytes = 0;
885
+ #endif // NDEBUG
886
+ for (size_t i = 0; i < num_key_slices; i++) {
887
+ size_t key_slice_size = key_slices_[i].size();
888
+ memcpy(buf_start, key_slices_[i].data(), key_slice_size);
889
+ buf_start += key_slice_size;  // advance the write cursor past this piece
890
+ #ifndef NDEBUG
891
+ actual_total_bytes += key_slice_size;
892
+ #endif // NDEBUG
893
+ }
894
+ #ifndef NDEBUG
895
+ assert(actual_total_bytes == total_bytes);
896
+ #endif // NDEBUG
897
+ key_size_ = total_bytes;
898
+ return Slice(key_, key_size_);
899
+ }
900
+
827
901
  void ResetBuffer() {
902
+ if (key_ == buf_) {
903
+ key_size_ = 0;
904
+ }
828
905
  if (buf_ != space_) {
829
906
  delete[] buf_;
830
907
  buf_ = space_;
831
908
  }
832
- buf_size_ = sizeof(space_);
833
- key_size_ = 0;
909
+ buf_size_ = kInlineBufferSize;
910
+ }
911
+
912
// Returns the secondary buffer to its initial state: frees any heap
// allocation, points secondary_buf_ back at the inline array and restores the
// inline capacity. key_size_ is cleared only when the current key lives in the
// secondary buffer, so a key held elsewhere is left untouched.
+ void ResetSecondaryBuffer() {
913
+ if (key_ == secondary_buf_) {
914
+ key_size_ = 0;
915
+ }
916
+ if (secondary_buf_ != space_for_secondary_buf_) {  // heap-allocated, not inline
917
+ delete[] secondary_buf_;
918
+ secondary_buf_ = space_for_secondary_buf_;
919
+ }
920
+ secondary_buf_size_ = kInlineBufferSize;
834
921
  }
835
922
 
836
923
  // Enlarge the buffer size if needed based on key_size.
837
- // By default, static allocated buffer is used. Once there is a key
838
- // larger than the static allocated buffer, another buffer is dynamically
924
+ // By default, inline buffer is used. Once there is a key
925
+ // larger than the inline buffer, another buffer is dynamically
839
926
  // allocated, until a larger key buffer is requested. In that case, we
840
927
  // reallocate buffer and delete the old one.
841
928
  void EnlargeBufferIfNeeded(size_t key_size) {
@@ -846,23 +933,27 @@ class IterKey {
846
933
  }
847
934
  }
848
935
 
936
+ void EnlargeSecondaryBufferIfNeeded(size_t key_size);
937
+
849
938
  void EnlargeBuffer(size_t key_size);
850
939
 
851
940
  void MaybeAddKeyPartsWithTimestamp(const char* slice_data,
852
941
  const size_t slice_sz, bool add_timestamp,
853
- const size_t left_sz,
854
- const std::string& min_timestamp,
855
- std::vector<Slice>& key_parts,
942
+ const size_t left_sz, const size_t ts_sz,
943
+ size_t* next_key_slice_idx,
856
944
  bool* ts_added) {
945
+ assert(next_key_slice_idx);
857
946
  if (add_timestamp && !*ts_added) {
858
947
  assert(slice_sz >= left_sz);
859
- key_parts.emplace_back(slice_data, left_sz);
860
- key_parts.emplace_back(min_timestamp);
861
- key_parts.emplace_back(slice_data + left_sz, slice_sz - left_sz);
948
+ key_slices_[(*next_key_slice_idx)++] = Slice(slice_data, left_sz);
949
+ key_slices_[(*next_key_slice_idx)++] = Slice(kTsMin, ts_sz);
950
+ key_slices_[(*next_key_slice_idx)++] =
951
+ Slice(slice_data + left_sz, slice_sz - left_sz);
862
952
  *ts_added = true;
863
953
  } else {
864
- key_parts.emplace_back(slice_data, slice_sz);
954
+ key_slices_[(*next_key_slice_idx)++] = Slice(slice_data, slice_sz);
865
955
  }
956
+ assert(*next_key_slice_idx <= 5);
866
957
  }
867
958
  };
868
959
 
@@ -936,22 +1027,13 @@ struct RangeTombstone {
936
1027
  // User-defined timestamp is enabled, `sk` and `ek` should be user key
937
1028
  // with timestamp, `ts` will replace the timestamps in `sk` and
938
1029
  // `ek`.
939
- // When `logical_strip_timestamp` is true, the timestamps in `sk` and `ek`
940
- // will be replaced with min timestamp.
941
- RangeTombstone(Slice sk, Slice ek, SequenceNumber sn, Slice ts,
942
- bool logical_strip_timestamp)
943
- : seq_(sn) {
1030
+ RangeTombstone(Slice sk, Slice ek, SequenceNumber sn, Slice ts) : seq_(sn) {
944
1031
  const size_t ts_sz = ts.size();
945
1032
  assert(ts_sz > 0);
946
1033
  pinned_start_key_.reserve(sk.size());
947
1034
  pinned_end_key_.reserve(ek.size());
948
- if (logical_strip_timestamp) {
949
- AppendUserKeyWithMinTimestamp(&pinned_start_key_, sk, ts_sz);
950
- AppendUserKeyWithMinTimestamp(&pinned_end_key_, ek, ts_sz);
951
- } else {
952
- AppendUserKeyWithDifferentTimestamp(&pinned_start_key_, sk, ts);
953
- AppendUserKeyWithDifferentTimestamp(&pinned_end_key_, ek, ts);
954
- }
1035
+ AppendUserKeyWithDifferentTimestamp(&pinned_start_key_, sk, ts);
1036
+ AppendUserKeyWithDifferentTimestamp(&pinned_end_key_, ek, ts);
955
1037
  start_key_ = pinned_start_key_;
956
1038
  end_key_ = pinned_end_key_;
957
1039
  ts_ = Slice(pinned_start_key_.data() + sk.size() - ts_sz, ts_sz);
@@ -132,6 +132,7 @@ void EventHelpers::LogAndNotifyTableFileCreationFinished(
132
132
  << table_properties.compression_name << "compression_options"
133
133
  << table_properties.compression_options << "creation_time"
134
134
  << table_properties.creation_time << "oldest_key_time"
135
+ << table_properties.oldest_key_time << "newest_key_time"  // each value follows its own key
135
136
  << table_properties.newest_key_time << "file_creation_time"
136
137
  << table_properties.file_creation_time
137
138
  << "slow_compression_estimated_data_size"
@@ -13,6 +13,8 @@
13
13
  #include <vector>
14
14
 
15
15
  #include "db/db_impl/db_impl.h"
16
+ #include "db/manifest_ops.h"
17
+ #include "db/version_edit_handler.h"
16
18
  #include "db/version_util.h"
17
19
  #include "logging/logging.h"
18
20
  #include "util/atomic.h"
@@ -40,6 +42,58 @@ Status SuggestCompactRange(DB* db, const Slice* begin, const Slice* end) {
40
42
  return SuggestCompactRange(db, db->DefaultColumnFamily(), begin, end);
41
43
  }
42
44
 
45
// Collects the file checksum information recorded in the DB's current
// MANIFEST.
//
// Resolves the current MANIFEST path for `dbname` through `fs`, opens it for
// sequential reading, and feeds its log records to a FileChecksumRetriever
// that writes into `checksum_list` (which is reset() first).
//
// Returns the status of the first failing step: locating the MANIFEST,
// InvalidArgument when `checksum_list` is null, opening the file, or otherwise
// the retriever's final status.
+ Status GetFileChecksumsFromCurrentManifest(FileSystem* fs,
46
+ const std::string& dbname,
47
+ FileChecksumList* checksum_list) {
48
+ std::string manifest_path;
49
+ uint64_t manifest_file_number;
50
+ Status s = GetCurrentManifestPath(dbname, fs, true /* is_retry */,
51
+ &manifest_path, &manifest_file_number);
52
+ if (!s.ok()) {
53
+ return s;
54
+ }
55
+
56
+ if (checksum_list == nullptr) {  // checked only after the manifest lookup above
57
+ return Status::InvalidArgument("checksum_list is nullptr");
58
+ }
59
+ assert(checksum_list);  // cannot fire after the check above
60
+
61
+ const ReadOptions read_options(Env::IOActivity::kReadManifest);
62
+ checksum_list->reset();
63
+
64
+ std::unique_ptr<SequentialFileReader> file_reader;
65
+ {
66
+ std::unique_ptr<FSSequentialFile> file;
67
+ s = fs->NewSequentialFile(manifest_path,
68
+ fs->OptimizeForManifestRead(FileOptions()), &file,
69
+ nullptr /* dbg */);
70
+ if (!s.ok()) {
71
+ return s;
72
+ }
73
+ file_reader.reset(new SequentialFileReader(std::move(file), manifest_path));
74
+ }
75
+
76
+ struct LogReporter : public log::Reader::Reporter {  // keeps only the first corruption status
77
+ Status* status_ptr;
78
+ void Corruption(size_t /*bytes*/, const Status& st) override {
79
+ if (status_ptr->ok()) {
80
+ *status_ptr = st;
81
+ }
82
+ }
83
+ } reporter;
84
+ reporter.status_ptr = &s;
85
+ log::Reader reader(nullptr, std::move(file_reader), &reporter,
86
+ true /* checksum */, 0 /* log_number */);
87
+
88
+ // Read all records from the manifest file...
89
+ uint64_t manifest_file_size = std::numeric_limits<uint64_t>::max();  // NOTE(review): presumably "no size limit" — confirm
90
+ FileChecksumRetriever retriever(read_options, manifest_file_size,
91
+ *checksum_list);
92
+ retriever.Iterate(reader, &s);
93
+
94
+ return retriever.status();  // NOTE(review): `s` itself is not returned; confirm Iterate() folds it in
95
+ }
96
+
43
97
  Status UpdateManifestForFilesState(
44
98
  const DBOptions& db_opts, const std::string& db_name,
45
99
  const std::vector<ColumnFamilyDescriptor>& column_families,