rocksdb-native 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. package/binding.c +92 -10
  2. package/index.js +9 -0
  3. package/lib/batch.js +11 -1
  4. package/lib/iterator.js +3 -1
  5. package/lib/snapshot.js +21 -0
  6. package/package.json +1 -1
  7. package/prebuilds/darwin-arm64/rocksdb-native.bare +0 -0
  8. package/prebuilds/darwin-arm64/rocksdb-native.node +0 -0
  9. package/prebuilds/darwin-x64/rocksdb-native.bare +0 -0
  10. package/prebuilds/darwin-x64/rocksdb-native.node +0 -0
  11. package/prebuilds/linux-arm64/rocksdb-native.bare +0 -0
  12. package/prebuilds/linux-arm64/rocksdb-native.node +0 -0
  13. package/prebuilds/linux-x64/rocksdb-native.bare +0 -0
  14. package/prebuilds/linux-x64/rocksdb-native.node +0 -0
  15. package/prebuilds/win32-x64/rocksdb-native.bare +0 -0
  16. package/prebuilds/win32-x64/rocksdb-native.node +0 -0
  17. package/vendor/librocksdb/include/rocksdb.h +38 -4
  18. package/vendor/librocksdb/src/rocksdb.cc +114 -14
  19. package/vendor/librocksdb/vendor/rocksdb/CMakeLists.txt +21 -4
  20. package/vendor/librocksdb/vendor/rocksdb/cache/secondary_cache_adapter.cc +6 -3
  21. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
  22. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.h +4 -2
  23. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.cc +20 -0
  24. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.h +83 -0
  25. package/vendor/librocksdb/vendor/rocksdb/db/builder.cc +9 -5
  26. package/vendor/librocksdb/vendor/rocksdb/db/builder.h +1 -1
  27. package/vendor/librocksdb/vendor/rocksdb/db/c.cc +231 -6
  28. package/vendor/librocksdb/vendor/rocksdb/db/c_test.c +202 -2
  29. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.cc +47 -0
  30. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.h +79 -0
  31. package/vendor/librocksdb/vendor/rocksdb/db/column_family.cc +28 -0
  32. package/vendor/librocksdb/vendor/rocksdb/db/column_family.h +17 -0
  33. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.cc +8 -1
  34. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.h +11 -9
  35. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.cc +50 -23
  36. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.h +13 -0
  37. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.cc +22 -25
  38. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.h +2 -0
  39. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.cc +8 -1
  40. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.h +1 -0
  41. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.cc +40 -17
  42. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.h +20 -14
  43. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_level.cc +11 -6
  44. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_universal.cc +77 -24
  45. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_service_job.cc +2 -0
  46. package/vendor/librocksdb/vendor/rocksdb/db/convenience.cc +3 -0
  47. package/vendor/librocksdb/vendor/rocksdb/db/db_filesnapshot.cc +125 -31
  48. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.cc +457 -231
  49. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.h +172 -73
  50. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_compaction_flush.cc +152 -133
  51. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  52. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_files.cc +58 -52
  53. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.cc +348 -0
  54. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.h +54 -0
  55. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_open.cc +136 -117
  56. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.cc +4 -3
  57. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.h +7 -6
  58. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_write.cc +134 -80
  59. package/vendor/librocksdb/vendor/rocksdb/db/db_iter.cc +11 -0
  60. package/vendor/librocksdb/vendor/rocksdb/db/db_test2.cc +1 -1
  61. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.cc +11 -1
  62. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.h +11 -7
  63. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.cc +19 -4
  64. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.h +3 -2
  65. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.cc +34 -39
  66. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.h +3 -4
  67. package/vendor/librocksdb/vendor/rocksdb/db/event_helpers.cc +6 -3
  68. package/vendor/librocksdb/vendor/rocksdb/db/experimental.cc +3 -2
  69. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.cc +76 -18
  70. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
  71. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.cc +37 -5
  72. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.h +14 -0
  73. package/vendor/librocksdb/vendor/rocksdb/db/import_column_family_job.cc +49 -45
  74. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.cc +60 -1
  75. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.h +20 -1
  76. package/vendor/librocksdb/vendor/rocksdb/db/log_reader.cc +15 -6
  77. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.cc +59 -10
  78. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.h +8 -0
  79. package/vendor/librocksdb/vendor/rocksdb/db/memtable.cc +24 -40
  80. package/vendor/librocksdb/vendor/rocksdb/db/memtable.h +10 -10
  81. package/vendor/librocksdb/vendor/rocksdb/db/memtable_list.cc +9 -8
  82. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator_impl.h +296 -0
  83. package/vendor/librocksdb/vendor/rocksdb/db/range_tombstone_fragmenter.h +8 -10
  84. package/vendor/librocksdb/vendor/rocksdb/db/repair.cc +4 -3
  85. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.cc +30 -0
  86. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.h +9 -0
  87. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.cc +17 -2
  88. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.h +9 -1
  89. package/vendor/librocksdb/vendor/rocksdb/db/table_properties_collector.h +9 -2
  90. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.cc +3 -3
  91. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.h +7 -7
  92. package/vendor/librocksdb/vendor/rocksdb/db/version_edit.cc +0 -1
  93. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.cc +39 -5
  94. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.h +24 -15
  95. package/vendor/librocksdb/vendor/rocksdb/db/version_set.cc +117 -64
  96. package/vendor/librocksdb/vendor/rocksdb/db/version_set.h +27 -10
  97. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.cc +37 -29
  98. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.h +6 -5
  99. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns.cc +2 -3
  100. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns_helper.cc +6 -0
  101. package/vendor/librocksdb/vendor/rocksdb/db/write_batch.cc +89 -31
  102. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.cc +53 -5
  103. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.h +36 -4
  104. package/vendor/librocksdb/vendor/rocksdb/env/composite_env_wrapper.h +21 -0
  105. package/vendor/librocksdb/vendor/rocksdb/env/env.cc +15 -0
  106. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.cc +331 -0
  107. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.h +139 -0
  108. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.cc +8 -6
  109. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.h +1 -1
  110. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.cc +130 -27
  111. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.h +61 -8
  112. package/vendor/librocksdb/vendor/rocksdb/file/file_util.cc +25 -4
  113. package/vendor/librocksdb/vendor/rocksdb/file/file_util.h +15 -0
  114. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.cc +1 -0
  115. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.h +9 -4
  116. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.cc +18 -0
  117. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.h +31 -4
  118. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.cc +40 -38
  119. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.h +48 -15
  120. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/advanced_options.h +12 -3
  121. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/attribute_groups.h +114 -0
  122. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/c.h +90 -0
  123. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/cache.h +5 -0
  124. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/comparator.h +27 -0
  125. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/db.h +71 -12
  126. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/env.h +9 -0
  127. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/experimental.h +5 -0
  128. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/file_system.h +14 -0
  129. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator.h +9 -71
  130. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator_base.h +90 -0
  131. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/listener.h +21 -0
  132. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/options.h +125 -12
  133. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/perf_context.h +1 -1
  134. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/sst_file_reader.h +11 -1
  135. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table.h +6 -6
  136. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table_properties.h +19 -0
  137. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/transaction_log.h +12 -6
  138. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/types.h +12 -0
  139. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/universal_compaction.h +31 -0
  140. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/user_write_callback.h +29 -0
  141. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/cache_dump_load.h +4 -0
  142. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
  143. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
  144. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/env_mirror.h +1 -1
  145. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -7
  146. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -4
  147. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/stackable_db.h +24 -5
  148. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
  149. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction.h +42 -17
  150. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction_db.h +5 -0
  151. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/types_util.h +36 -0
  152. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +71 -3
  153. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/version.h +2 -2
  154. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/wide_columns.h +87 -72
  155. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/write_batch_base.h +1 -1
  156. package/vendor/librocksdb/vendor/rocksdb/memory/memory_allocator.cc +1 -0
  157. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.cc +13 -2
  158. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.h +6 -2
  159. package/vendor/librocksdb/vendor/rocksdb/options/db_options.cc +27 -1
  160. package/vendor/librocksdb/vendor/rocksdb/options/db_options.h +10 -3
  161. package/vendor/librocksdb/vendor/rocksdb/options/options.cc +3 -0
  162. package/vendor/librocksdb/vendor/rocksdb/options/options_helper.cc +1 -0
  163. package/vendor/librocksdb/vendor/rocksdb/port/jemalloc_helper.h +2 -2
  164. package/vendor/librocksdb/vendor/rocksdb/port/stack_trace.cc +1 -0
  165. package/vendor/librocksdb/vendor/rocksdb/port/win/port_win.cc +3 -2
  166. package/vendor/librocksdb/vendor/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
  167. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_builder.cc +47 -31
  168. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_factory.cc +15 -0
  169. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.cc +37 -18
  170. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.h +10 -3
  171. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
  172. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.h +15 -7
  173. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
  174. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
  175. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_cache.h +31 -0
  176. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_prefetcher.cc +6 -0
  177. package/vendor/librocksdb/vendor/rocksdb/table/block_based/cachable_entry.h +10 -5
  178. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block.h +34 -28
  179. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
  180. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
  181. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_policy.cc +12 -3
  182. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.cc +37 -30
  183. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.h +11 -13
  184. package/vendor/librocksdb/vendor/rocksdb/table/block_based/hash_index_reader.cc +1 -2
  185. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.cc +62 -53
  186. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.h +60 -38
  187. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.cc +14 -9
  188. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.h +4 -1
  189. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.cc +135 -94
  190. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.h +52 -46
  191. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
  192. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
  193. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
  194. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
  195. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.cc +8 -10
  196. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.h +2 -1
  197. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.cc +9 -10
  198. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.h +3 -2
  199. package/vendor/librocksdb/vendor/rocksdb/table/format.cc +1 -2
  200. package/vendor/librocksdb/vendor/rocksdb/table/iterator.cc +4 -0
  201. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.cc +18 -13
  202. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.h +5 -3
  203. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.cc +18 -4
  204. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.h +4 -0
  205. package/vendor/librocksdb/vendor/rocksdb/table/plain/plain_table_builder.cc +2 -2
  206. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_dumper.cc +6 -6
  207. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_reader.cc +24 -2
  208. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_writer_collectors.h +3 -1
  209. package/vendor/librocksdb/vendor/rocksdb/table/table_builder.h +8 -7
  210. package/vendor/librocksdb/vendor/rocksdb/table/table_iterator.h +69 -0
  211. package/vendor/librocksdb/vendor/rocksdb/table/table_reader.h +9 -0
  212. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.cc +25 -0
  213. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.h +12 -0
  214. package/vendor/librocksdb/vendor/rocksdb/tools/db_bench_tool.cc +32 -0
  215. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd.cc +618 -124
  216. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd_impl.h +19 -1
  217. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_tool.cc +9 -0
  218. package/vendor/librocksdb/vendor/rocksdb/util/aligned_storage.h +24 -0
  219. package/vendor/librocksdb/vendor/rocksdb/util/autovector.h +4 -0
  220. package/vendor/librocksdb/vendor/rocksdb/util/comparator.cc +12 -0
  221. package/vendor/librocksdb/vendor/rocksdb/util/filter_bench.cc +1 -1
  222. package/vendor/librocksdb/vendor/rocksdb/util/random.cc +2 -1
  223. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.cc +3 -4
  224. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.h +1 -1
  225. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.cc +33 -0
  226. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.h +7 -0
  227. package/vendor/librocksdb/vendor/rocksdb/util/write_batch_util.h +5 -0
  228. package/vendor/librocksdb/vendor/rocksdb/util/xxhash.h +36 -29
  229. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl.h +3 -0
  230. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +20 -0
  231. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.cc +29 -9
  232. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.h +14 -3
  233. package/vendor/librocksdb/vendor/rocksdb/utilities/debug.cc +16 -4
  234. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.cc +677 -248
  235. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.h +325 -158
  236. package/vendor/librocksdb/vendor/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -8
  237. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
  238. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
  239. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
  240. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +1 -1
  241. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +3 -3
  242. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.cc +116 -20
  243. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.h +33 -1
  244. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +78 -13
  245. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.h +33 -1
  246. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.cc +106 -7
  247. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.h +68 -10
  248. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_test.h +7 -3
  249. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  250. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.h +7 -4
  251. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -12
  252. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
  253. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  254. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn.cc +11 -9
  255. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
  256. package/vendor/librocksdb/vendor/rocksdb/utilities/types_util.cc +88 -0
  257. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +313 -14
  258. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +7 -0
  259. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
  260. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.cc +0 -102
  261. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.h +0 -159
@@ -65,11 +65,11 @@ Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number,
65
65
  largest = file_to_import.largest_internal_key;
66
66
  } else {
67
67
  if (cfd_->internal_comparator().Compare(
68
- smallest, file_to_import.smallest_internal_key) < 0) {
68
+ smallest, file_to_import.smallest_internal_key) > 0) {
69
69
  smallest = file_to_import.smallest_internal_key;
70
70
  }
71
71
  if (cfd_->internal_comparator().Compare(
72
- largest, file_to_import.largest_internal_key) > 0) {
72
+ largest, file_to_import.largest_internal_key) < 0) {
73
73
  largest = file_to_import.largest_internal_key;
74
74
  }
75
75
  }
@@ -175,22 +175,29 @@ Status ImportColumnFamilyJob::Run() {
175
175
  static_cast<uint64_t>(temp_current_time);
176
176
  }
177
177
 
178
- // Recover files' epoch number using dummy VersionStorageInfo
179
- VersionBuilder dummy_version_builder(
180
- cfd_->current()->version_set()->file_options(), cfd_->ioptions(),
181
- cfd_->table_cache(), cfd_->current()->storage_info(),
182
- cfd_->current()->version_set(),
183
- cfd_->GetFileMetadataCacheReservationManager());
184
- VersionStorageInfo dummy_vstorage(
185
- &cfd_->internal_comparator(), cfd_->user_comparator(),
186
- cfd_->NumberLevels(), cfd_->ioptions()->compaction_style,
187
- nullptr /* src_vstorage */, cfd_->ioptions()->force_consistency_checks,
188
- EpochNumberRequirement::kMightMissing, cfd_->ioptions()->clock,
189
- cfd_->GetLatestMutableCFOptions()->bottommost_file_compaction_delay,
190
- cfd_->current()->version_set()->offpeak_time_option());
191
178
  Status s;
192
-
179
+ // When importing multiple CFs, we should not reuse epoch number from ingested
180
+ // files. Since these epoch numbers were assigned by different CFs, there may
181
+ // be different files from different CFs with the same epoch number. With a
182
+ // subsequent intra-L0 compaction we may end up with files with overlapping
183
+ // key range but the same epoch number. Here we will create a dummy
184
+ // VersionStorageInfo per CF being imported. Each CF's files will be assigned
185
+ // increasing epoch numbers to avoid duplicated epoch number. This is done by
186
+ // only resetting epoch number of the new CF in the first call to
187
+ // RecoverEpochNumbers() below.
193
188
  for (size_t i = 0; s.ok() && i < files_to_import_.size(); ++i) {
189
+ VersionBuilder dummy_version_builder(
190
+ cfd_->current()->version_set()->file_options(), cfd_->ioptions(),
191
+ cfd_->table_cache(), cfd_->current()->storage_info(),
192
+ cfd_->current()->version_set(),
193
+ cfd_->GetFileMetadataCacheReservationManager());
194
+ VersionStorageInfo dummy_vstorage(
195
+ &cfd_->internal_comparator(), cfd_->user_comparator(),
196
+ cfd_->NumberLevels(), cfd_->ioptions()->compaction_style,
197
+ nullptr /* src_vstorage */, cfd_->ioptions()->force_consistency_checks,
198
+ EpochNumberRequirement::kMightMissing, cfd_->ioptions()->clock,
199
+ cfd_->GetLatestMutableCFOptions()->bottommost_file_compaction_delay,
200
+ cfd_->current()->version_set()->offpeak_time_option());
194
201
  for (size_t j = 0; s.ok() && j < files_to_import_[i].size(); ++j) {
195
202
  const auto& f = files_to_import_[i][j];
196
203
  const auto& file_metadata = *metadatas_[i][j];
@@ -218,42 +225,39 @@ Status ImportColumnFamilyJob::Run() {
218
225
  f.table_properties.user_defined_timestamps_persisted));
219
226
  s = dummy_version_builder.Apply(&dummy_version_edit);
220
227
  }
221
- }
222
-
223
- if (s.ok()) {
224
- s = dummy_version_builder.SaveTo(&dummy_vstorage);
225
- }
226
- if (s.ok()) {
227
- dummy_vstorage.RecoverEpochNumbers(cfd_);
228
- }
229
-
230
- // Record changes from this CF import in VersionEdit, including files with
231
- // recovered epoch numbers
232
- if (s.ok()) {
233
- edit_.SetColumnFamily(cfd_->GetID());
234
-
228
+ if (s.ok()) {
229
+ s = dummy_version_builder.SaveTo(&dummy_vstorage);
230
+ }
231
+ if (s.ok()) {
232
+ // force resetting epoch number for each file
233
+ dummy_vstorage.RecoverEpochNumbers(cfd_, /*restart_epoch=*/i == 0,
234
+ /*force=*/true);
235
+ edit_.SetColumnFamily(cfd_->GetID());
236
+
237
+ for (int level = 0; level < dummy_vstorage.num_levels(); level++) {
238
+ for (FileMetaData* file_meta : dummy_vstorage.LevelFiles(level)) {
239
+ edit_.AddFile(level, *file_meta);
240
+ // If incoming sequence number is higher, update local sequence
241
+ // number.
242
+ if (file_meta->fd.largest_seqno > versions_->LastSequence()) {
243
+ versions_->SetLastAllocatedSequence(file_meta->fd.largest_seqno);
244
+ versions_->SetLastPublishedSequence(file_meta->fd.largest_seqno);
245
+ versions_->SetLastSequence(file_meta->fd.largest_seqno);
246
+ }
247
+ }
248
+ }
249
+ }
250
+ // Release resources occupied by the dummy VersionStorageInfo
235
251
  for (int level = 0; level < dummy_vstorage.num_levels(); level++) {
236
252
  for (FileMetaData* file_meta : dummy_vstorage.LevelFiles(level)) {
237
- edit_.AddFile(level, *file_meta);
238
- // If incoming sequence number is higher, update local sequence number.
239
- if (file_meta->fd.largest_seqno > versions_->LastSequence()) {
240
- versions_->SetLastAllocatedSequence(file_meta->fd.largest_seqno);
241
- versions_->SetLastPublishedSequence(file_meta->fd.largest_seqno);
242
- versions_->SetLastSequence(file_meta->fd.largest_seqno);
253
+ file_meta->refs--;
254
+ if (file_meta->refs <= 0) {
255
+ delete file_meta;
243
256
  }
244
257
  }
245
258
  }
246
259
  }
247
260
 
248
- // Release resources occupied by the dummy VersionStorageInfo
249
- for (int level = 0; level < dummy_vstorage.num_levels(); level++) {
250
- for (FileMetaData* file_meta : dummy_vstorage.LevelFiles(level)) {
251
- file_meta->refs--;
252
- if (file_meta->refs <= 0) {
253
- delete file_meta;
254
- }
255
- }
256
- }
257
261
  return s;
258
262
  }
259
263
 
@@ -16,6 +16,7 @@
16
16
  #include <limits>
17
17
  #include <sstream>
18
18
  #include <string>
19
+ #include <unordered_map>
19
20
  #include <utility>
20
21
  #include <vector>
21
22
 
@@ -33,7 +34,6 @@
33
34
 
34
35
  namespace ROCKSDB_NAMESPACE {
35
36
 
36
-
37
37
  const std::map<LevelStatType, LevelStat> InternalStats::compaction_level_stats =
38
38
  {
39
39
  {LevelStatType::NUM_FILES, LevelStat{"NumFiles", "Files"}},
@@ -2135,5 +2135,64 @@ void InternalStats::DumpCFFileHistogram(std::string* value) {
2135
2135
  value->append(oss.str());
2136
2136
  }
2137
2137
 
2138
+ namespace {
2139
+
2140
+ class SumPropertyAggregator : public IntPropertyAggregator {
2141
+ public:
2142
+ SumPropertyAggregator() : aggregated_value_(0) {}
2143
+ virtual ~SumPropertyAggregator() override = default;
2144
+
2145
+ void Add(ColumnFamilyData* cfd, uint64_t value) override {
2146
+ (void)cfd;
2147
+ aggregated_value_ += value;
2148
+ }
2149
+
2150
+ uint64_t Aggregate() const override { return aggregated_value_; }
2151
+
2152
+ private:
2153
+ uint64_t aggregated_value_;
2154
+ };
2155
+
2156
+ // A block cache may be shared by multiple column families.
2157
+ // BlockCachePropertyAggregator ensures that the same cache is only added once.
2158
+ class BlockCachePropertyAggregator : public IntPropertyAggregator {
2159
+ public:
2160
+ BlockCachePropertyAggregator() = default;
2161
+ virtual ~BlockCachePropertyAggregator() override = default;
2162
+
2163
+ void Add(ColumnFamilyData* cfd, uint64_t value) override {
2164
+ auto* table_factory = cfd->ioptions()->table_factory.get();
2165
+ assert(table_factory != nullptr);
2166
+ Cache* cache =
2167
+ table_factory->GetOptions<Cache>(TableFactory::kBlockCacheOpts());
2168
+ if (cache != nullptr) {
2169
+ block_cache_properties_.emplace(cache, value);
2170
+ }
2171
+ }
2172
+
2173
+ uint64_t Aggregate() const override {
2174
+ uint64_t sum = 0;
2175
+ for (const auto& p : block_cache_properties_) {
2176
+ sum += p.second;
2177
+ }
2178
+ return sum;
2179
+ }
2180
+
2181
+ private:
2182
+ std::unordered_map<Cache*, uint64_t> block_cache_properties_;
2183
+ };
2184
+
2185
+ } // anonymous namespace
2186
+
2187
+ std::unique_ptr<IntPropertyAggregator> CreateIntPropertyAggregator(
2188
+ const Slice& property) {
2189
+ if (property == DB::Properties::kBlockCacheCapacity ||
2190
+ property == DB::Properties::kBlockCacheUsage ||
2191
+ property == DB::Properties::kBlockCachePinnedUsage) {
2192
+ return std::make_unique<BlockCachePropertyAggregator>();
2193
+ } else {
2194
+ return std::make_unique<SumPropertyAggregator>();
2195
+ }
2196
+ }
2138
2197
 
2139
2198
  } // namespace ROCKSDB_NAMESPACE
@@ -432,7 +432,7 @@ class InternalStats {
432
432
  explicit CompactionStatsFull() : stats(), penultimate_level_stats() {}
433
433
 
434
434
  explicit CompactionStatsFull(CompactionReason reason, int c)
435
- : stats(reason, c), penultimate_level_stats(reason, c){}
435
+ : stats(reason, c), penultimate_level_stats(reason, c) {}
436
436
 
437
437
  uint64_t TotalBytesWritten() const {
438
438
  uint64_t bytes_written = stats.bytes_written + stats.bytes_written_blob;
@@ -873,5 +873,24 @@ class InternalStats {
873
873
  uint64_t started_at_;
874
874
  };
875
875
 
876
+ // IntPropertyAggregator aggregates an integer property across all column
877
+ // families.
878
+ class IntPropertyAggregator {
879
+ public:
880
+ IntPropertyAggregator() {}
881
+ virtual ~IntPropertyAggregator() {}
882
+
883
+ IntPropertyAggregator(const IntPropertyAggregator&) = delete;
884
+ void operator=(const IntPropertyAggregator&) = delete;
885
+
886
+ // Add a column family's property value to the aggregator.
887
+ virtual void Add(ColumnFamilyData* cfd, uint64_t value) = 0;
888
+
889
+ // Get the aggregated value.
890
+ virtual uint64_t Aggregate() const = 0;
891
+ };
892
+
893
+ std::unique_ptr<IntPropertyAggregator> CreateIntPropertyAggregator(
894
+ const Slice& property);
876
895
 
877
896
  } // namespace ROCKSDB_NAMESPACE
@@ -232,7 +232,9 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
232
232
  // produce a hole in the recovered data. Report an error here, which
233
233
  // higher layers can choose to ignore when it's provable there is no
234
234
  // hole.
235
- ReportCorruption(scratch->size(), "error reading trailing data");
235
+ ReportCorruption(
236
+ scratch->size(),
237
+ "error reading trailing data due to encountering EOF");
236
238
  }
237
239
  // This can be caused by the writer dying immediately after
238
240
  // writing a physical record but before completing the next; don't
@@ -252,7 +254,9 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
252
254
  // produce a hole in the recovered data. Report an error here,
253
255
  // which higher layers can choose to ignore when it's provable
254
256
  // there is no hole.
255
- ReportCorruption(scratch->size(), "error reading trailing data");
257
+ ReportCorruption(
258
+ scratch->size(),
259
+ "error reading trailing data due to encountering old record");
256
260
  }
257
261
  // This can be caused by the writer dying immediately after
258
262
  // writing a physical record but before completing the next; don't
@@ -483,9 +487,11 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size,
483
487
  type == kRecyclableUserDefinedTimestampSizeType);
484
488
  if (is_recyclable_type) {
485
489
  header_size = kRecyclableHeaderSize;
486
- if (end_of_buffer_offset_ - buffer_.size() == 0) {
487
- recycled_ = true;
490
+ if (first_record_read_ && !recycled_) {
491
+ // A recycled log should have started with a recycled record
492
+ return kBadRecord;
488
493
  }
494
+ recycled_ = true;
489
495
  // We need enough for the larger header
490
496
  if (buffer_.size() < static_cast<size_t>(kRecyclableHeaderSize)) {
491
497
  int r = kEof;
@@ -863,9 +869,12 @@ bool FragmentBufferedReader::TryReadFragment(
863
869
  int header_size = kHeaderSize;
864
870
  if ((type >= kRecyclableFullType && type <= kRecyclableLastType) ||
865
871
  type == kRecyclableUserDefinedTimestampSizeType) {
866
- if (end_of_buffer_offset_ - buffer_.size() == 0) {
867
- recycled_ = true;
872
+ if (first_record_read_ && !recycled_) {
873
+ // A recycled log should have started with a recycled record
874
+ *fragment_type_or_err = kBadRecord;
875
+ return true;
868
876
  }
877
+ recycled_ = true;
869
878
  header_size = kRecyclableHeaderSize;
870
879
  while (buffer_.size() < static_cast<size_t>(kRecyclableHeaderSize)) {
871
880
  size_t old_size = buffer_.size();
@@ -27,6 +27,8 @@ Writer::Writer(std::unique_ptr<WritableFileWriter>&& dest, uint64_t log_number,
27
27
  block_offset_(0),
28
28
  log_number_(log_number),
29
29
  recycle_log_files_(recycle_log_files),
30
+ // Header size varies depending on whether we are recycling or not.
31
+ header_size_(recycle_log_files ? kRecyclableHeaderSize : kHeaderSize),
30
32
  manual_flush_(manual_flush),
31
33
  compression_type_(compression_type),
32
34
  compress_(nullptr) {
@@ -51,6 +53,14 @@ Writer::~Writer() {
51
53
 
52
54
  IOStatus Writer::WriteBuffer(const WriteOptions& write_options) {
53
55
  if (dest_->seen_error()) {
56
+ #ifndef NDEBUG
57
+ if (dest_->seen_injected_error()) {
58
+ std::stringstream msg;
59
+ msg << "Seen " << FaultInjectionTestFS::kInjected
60
+ << " error. Skip writing buffer.";
61
+ return IOStatus::IOError(msg.str());
62
+ }
63
+ #endif // NDEBUG
54
64
  return IOStatus::IOError("Seen error. Skip writing buffer.");
55
65
  }
56
66
  IOOptions opts;
@@ -72,18 +82,31 @@ IOStatus Writer::Close(const WriteOptions& write_options) {
72
82
  return s;
73
83
  }
74
84
 
85
+ bool Writer::PublishIfClosed() {
86
+ if (dest_->IsClosed()) {
87
+ dest_.reset();
88
+ return true;
89
+ } else {
90
+ return false;
91
+ }
92
+ }
93
+
75
94
  IOStatus Writer::AddRecord(const WriteOptions& write_options,
76
95
  const Slice& slice) {
77
96
  if (dest_->seen_error()) {
97
+ #ifndef NDEBUG
98
+ if (dest_->seen_injected_error()) {
99
+ std::stringstream msg;
100
+ msg << "Seen " << FaultInjectionTestFS::kInjected
101
+ << " error. Skip writing buffer.";
102
+ return IOStatus::IOError(msg.str());
103
+ }
104
+ #endif // NDEBUG
78
105
  return IOStatus::IOError("Seen error. Skip writing buffer.");
79
106
  }
80
107
  const char* ptr = slice.data();
81
108
  size_t left = slice.size();
82
109
 
83
- // Header size varies depending on whether we are recycling or not.
84
- const int header_size =
85
- recycle_log_files_ ? kRecyclableHeaderSize : kHeaderSize;
86
-
87
110
  // Fragment the record if necessary and emit it. Note that if slice
88
111
  // is empty, we still want to iterate once to emit a single
89
112
  // zero-length record
@@ -102,12 +125,12 @@ IOStatus Writer::AddRecord(const WriteOptions& write_options,
102
125
  do {
103
126
  const int64_t leftover = kBlockSize - block_offset_;
104
127
  assert(leftover >= 0);
105
- if (leftover < header_size) {
128
+ if (leftover < header_size_) {
106
129
  // Switch to a new block
107
130
  if (leftover > 0) {
108
131
  // Fill the trailer (literal below relies on kHeaderSize and
109
132
  // kRecyclableHeaderSize being <= 11)
110
- assert(header_size <= 11);
133
+ assert(header_size_ <= 11);
111
134
  s = dest_->Append(opts,
112
135
  Slice("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
113
136
  static_cast<size_t>(leftover)),
@@ -120,9 +143,9 @@ IOStatus Writer::AddRecord(const WriteOptions& write_options,
120
143
  }
121
144
 
122
145
  // Invariant: we never leave < header_size bytes in a block.
123
- assert(static_cast<int64_t>(kBlockSize - block_offset_) >= header_size);
146
+ assert(static_cast<int64_t>(kBlockSize - block_offset_) >= header_size_);
124
147
 
125
- const size_t avail = kBlockSize - block_offset_ - header_size;
148
+ const size_t avail = kBlockSize - block_offset_ - header_size_;
126
149
 
127
150
  // Compress the record if compression is enabled.
128
151
  // Compress() is called at least once (compress_start=true) and after the
@@ -186,6 +209,14 @@ IOStatus Writer::AddCompressionTypeRecord(const WriteOptions& write_options) {
186
209
  }
187
210
 
188
211
  if (dest_->seen_error()) {
212
+ #ifndef NDEBUG
213
+ if (dest_->seen_injected_error()) {
214
+ std::stringstream msg;
215
+ msg << "Seen " << FaultInjectionTestFS::kInjected
216
+ << " error. Skip writing buffer.";
217
+ return IOStatus::IOError(msg.str());
218
+ }
219
+ #endif // NDEBUG
189
220
  return IOStatus::IOError("Seen error. Skip writing buffer.");
190
221
  }
191
222
 
@@ -203,8 +234,7 @@ IOStatus Writer::AddCompressionTypeRecord(const WriteOptions& write_options) {
203
234
  }
204
235
  }
205
236
  // Initialize fields required for compression
206
- const size_t max_output_buffer_len =
207
- kBlockSize - (recycle_log_files_ ? kRecyclableHeaderSize : kHeaderSize);
237
+ const size_t max_output_buffer_len = kBlockSize - header_size_;
208
238
  CompressionOptions opts;
209
239
  constexpr uint32_t compression_format_version = 2;
210
240
  compress_ = StreamingCompress::Create(compression_type_, opts,
@@ -244,6 +274,25 @@ IOStatus Writer::MaybeAddUserDefinedTimestampSizeRecord(
244
274
  record.EncodeTo(&encoded);
245
275
  RecordType type = recycle_log_files_ ? kRecyclableUserDefinedTimestampSizeType
246
276
  : kUserDefinedTimestampSizeType;
277
+
278
+ // If there's not enough space for this record, switch to a new block.
279
+ const int64_t leftover = kBlockSize - block_offset_;
280
+ if (leftover < header_size_ + (int)encoded.size()) {
281
+ IOOptions opts;
282
+ IOStatus s = WritableFileWriter::PrepareIOOptions(write_options, opts);
283
+ if (!s.ok()) {
284
+ return s;
285
+ }
286
+
287
+ std::vector<char> trailer(leftover, '\x00');
288
+ s = dest_->Append(opts, Slice(trailer.data(), trailer.size()));
289
+ if (!s.ok()) {
290
+ return s;
291
+ }
292
+
293
+ block_offset_ = 0;
294
+ }
295
+
247
296
  return EmitPhysicalRecord(write_options, type, encoded.data(),
248
297
  encoded.size());
249
298
  }
@@ -107,13 +107,21 @@ class Writer {
107
107
 
108
108
  IOStatus Close(const WriteOptions& write_options);
109
109
 
110
+ // If closing the writer through file(), call this afterwards to modify
111
+ // this object's state to reflect that. Returns true if the destination file
112
+ // has been closed. If it hasn't been closed, returns false with no change.
113
+ bool PublishIfClosed();
114
+
110
115
  bool BufferIsEmpty();
111
116
 
117
+ size_t TEST_block_offset() const { return block_offset_; }
118
+
112
119
  private:
113
120
  std::unique_ptr<WritableFileWriter> dest_;
114
121
  size_t block_offset_; // Current offset in block
115
122
  uint64_t log_number_;
116
123
  bool recycle_log_files_;
124
+ int header_size_;
117
125
 
118
126
  // crc32c values for all supported record types. These are
119
127
  // pre-computed to reduce the overhead of computing the crc of the
@@ -13,6 +13,7 @@
13
13
  #include <array>
14
14
  #include <limits>
15
15
  #include <memory>
16
+ #include <optional>
16
17
 
17
18
  #include "db/dbformat.h"
18
19
  #include "db/kv_checksum.h"
@@ -617,8 +618,9 @@ FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIteratorInternal(
617
618
  }
618
619
 
619
620
  void MemTable::ConstructFragmentedRangeTombstones() {
620
- assert(!IsFragmentedRangeTombstonesConstructed(false));
621
- // There should be no concurrent Construction
621
+ // There should be no concurrent Construction.
622
+ // We could also check fragmented_range_tombstone_list_ to avoid repeated
623
+ // constructions. We just construct them here again to be safe.
622
624
  if (!is_range_del_table_empty_.load(std::memory_order_relaxed)) {
623
625
  // TODO: plumb Env::IOActivity, Env::IOPriority
624
626
  auto* unfragmented_iter = new MemTableIterator(
@@ -765,8 +767,9 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
765
767
  Slice key_without_ts = StripTimestampFromUserKey(key, ts_sz_);
766
768
 
767
769
  if (!allow_concurrent) {
768
- // Extract prefix for insert with hint.
769
- if (insert_with_hint_prefix_extractor_ != nullptr &&
770
+ // Extract prefix for insert with hint. Hints are for point key table
771
+ // (`table_`) only, not `range_del_table_`.
772
+ if (table == table_ && insert_with_hint_prefix_extractor_ != nullptr &&
770
773
  insert_with_hint_prefix_extractor_->InDomain(key_slice)) {
771
774
  Slice prefix = insert_with_hint_prefix_extractor_->Transform(key_slice);
772
775
  bool res = table->InsertKeyWithHint(handle, &insert_hints_[prefix]);
@@ -927,21 +930,10 @@ struct Saver {
927
930
  } // anonymous namespace
928
931
 
929
932
  static bool SaveValue(void* arg, const char* entry) {
930
- TEST_SYNC_POINT_CALLBACK("Memtable::SaveValue:Begin:entry", &entry);
931
933
  Saver* s = static_cast<Saver*>(arg);
932
934
  assert(s != nullptr);
933
935
  assert(!s->value || !s->columns);
934
936
 
935
- if (s->protection_bytes_per_key > 0) {
936
- *(s->status) = MemTable::VerifyEntryChecksum(
937
- entry, s->protection_bytes_per_key, s->allow_data_in_errors);
938
- if (!s->status->ok()) {
939
- ROCKS_LOG_ERROR(s->logger, "In SaveValue: %s", s->status->getState());
940
- // Memtable entry corrupted
941
- return false;
942
- }
943
- }
944
-
945
937
  MergeContext* merge_context = s->merge_context;
946
938
  SequenceNumber max_covering_tombstone_seq = s->max_covering_tombstone_seq;
947
939
  const MergeOperator* merge_operator = s->merge_operator;
@@ -964,6 +956,22 @@ static bool SaveValue(void* arg, const char* entry) {
964
956
  if (user_comparator->EqualWithoutTimestamp(user_key_slice,
965
957
  s->key->user_key())) {
966
958
  // Correct user key
959
+ TEST_SYNC_POINT_CALLBACK("Memtable::SaveValue:Found:entry", &entry);
960
+ std::optional<ReadLock> read_lock;
961
+ if (s->inplace_update_support) {
962
+ read_lock.emplace(s->mem->GetLock(s->key->user_key()));
963
+ }
964
+
965
+ if (s->protection_bytes_per_key > 0) {
966
+ *(s->status) = MemTable::VerifyEntryChecksum(
967
+ entry, s->protection_bytes_per_key, s->allow_data_in_errors);
968
+ if (!s->status->ok()) {
969
+ ROCKS_LOG_ERROR(s->logger, "In SaveValue: %s", s->status->getState());
970
+ // Memtable entry corrupted
971
+ return false;
972
+ }
973
+ }
974
+
967
975
  const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8);
968
976
  ValueType type;
969
977
  SequenceNumber seq;
@@ -1034,10 +1042,6 @@ static bool SaveValue(void* arg, const char* entry) {
1034
1042
  return false;
1035
1043
  }
1036
1044
 
1037
- if (s->inplace_update_support) {
1038
- s->mem->GetLock(s->key->user_key())->ReadLock();
1039
- }
1040
-
1041
1045
  Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
1042
1046
 
1043
1047
  *(s->status) = Status::OK();
@@ -1048,10 +1052,6 @@ static bool SaveValue(void* arg, const char* entry) {
1048
1052
  s->columns->SetPlainValue(v);
1049
1053
  }
1050
1054
 
1051
- if (s->inplace_update_support) {
1052
- s->mem->GetLock(s->key->user_key())->ReadUnlock();
1053
- }
1054
-
1055
1055
  *(s->found_final_value) = true;
1056
1056
  *(s->is_blob_index) = true;
1057
1057
 
@@ -1059,10 +1059,6 @@ static bool SaveValue(void* arg, const char* entry) {
1059
1059
  }
1060
1060
  case kTypeValue:
1061
1061
  case kTypeValuePreferredSeqno: {
1062
- if (s->inplace_update_support) {
1063
- s->mem->GetLock(s->key->user_key())->ReadLock();
1064
- }
1065
-
1066
1062
  Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
1067
1063
 
1068
1064
  if (type == kTypeValuePreferredSeqno) {
@@ -1099,10 +1095,6 @@ static bool SaveValue(void* arg, const char* entry) {
1099
1095
  s->columns->SetPlainValue(v);
1100
1096
  }
1101
1097
 
1102
- if (s->inplace_update_support) {
1103
- s->mem->GetLock(s->key->user_key())->ReadUnlock();
1104
- }
1105
-
1106
1098
  *(s->found_final_value) = true;
1107
1099
 
1108
1100
  if (s->is_blob_index != nullptr) {
@@ -1112,10 +1104,6 @@ static bool SaveValue(void* arg, const char* entry) {
1112
1104
  return false;
1113
1105
  }
1114
1106
  case kTypeWideColumnEntity: {
1115
- if (s->inplace_update_support) {
1116
- s->mem->GetLock(s->key->user_key())->ReadLock();
1117
- }
1118
-
1119
1107
  Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
1120
1108
 
1121
1109
  *(s->status) = Status::OK();
@@ -1157,10 +1145,6 @@ static bool SaveValue(void* arg, const char* entry) {
1157
1145
  *(s->status) = s->columns->SetWideColumnValue(v);
1158
1146
  }
1159
1147
 
1160
- if (s->inplace_update_support) {
1161
- s->mem->GetLock(s->key->user_key())->ReadUnlock();
1162
- }
1163
-
1164
1148
  *(s->found_final_value) = true;
1165
1149
 
1166
1150
  if (s->is_blob_index != nullptr) {
@@ -1498,9 +1482,9 @@ Status MemTable::Update(SequenceNumber seq, ValueType value_type,
1498
1482
 
1499
1483
  // Update value, if new value size <= previous value size
1500
1484
  if (new_size <= prev_size) {
1485
+ WriteLock wl(GetLock(lkey.user_key()));
1501
1486
  char* p =
1502
1487
  EncodeVarint32(const_cast<char*>(key_ptr) + key_length, new_size);
1503
- WriteLock wl(GetLock(lkey.user_key()));
1504
1488
  memcpy(p, value.data(), value.size());
1505
1489
  assert((unsigned)((p + value.size()) - entry) ==
1506
1490
  (unsigned)(VarintLength(key_length) + key_length +
@@ -534,21 +534,21 @@ class MemTable {
534
534
  // Returns a heuristic flush decision
535
535
  bool ShouldFlushNow();
536
536
 
537
+ // Updates `fragmented_range_tombstone_list_` that will be used to serve reads
538
+ // when this memtable becomes an immutable memtable (in some
539
+ // MemtableListVersion::memlist_). Should be called when this memtable is
540
+ // about to become immutable. May be called multiple times since
541
+ // SwitchMemtable() may fail.
537
542
  void ConstructFragmentedRangeTombstones();
538
543
 
539
544
  // Returns whether a fragmented range tombstone list is already constructed
540
545
  // for this memtable. It should be constructed right before a memtable is
541
546
  // added to an immutable memtable list. Note that if a memtable does not have
542
- // any range tombstone, then no range tombstone list will ever be constructed.
543
- // @param allow_empty Specifies whether a memtable with no range tombstone is
544
- // considered to have its fragmented range tombstone list constructed.
545
- bool IsFragmentedRangeTombstonesConstructed(bool allow_empty = true) const {
546
- if (allow_empty) {
547
- return fragmented_range_tombstone_list_.get() != nullptr ||
548
- is_range_del_table_empty_;
549
- } else {
550
- return fragmented_range_tombstone_list_.get() != nullptr;
551
- }
547
+ // any range tombstone, then no range tombstone list will ever be constructed
548
+ // and true is returned in that case.
549
+ bool IsFragmentedRangeTombstonesConstructed() const {
550
+ return fragmented_range_tombstone_list_.get() != nullptr ||
551
+ is_range_del_table_empty_;
552
552
  }
553
553
 
554
554
  // Get the newest user-defined timestamp contained in this MemTable. Check