@nxtedition/rocksdb 8.2.8 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (359)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -1
  2. package/deps/rocksdb/rocksdb/Makefile +22 -19
  3. package/deps/rocksdb/rocksdb/TARGETS +8 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +157 -61
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +43 -92
  6. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +632 -455
  7. package/deps/rocksdb/rocksdb/cache/clock_cache.h +244 -149
  8. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +41 -13
  9. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +11 -1
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +216 -17
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
  12. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +279 -199
  13. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +2 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +159 -8
  15. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +28 -2
  16. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +1 -1
  17. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -0
  18. package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
  19. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -1
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
  22. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
  23. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
  24. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -21
  25. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
  26. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
  27. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
  29. package/deps/rocksdb/rocksdb/db/builder.cc +32 -7
  30. package/deps/rocksdb/rocksdb/db/c.cc +169 -6
  31. package/deps/rocksdb/rocksdb/db/c_test.c +104 -6
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +98 -47
  33. package/deps/rocksdb/rocksdb/db/column_family.h +25 -2
  34. package/deps/rocksdb/rocksdb/db/column_family_test.cc +213 -2
  35. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +93 -23
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +33 -9
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -6
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +107 -43
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -4
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +25 -17
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -4
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +29 -4
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +24 -31
  50. package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
  51. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +19 -19
  52. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +2 -1
  53. package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
  54. package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
  55. package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
  56. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
  57. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -3
  58. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +5 -0
  59. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +15 -15
  60. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +666 -44
  61. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
  62. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +274 -1
  63. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +40 -19
  64. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
  65. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +250 -116
  66. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +51 -23
  67. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +354 -96
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +6 -3
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -21
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +26 -13
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -5
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +61 -21
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -87
  76. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +7 -1
  77. package/deps/rocksdb/rocksdb/db/db_iter.cc +2 -2
  78. package/deps/rocksdb/rocksdb/db/db_iter.h +1 -0
  79. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
  80. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +6 -6
  81. package/deps/rocksdb/rocksdb/db/db_options_test.cc +39 -29
  82. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
  83. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +106 -0
  84. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +12 -3
  85. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
  86. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
  87. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +279 -166
  88. package/deps/rocksdb/rocksdb/db/db_test.cc +48 -21
  89. package/deps/rocksdb/rocksdb/db/db_test2.cc +81 -12
  90. package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
  91. package/deps/rocksdb/rocksdb/db/db_test_util.h +40 -0
  92. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +13 -1
  93. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +233 -0
  94. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +143 -0
  95. package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
  96. package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
  97. package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
  98. package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
  99. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
  100. package/deps/rocksdb/rocksdb/db/error_handler.cc +16 -0
  101. package/deps/rocksdb/rocksdb/db/error_handler.h +6 -3
  102. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
  103. package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
  104. package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
  105. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
  106. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
  107. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -4
  108. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
  110. package/deps/rocksdb/rocksdb/db/flush_job.cc +101 -11
  111. package/deps/rocksdb/rocksdb/db/flush_job.h +24 -1
  112. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +88 -11
  113. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
  114. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
  115. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
  116. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
  117. package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
  118. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
  119. package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
  120. package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
  121. package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
  122. package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
  123. package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
  124. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -3
  125. package/deps/rocksdb/rocksdb/db/memtable.cc +52 -13
  126. package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
  127. package/deps/rocksdb/rocksdb/db/memtable_list.cc +44 -10
  128. package/deps/rocksdb/rocksdb/db/memtable_list.h +32 -1
  129. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +90 -4
  130. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -2
  131. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
  132. package/deps/rocksdb/rocksdb/db/repair.cc +21 -4
  133. package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
  134. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -4
  135. package/deps/rocksdb/rocksdb/db/table_cache.cc +44 -35
  136. package/deps/rocksdb/rocksdb/db/table_cache.h +6 -6
  137. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
  138. package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
  139. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
  140. package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
  141. package/deps/rocksdb/rocksdb/db/version_edit.h +48 -6
  142. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
  143. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
  144. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
  145. package/deps/rocksdb/rocksdb/db/version_set.cc +136 -41
  146. package/deps/rocksdb/rocksdb/db/version_set.h +28 -7
  147. package/deps/rocksdb/rocksdb/db/version_set_test.cc +25 -15
  148. package/deps/rocksdb/rocksdb/db/write_batch.cc +11 -0
  149. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  150. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
  151. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -3
  152. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +2 -0
  153. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
  154. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +32 -3
  155. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -0
  156. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -120
  157. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +9 -4
  158. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +13 -6
  159. package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
  160. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +15 -27
  161. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +264 -69
  162. package/deps/rocksdb/rocksdb/env/env.cc +1 -2
  163. package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
  164. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
  165. package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
  166. package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
  167. package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
  168. package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +78 -0
  169. package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
  170. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
  171. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
  172. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +52 -43
  173. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +34 -18
  174. package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
  175. package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
  176. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +724 -79
  177. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +64 -33
  178. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
  179. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
  180. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
  181. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +2 -1
  182. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +153 -88
  183. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +70 -2
  184. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +50 -11
  185. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
  186. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +16 -2
  187. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
  188. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +55 -8
  189. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +32 -4
  190. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
  191. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +90 -13
  192. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
  193. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +85 -17
  194. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +13 -1
  195. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
  196. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
  197. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +21 -2
  198. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
  199. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +6 -0
  200. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +5 -0
  201. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
  202. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
  203. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +33 -2
  204. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
  205. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  206. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -0
  207. package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
  208. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -1
  209. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
  210. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +16 -1
  211. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +10 -0
  212. package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
  213. package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
  214. package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -1
  215. package/deps/rocksdb/rocksdb/options/db_options.cc +7 -0
  216. package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
  217. package/deps/rocksdb/rocksdb/options/options.cc +15 -1
  218. package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
  219. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -3
  220. package/deps/rocksdb/rocksdb/options/options_test.cc +8 -0
  221. package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
  222. package/deps/rocksdb/rocksdb/port/stack_trace.cc +27 -12
  223. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
  224. package/deps/rocksdb/rocksdb/src.mk +3 -0
  225. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
  226. package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
  227. package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
  228. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +115 -42
  229. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -5
  230. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +60 -2
  231. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +2 -0
  232. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +62 -44
  233. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +36 -14
  234. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -15
  235. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
  236. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
  237. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
  238. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -21
  239. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +11 -4
  240. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +195 -55
  241. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
  242. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
  243. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
  244. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  245. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
  246. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +27 -12
  247. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
  248. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
  249. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +1 -2
  250. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +9 -6
  251. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
  252. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
  253. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +11 -11
  254. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -0
  255. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
  256. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
  257. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
  258. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
  259. package/deps/rocksdb/rocksdb/table/format.cc +175 -33
  260. package/deps/rocksdb/rocksdb/table/format.h +63 -10
  261. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -2
  262. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
  263. package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
  264. package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
  265. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
  266. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
  267. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
  268. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
  269. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +12 -3
  270. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +26 -1
  271. package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
  272. package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
  273. package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
  274. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
  275. package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
  276. package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
  277. package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
  278. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +65 -26
  279. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
  280. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
  281. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
  282. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +0 -1
  283. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
  284. package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
  285. package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
  286. package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
  287. package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
  288. package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
  289. package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
  290. package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
  291. package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
  292. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
  293. package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
  294. package/deps/rocksdb/rocksdb/util/cast_util.h +14 -0
  295. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
  296. package/deps/rocksdb/rocksdb/util/comparator.cc +29 -7
  297. package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
  298. package/deps/rocksdb/rocksdb/util/compression.h +110 -32
  299. package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
  300. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
  301. package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
  302. package/deps/rocksdb/rocksdb/util/hash.h +7 -3
  303. package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
  304. package/deps/rocksdb/rocksdb/util/math.h +58 -6
  305. package/deps/rocksdb/rocksdb/util/math128.h +29 -7
  306. package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
  307. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
  308. package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
  309. package/deps/rocksdb/rocksdb/util/thread_operation.h +8 -1
  310. package/deps/rocksdb/rocksdb/util/udt_util.cc +343 -0
  311. package/deps/rocksdb/rocksdb/util/udt_util.h +173 -1
  312. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +447 -0
  313. package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
  314. package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
  315. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
  316. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +69 -25
  317. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
  318. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
  319. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
  320. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
  321. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
  322. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
  323. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +1 -1
  324. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
  325. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +2 -1
  326. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +3 -3
  327. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
  328. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
  329. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
  330. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
  331. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
  332. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
  333. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
  334. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +231 -33
  335. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +0 -1
  336. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
  337. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
  338. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +40 -23
  339. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +13 -12
  340. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +7 -0
  341. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
  342. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +41 -11
  343. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
  344. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +71 -24
  345. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
  346. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
  347. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +39 -11
  348. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
  349. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +14 -8
  350. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
  351. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
  352. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
  353. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
  354. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +2 -1
  355. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +6 -6
  356. package/deps/rocksdb/rocksdb.gyp +2 -0
  357. package/package.json +1 -1
  358. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  359. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -9,8 +9,18 @@
9
9
 
10
10
  #include "cache/clock_cache.h"
11
11
 
12
+ #include <algorithm>
13
+ #include <atomic>
14
+ #include <bitset>
15
+ #include <cassert>
16
+ #include <cstddef>
17
+ #include <cstdint>
18
+ #include <exception>
12
19
  #include <functional>
13
20
  #include <numeric>
21
+ #include <string>
22
+ #include <thread>
23
+ #include <type_traits>
14
24
 
15
25
  #include "cache/cache_key.h"
16
26
  #include "cache/secondary_cache_adapter.h"
@@ -72,6 +82,18 @@ inline void FreeDataMarkEmpty(ClockHandle& h, MemoryAllocator* allocator) {
72
82
  MarkEmpty(h);
73
83
  }
74
84
 
85
+ // Called to undo the effect of referencing an entry for internal purposes,
86
+ // so it should not be marked as having been used.
87
+ inline void Unref(const ClockHandle& h, uint64_t count = 1) {
88
+ // Pretend we never took the reference
89
+ // WART: there's a tiny chance we release last ref to invisible
90
+ // entry here. If that happens, we let eviction take care of it.
91
+ uint64_t old_meta = h.meta.fetch_sub(ClockHandle::kAcquireIncrement * count,
92
+ std::memory_order_release);
93
+ assert(GetRefcount(old_meta) != 0);
94
+ (void)old_meta;
95
+ }
96
+
75
97
  inline bool ClockUpdate(ClockHandle& h) {
76
98
  uint64_t meta = h.meta.load(std::memory_order_relaxed);
77
99
 
@@ -79,8 +101,6 @@ inline bool ClockUpdate(ClockHandle& h) {
79
101
  (meta >> ClockHandle::kAcquireCounterShift) & ClockHandle::kCounterMask;
80
102
  uint64_t release_count =
81
103
  (meta >> ClockHandle::kReleaseCounterShift) & ClockHandle::kCounterMask;
82
- // fprintf(stderr, "ClockUpdate @ %p: %lu %lu %u\n", &h, acquire_count,
83
- // release_count, (unsigned)(meta >> ClockHandle::kStateShift));
84
104
  if (acquire_count != release_count) {
85
105
  // Only clock update entries with no outstanding refs
86
106
  return false;
@@ -98,6 +118,7 @@ inline bool ClockUpdate(ClockHandle& h) {
98
118
  // not aggressively
99
119
  uint64_t new_meta =
100
120
  (uint64_t{ClockHandle::kStateVisible} << ClockHandle::kStateShift) |
121
+ (meta & ClockHandle::kHitBitMask) |
101
122
  (new_count << ClockHandle::kReleaseCounterShift) |
102
123
  (new_count << ClockHandle::kAcquireCounterShift);
103
124
  h.meta.compare_exchange_strong(meta, new_meta, std::memory_order_relaxed);
@@ -105,10 +126,11 @@ inline bool ClockUpdate(ClockHandle& h) {
105
126
  }
106
127
  // Otherwise, remove entry (either unreferenced invisible or
107
128
  // unreferenced and expired visible).
108
- if (h.meta.compare_exchange_strong(
109
- meta,
110
- uint64_t{ClockHandle::kStateConstruction} << ClockHandle::kStateShift,
111
- std::memory_order_acquire)) {
129
+ if (h.meta.compare_exchange_strong(meta,
130
+ (uint64_t{ClockHandle::kStateConstruction}
131
+ << ClockHandle::kStateShift) |
132
+ (meta & ClockHandle::kHitBitMask),
133
+ std::memory_order_acquire)) {
112
134
  // Took ownership.
113
135
  return true;
114
136
  } else {
@@ -118,74 +140,6 @@ inline bool ClockUpdate(ClockHandle& h) {
118
140
  }
119
141
  }
120
142
 
121
- } // namespace
122
-
123
- void ClockHandleBasicData::FreeData(MemoryAllocator* allocator) const {
124
- if (helper->del_cb) {
125
- helper->del_cb(value, allocator);
126
- }
127
- }
128
-
129
- HyperClockTable::HyperClockTable(
130
- size_t capacity, bool /*strict_capacity_limit*/,
131
- CacheMetadataChargePolicy metadata_charge_policy,
132
- MemoryAllocator* allocator,
133
- const Cache::EvictionCallback* eviction_callback, const uint32_t* hash_seed,
134
- const Opts& opts)
135
- : length_bits_(CalcHashBits(capacity, opts.estimated_value_size,
136
- metadata_charge_policy)),
137
- length_bits_mask_((size_t{1} << length_bits_) - 1),
138
- occupancy_limit_(static_cast<size_t>((uint64_t{1} << length_bits_) *
139
- kStrictLoadFactor)),
140
- array_(new HandleImpl[size_t{1} << length_bits_]),
141
- allocator_(allocator),
142
- eviction_callback_(*eviction_callback),
143
- hash_seed_(*hash_seed) {
144
- if (metadata_charge_policy ==
145
- CacheMetadataChargePolicy::kFullChargeCacheMetadata) {
146
- usage_ += size_t{GetTableSize()} * sizeof(HandleImpl);
147
- }
148
-
149
- static_assert(sizeof(HandleImpl) == 64U,
150
- "Expecting size / alignment with common cache line size");
151
- }
152
-
153
- HyperClockTable::~HyperClockTable() {
154
- // Assumes there are no references or active operations on any slot/element
155
- // in the table.
156
- for (size_t i = 0; i < GetTableSize(); i++) {
157
- HandleImpl& h = array_[i];
158
- switch (h.meta >> ClockHandle::kStateShift) {
159
- case ClockHandle::kStateEmpty:
160
- // noop
161
- break;
162
- case ClockHandle::kStateInvisible: // rare but possible
163
- case ClockHandle::kStateVisible:
164
- assert(GetRefcount(h.meta) == 0);
165
- h.FreeData(allocator_);
166
- #ifndef NDEBUG
167
- Rollback(h.hashed_key, &h);
168
- ReclaimEntryUsage(h.GetTotalCharge());
169
- #endif
170
- break;
171
- // otherwise
172
- default:
173
- assert(false);
174
- break;
175
- }
176
- }
177
-
178
- #ifndef NDEBUG
179
- for (size_t i = 0; i < GetTableSize(); i++) {
180
- assert(array_[i].displacements.load() == 0);
181
- }
182
- #endif
183
-
184
- assert(usage_.load() == 0 ||
185
- usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl));
186
- assert(occupancy_ == 0);
187
- }
188
-
189
143
  // If an entry doesn't receive clock updates but is repeatedly referenced &
190
144
  // released, the acquire and release counters could overflow without some
191
145
  // intervention. This is that intervention, which should be inexpensive
@@ -259,8 +213,202 @@ inline void CorrectNearOverflow(uint64_t old_meta,
259
213
  }
260
214
  }
261
215
 
262
- inline Status HyperClockTable::ChargeUsageMaybeEvictStrict(
263
- size_t total_charge, size_t capacity, bool need_evict_for_occupancy) {
216
+ inline bool BeginSlotInsert(const ClockHandleBasicData& proto, ClockHandle& h,
217
+ uint64_t initial_countdown, bool* already_matches) {
218
+ assert(*already_matches == false);
219
+ // Optimistically transition the slot from "empty" to
220
+ // "under construction" (no effect on other states)
221
+ uint64_t old_meta = h.meta.fetch_or(
222
+ uint64_t{ClockHandle::kStateOccupiedBit} << ClockHandle::kStateShift,
223
+ std::memory_order_acq_rel);
224
+ uint64_t old_state = old_meta >> ClockHandle::kStateShift;
225
+
226
+ if (old_state == ClockHandle::kStateEmpty) {
227
+ // We've started inserting into an available slot, and taken
228
+ // ownership.
229
+ return true;
230
+ } else if (old_state != ClockHandle::kStateVisible) {
231
+ // Slot not usable / touchable now
232
+ return false;
233
+ }
234
+ // Existing, visible entry, which might be a match.
235
+ // But first, we need to acquire a ref to read it. In fact, number of
236
+ // refs for initial countdown, so that we boost the clock state if
237
+ // this is a match.
238
+ old_meta =
239
+ h.meta.fetch_add(ClockHandle::kAcquireIncrement * initial_countdown,
240
+ std::memory_order_acq_rel);
241
+ // Like Lookup
242
+ if ((old_meta >> ClockHandle::kStateShift) == ClockHandle::kStateVisible) {
243
+ // Acquired a read reference
244
+ if (h.hashed_key == proto.hashed_key) {
245
+ // Match. Release in a way that boosts the clock state
246
+ old_meta =
247
+ h.meta.fetch_add(ClockHandle::kReleaseIncrement * initial_countdown,
248
+ std::memory_order_acq_rel);
249
+ // Correct for possible (but rare) overflow
250
+ CorrectNearOverflow(old_meta, h.meta);
251
+ // Insert detached instead (only if return handle needed)
252
+ *already_matches = true;
253
+ return false;
254
+ } else {
255
+ // Mismatch.
256
+ Unref(h, initial_countdown);
257
+ }
258
+ } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) ==
259
+ ClockHandle::kStateInvisible)) {
260
+ // Pretend we never took the reference
261
+ Unref(h, initial_countdown);
262
+ } else {
263
+ // For other states, incrementing the acquire counter has no effect
264
+ // so we don't need to undo it.
265
+ // Slot not usable / touchable now.
266
+ }
267
+ return false;
268
+ }
269
+
270
+ inline void FinishSlotInsert(const ClockHandleBasicData& proto, ClockHandle& h,
271
+ uint64_t initial_countdown, bool keep_ref) {
272
+ // Save data fields
273
+ ClockHandleBasicData* h_alias = &h;
274
+ *h_alias = proto;
275
+
276
+ // Transition from "under construction" state to "visible" state
277
+ uint64_t new_meta = uint64_t{ClockHandle::kStateVisible}
278
+ << ClockHandle::kStateShift;
279
+
280
+ // Maybe with an outstanding reference
281
+ new_meta |= initial_countdown << ClockHandle::kAcquireCounterShift;
282
+ new_meta |= (initial_countdown - keep_ref)
283
+ << ClockHandle::kReleaseCounterShift;
284
+
285
+ #ifndef NDEBUG
286
+ // Save the state transition, with assertion
287
+ uint64_t old_meta = h.meta.exchange(new_meta, std::memory_order_release);
288
+ assert(old_meta >> ClockHandle::kStateShift ==
289
+ ClockHandle::kStateConstruction);
290
+ #else
291
+ // Save the state transition
292
+ h.meta.store(new_meta, std::memory_order_release);
293
+ #endif
294
+ }
295
+
296
+ bool TryInsert(const ClockHandleBasicData& proto, ClockHandle& h,
297
+ uint64_t initial_countdown, bool keep_ref,
298
+ bool* already_matches) {
299
+ bool b = BeginSlotInsert(proto, h, initial_countdown, already_matches);
300
+ if (b) {
301
+ FinishSlotInsert(proto, h, initial_countdown, keep_ref);
302
+ }
303
+ return b;
304
+ }
305
+
306
+ // Func must be const HandleImpl& -> void callable
307
+ template <class HandleImpl, class Func>
308
+ void ConstApplyToEntriesRange(const Func& func, const HandleImpl* begin,
309
+ const HandleImpl* end,
310
+ bool apply_if_will_be_deleted) {
311
+ uint64_t check_state_mask = ClockHandle::kStateShareableBit;
312
+ if (!apply_if_will_be_deleted) {
313
+ check_state_mask |= ClockHandle::kStateVisibleBit;
314
+ }
315
+
316
+ for (const HandleImpl* h = begin; h < end; ++h) {
317
+ // Note: to avoid using compare_exchange, we have to be extra careful.
318
+ uint64_t old_meta = h->meta.load(std::memory_order_relaxed);
319
+ // Check if it's an entry visible to lookups
320
+ if ((old_meta >> ClockHandle::kStateShift) & check_state_mask) {
321
+ // Increment acquire counter. Note: it's possible that the entry has
322
+ // completely changed since we loaded old_meta, but incrementing acquire
323
+ // count is always safe. (Similar to optimistic Lookup here.)
324
+ old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement,
325
+ std::memory_order_acquire);
326
+ // Check whether we actually acquired a reference.
327
+ if ((old_meta >> ClockHandle::kStateShift) &
328
+ ClockHandle::kStateShareableBit) {
329
+ // Apply func if appropriate
330
+ if ((old_meta >> ClockHandle::kStateShift) & check_state_mask) {
331
+ func(*h);
332
+ }
333
+ // Pretend we never took the reference
334
+ Unref(*h);
335
+ // No net change, so don't need to check for overflow
336
+ } else {
337
+ // For other states, incrementing the acquire counter has no effect
338
+ // so we don't need to undo it. Furthermore, we cannot safely undo
339
+ // it because we did not acquire a read reference to lock the
340
+ // entry in a Shareable state.
341
+ }
342
+ }
343
+ }
344
+ }
345
+
346
+ } // namespace
347
+
348
+ void ClockHandleBasicData::FreeData(MemoryAllocator* allocator) const {
349
+ if (helper->del_cb) {
350
+ helper->del_cb(value, allocator);
351
+ }
352
+ }
353
+
354
+ template <class HandleImpl>
355
+ HandleImpl* BaseClockTable::StandaloneInsert(
356
+ const ClockHandleBasicData& proto) {
357
+ // Heap allocated separate from table
358
+ HandleImpl* h = new HandleImpl();
359
+ ClockHandleBasicData* h_alias = h;
360
+ *h_alias = proto;
361
+ h->SetStandalone();
362
+ // Single reference (standalone entries only created if returning a refed
363
+ // Handle back to user)
364
+ uint64_t meta = uint64_t{ClockHandle::kStateInvisible}
365
+ << ClockHandle::kStateShift;
366
+ meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift;
367
+ h->meta.store(meta, std::memory_order_release);
368
+ // Keep track of how much of usage is standalone
369
+ standalone_usage_.fetch_add(proto.GetTotalCharge(),
370
+ std::memory_order_relaxed);
371
+ return h;
372
+ }
373
+
374
+ template <class Table>
375
+ typename Table::HandleImpl* BaseClockTable::CreateStandalone(
376
+ ClockHandleBasicData& proto, size_t capacity, bool strict_capacity_limit,
377
+ bool allow_uncharged) {
378
+ Table& derived = static_cast<Table&>(*this);
379
+ typename Table::InsertState state;
380
+ derived.StartInsert(state);
381
+
382
+ const size_t total_charge = proto.GetTotalCharge();
383
+ if (strict_capacity_limit) {
384
+ Status s = ChargeUsageMaybeEvictStrict<Table>(
385
+ total_charge, capacity,
386
+ /*need_evict_for_occupancy=*/false, state);
387
+ if (!s.ok()) {
388
+ if (allow_uncharged) {
389
+ proto.total_charge = 0;
390
+ } else {
391
+ return nullptr;
392
+ }
393
+ }
394
+ } else {
395
+ // Case strict_capacity_limit == false
396
+ bool success = ChargeUsageMaybeEvictNonStrict<Table>(
397
+ total_charge, capacity,
398
+ /*need_evict_for_occupancy=*/false, state);
399
+ if (!success) {
400
+ // Force the issue
401
+ usage_.fetch_add(total_charge, std::memory_order_relaxed);
402
+ }
403
+ }
404
+
405
+ return StandaloneInsert<typename Table::HandleImpl>(proto);
406
+ }
407
+
408
+ template <class Table>
409
+ Status BaseClockTable::ChargeUsageMaybeEvictStrict(
410
+ size_t total_charge, size_t capacity, bool need_evict_for_occupancy,
411
+ typename Table::InsertState& state) {
264
412
  if (total_charge > capacity) {
265
413
  return Status::MemoryLimit(
266
414
  "Cache entry too large for a single cache shard: " +
@@ -269,14 +417,14 @@ inline Status HyperClockTable::ChargeUsageMaybeEvictStrict(
269
417
  // Grab any available capacity, and free up any more required.
270
418
  size_t old_usage = usage_.load(std::memory_order_relaxed);
271
419
  size_t new_usage;
272
- if (LIKELY(old_usage != capacity)) {
273
- do {
274
- new_usage = std::min(capacity, old_usage + total_charge);
275
- } while (!usage_.compare_exchange_weak(old_usage, new_usage,
276
- std::memory_order_relaxed));
277
- } else {
278
- new_usage = old_usage;
279
- }
420
+ do {
421
+ new_usage = std::min(capacity, old_usage + total_charge);
422
+ if (new_usage == old_usage) {
423
+ // No change needed
424
+ break;
425
+ }
426
+ } while (!usage_.compare_exchange_weak(old_usage, new_usage,
427
+ std::memory_order_relaxed));
280
428
  // How much do we need to evict then?
281
429
  size_t need_evict_charge = old_usage + total_charge - new_usage;
282
430
  size_t request_evict_charge = need_evict_charge;
@@ -285,21 +433,20 @@ inline Status HyperClockTable::ChargeUsageMaybeEvictStrict(
285
433
  request_evict_charge = 1;
286
434
  }
287
435
  if (request_evict_charge > 0) {
288
- size_t evicted_charge = 0;
289
- size_t evicted_count = 0;
290
- Evict(request_evict_charge, &evicted_charge, &evicted_count);
291
- occupancy_.fetch_sub(evicted_count, std::memory_order_release);
292
- if (LIKELY(evicted_charge > need_evict_charge)) {
293
- assert(evicted_count > 0);
436
+ EvictionData data;
437
+ static_cast<Table*>(this)->Evict(request_evict_charge, state, &data);
438
+ occupancy_.fetch_sub(data.freed_count, std::memory_order_release);
439
+ if (LIKELY(data.freed_charge > need_evict_charge)) {
440
+ assert(data.freed_count > 0);
294
441
  // Evicted more than enough
295
- usage_.fetch_sub(evicted_charge - need_evict_charge,
442
+ usage_.fetch_sub(data.freed_charge - need_evict_charge,
296
443
  std::memory_order_relaxed);
297
- } else if (evicted_charge < need_evict_charge ||
298
- (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0)) {
444
+ } else if (data.freed_charge < need_evict_charge ||
445
+ (UNLIKELY(need_evict_for_occupancy) && data.freed_count == 0)) {
299
446
  // Roll back to old usage minus evicted
300
- usage_.fetch_sub(evicted_charge + (new_usage - old_usage),
447
+ usage_.fetch_sub(data.freed_charge + (new_usage - old_usage),
301
448
  std::memory_order_relaxed);
302
- if (evicted_charge < need_evict_charge) {
449
+ if (data.freed_charge < need_evict_charge) {
303
450
  return Status::MemoryLimit(
304
451
  "Insert failed because unable to evict entries to stay within "
305
452
  "capacity limit.");
@@ -311,13 +458,15 @@ inline Status HyperClockTable::ChargeUsageMaybeEvictStrict(
311
458
  }
312
459
  // If we needed to evict something and we are proceeding, we must have
313
460
  // evicted something.
314
- assert(evicted_count > 0);
461
+ assert(data.freed_count > 0);
315
462
  }
316
463
  return Status::OK();
317
464
  }
318
465
 
319
- inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict(
320
- size_t total_charge, size_t capacity, bool need_evict_for_occupancy) {
466
+ template <class Table>
467
+ inline bool BaseClockTable::ChargeUsageMaybeEvictNonStrict(
468
+ size_t total_charge, size_t capacity, bool need_evict_for_occupancy,
469
+ typename Table::InsertState& state) {
321
470
  // For simplicity, we consider that either the cache can accept the insert
322
471
  // with no evictions, or we must evict enough to make (at least) enough
323
472
  // space. It could lead to unnecessary failures or excessive evictions in
@@ -351,76 +500,85 @@ inline bool HyperClockTable::ChargeUsageMaybeEvictNonStrict(
351
500
  // deal with occupancy
352
501
  need_evict_charge = 1;
353
502
  }
354
- size_t evicted_charge = 0;
355
- size_t evicted_count = 0;
503
+ EvictionData data;
356
504
  if (need_evict_charge > 0) {
357
- Evict(need_evict_charge, &evicted_charge, &evicted_count);
505
+ static_cast<Table*>(this)->Evict(need_evict_charge, state, &data);
358
506
  // Deal with potential occupancy deficit
359
- if (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0) {
360
- assert(evicted_charge == 0);
507
+ if (UNLIKELY(need_evict_for_occupancy) && data.freed_count == 0) {
508
+ assert(data.freed_charge == 0);
361
509
  // Can't meet occupancy requirement
362
510
  return false;
363
511
  } else {
364
512
  // Update occupancy for evictions
365
- occupancy_.fetch_sub(evicted_count, std::memory_order_release);
513
+ occupancy_.fetch_sub(data.freed_count, std::memory_order_release);
366
514
  }
367
515
  }
368
516
  // Track new usage even if we weren't able to evict enough
369
- usage_.fetch_add(total_charge - evicted_charge, std::memory_order_relaxed);
517
+ usage_.fetch_add(total_charge - data.freed_charge, std::memory_order_relaxed);
370
518
  // No underflow
371
519
  assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2);
372
520
  // Success
373
521
  return true;
374
522
  }
375
523
 
376
- inline HyperClockTable::HandleImpl* HyperClockTable::StandaloneInsert(
377
- const ClockHandleBasicData& proto) {
378
- // Heap allocated separate from table
379
- HandleImpl* h = new HandleImpl();
380
- ClockHandleBasicData* h_alias = h;
381
- *h_alias = proto;
382
- h->SetStandalone();
383
- // Single reference (standalone entries only created if returning a refed
384
- // Handle back to user)
385
- uint64_t meta = uint64_t{ClockHandle::kStateInvisible}
386
- << ClockHandle::kStateShift;
387
- meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift;
388
- h->meta.store(meta, std::memory_order_release);
389
- // Keep track of how much of usage is standalone
390
- standalone_usage_.fetch_add(proto.GetTotalCharge(),
391
- std::memory_order_relaxed);
392
- return h;
524
+ void BaseClockTable::TrackAndReleaseEvictedEntry(
525
+ ClockHandle* h, BaseClockTable::EvictionData* data) {
526
+ data->freed_charge += h->GetTotalCharge();
527
+ data->freed_count += 1;
528
+
529
+ bool took_value_ownership = false;
530
+ if (eviction_callback_) {
531
+ // For key reconstructed from hash
532
+ UniqueId64x2 unhashed;
533
+ took_value_ownership = eviction_callback_(
534
+ ClockCacheShard<FixedHyperClockTable>::ReverseHash(
535
+ h->GetHash(), &unhashed, hash_seed_),
536
+ reinterpret_cast<Cache::Handle*>(h),
537
+ h->meta.load(std::memory_order_relaxed) & ClockHandle::kHitBitMask);
538
+ }
539
+ if (!took_value_ownership) {
540
+ h->FreeData(allocator_);
541
+ }
542
+ MarkEmpty(*h);
393
543
  }
394
544
 
395
- Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
396
- HandleImpl** handle, Cache::Priority priority,
397
- size_t capacity, bool strict_capacity_limit) {
545
+ template <class Table>
546
+ Status BaseClockTable::Insert(const ClockHandleBasicData& proto,
547
+ typename Table::HandleImpl** handle,
548
+ Cache::Priority priority, size_t capacity,
549
+ bool strict_capacity_limit) {
550
+ using HandleImpl = typename Table::HandleImpl;
551
+ Table& derived = static_cast<Table&>(*this);
552
+
553
+ typename Table::InsertState state;
554
+ derived.StartInsert(state);
555
+
398
556
  // Do we have the available occupancy? Optimistically assume we do
399
557
  // and deal with it if we don't.
400
558
  size_t old_occupancy = occupancy_.fetch_add(1, std::memory_order_acquire);
401
- auto revert_occupancy_fn = [&]() {
402
- occupancy_.fetch_sub(1, std::memory_order_relaxed);
403
- };
404
559
  // Whether we over-committed and need an eviction to make up for it
405
- bool need_evict_for_occupancy = old_occupancy >= occupancy_limit_;
560
+ bool need_evict_for_occupancy =
561
+ !derived.GrowIfNeeded(old_occupancy + 1, state);
406
562
 
407
563
  // Usage/capacity handling is somewhat different depending on
408
564
  // strict_capacity_limit, but mostly pessimistic.
409
565
  bool use_standalone_insert = false;
410
566
  const size_t total_charge = proto.GetTotalCharge();
411
567
  if (strict_capacity_limit) {
412
- Status s = ChargeUsageMaybeEvictStrict(total_charge, capacity,
413
- need_evict_for_occupancy);
568
+ Status s = ChargeUsageMaybeEvictStrict<Table>(
569
+ total_charge, capacity, need_evict_for_occupancy, state);
414
570
  if (!s.ok()) {
415
- revert_occupancy_fn();
571
+ // Revert occupancy
572
+ occupancy_.fetch_sub(1, std::memory_order_relaxed);
416
573
  return s;
417
574
  }
418
575
  } else {
419
576
  // Case strict_capacity_limit == false
420
- bool success = ChargeUsageMaybeEvictNonStrict(total_charge, capacity,
421
- need_evict_for_occupancy);
577
+ bool success = ChargeUsageMaybeEvictNonStrict<Table>(
578
+ total_charge, capacity, need_evict_for_occupancy, state);
422
579
  if (!success) {
423
- revert_occupancy_fn();
580
+ // Revert occupancy
581
+ occupancy_.fetch_sub(1, std::memory_order_relaxed);
424
582
  if (handle == nullptr) {
425
583
  // Don't insert the entry but still return ok, as if the entry
426
584
  // inserted into cache and evicted immediately.
@@ -433,11 +591,6 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
433
591
  }
434
592
  }
435
593
  }
436
- auto revert_usage_fn = [&]() {
437
- usage_.fetch_sub(total_charge, std::memory_order_relaxed);
438
- // No underflow
439
- assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2);
440
- };
441
594
 
442
595
  if (!use_standalone_insert) {
443
596
  // Attempt a table insert, but abort if we find an existing entry for the
@@ -451,129 +604,37 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
451
604
  uint64_t initial_countdown = GetInitialCountdown(priority);
452
605
  assert(initial_countdown > 0);
453
606
 
454
- size_t probe = 0;
455
- HandleImpl* e = FindSlot(
456
- proto.hashed_key,
457
- [&](HandleImpl* h) {
458
- // Optimistically transition the slot from "empty" to
459
- // "under construction" (no effect on other states)
460
- uint64_t old_meta =
461
- h->meta.fetch_or(uint64_t{ClockHandle::kStateOccupiedBit}
462
- << ClockHandle::kStateShift,
463
- std::memory_order_acq_rel);
464
- uint64_t old_state = old_meta >> ClockHandle::kStateShift;
465
-
466
- if (old_state == ClockHandle::kStateEmpty) {
467
- // We've started inserting into an available slot, and taken
468
- // ownership Save data fields
469
- ClockHandleBasicData* h_alias = h;
470
- *h_alias = proto;
471
-
472
- // Transition from "under construction" state to "visible" state
473
- uint64_t new_meta = uint64_t{ClockHandle::kStateVisible}
474
- << ClockHandle::kStateShift;
475
-
476
- // Maybe with an outstanding reference
477
- new_meta |= initial_countdown << ClockHandle::kAcquireCounterShift;
478
- new_meta |= (initial_countdown - (handle != nullptr))
479
- << ClockHandle::kReleaseCounterShift;
607
+ HandleImpl* e =
608
+ derived.DoInsert(proto, initial_countdown, handle != nullptr, state);
480
609
 
481
- #ifndef NDEBUG
482
- // Save the state transition, with assertion
483
- old_meta = h->meta.exchange(new_meta, std::memory_order_release);
484
- assert(old_meta >> ClockHandle::kStateShift ==
485
- ClockHandle::kStateConstruction);
486
- #else
487
- // Save the state transition
488
- h->meta.store(new_meta, std::memory_order_release);
489
- #endif
490
- return true;
491
- } else if (old_state != ClockHandle::kStateVisible) {
492
- // Slot not usable / touchable now
493
- return false;
494
- }
495
- // Existing, visible entry, which might be a match.
496
- // But first, we need to acquire a ref to read it. In fact, number of
497
- // refs for initial countdown, so that we boost the clock state if
498
- // this is a match.
499
- old_meta = h->meta.fetch_add(
500
- ClockHandle::kAcquireIncrement * initial_countdown,
501
- std::memory_order_acq_rel);
502
- // Like Lookup
503
- if ((old_meta >> ClockHandle::kStateShift) ==
504
- ClockHandle::kStateVisible) {
505
- // Acquired a read reference
506
- if (h->hashed_key == proto.hashed_key) {
507
- // Match. Release in a way that boosts the clock state
508
- old_meta = h->meta.fetch_add(
509
- ClockHandle::kReleaseIncrement * initial_countdown,
510
- std::memory_order_acq_rel);
511
- // Correct for possible (but rare) overflow
512
- CorrectNearOverflow(old_meta, h->meta);
513
- // Insert standalone instead (only if return handle needed)
514
- use_standalone_insert = true;
515
- return true;
516
- } else {
517
- // Mismatch. Pretend we never took the reference
518
- old_meta = h->meta.fetch_sub(
519
- ClockHandle::kAcquireIncrement * initial_countdown,
520
- std::memory_order_acq_rel);
521
- }
522
- } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) ==
523
- ClockHandle::kStateInvisible)) {
524
- // Pretend we never took the reference
525
- // WART: there's a tiny chance we release last ref to invisible
526
- // entry here. If that happens, we let eviction take care of it.
527
- old_meta = h->meta.fetch_sub(
528
- ClockHandle::kAcquireIncrement * initial_countdown,
529
- std::memory_order_acq_rel);
530
- } else {
531
- // For other states, incrementing the acquire counter has no effect
532
- // so we don't need to undo it.
533
- // Slot not usable / touchable now.
534
- }
535
- (void)old_meta;
536
- return false;
537
- },
538
- [&](HandleImpl* /*h*/) { return false; },
539
- [&](HandleImpl* h) {
540
- h->displacements.fetch_add(1, std::memory_order_relaxed);
541
- },
542
- probe);
543
- if (e == nullptr) {
544
- // Occupancy check and never abort FindSlot above should generally
545
- // prevent this, except it's theoretically possible for other threads
546
- // to evict and replace entries in the right order to hit every slot
547
- // when it is populated. Assuming random hashing, the chance of that
548
- // should be no higher than pow(kStrictLoadFactor, n) for n slots.
549
- // That should be infeasible for roughly n >= 256, so if this assertion
550
- // fails, that suggests something is going wrong.
551
- assert(GetTableSize() < 256);
552
- use_standalone_insert = true;
553
- }
554
- if (!use_standalone_insert) {
610
+ if (e) {
555
611
  // Successfully inserted
556
612
  if (handle) {
557
613
  *handle = e;
558
614
  }
559
615
  return Status::OK();
560
616
  }
561
- // Roll back table insertion
562
- Rollback(proto.hashed_key, e);
563
- revert_occupancy_fn();
617
+ // Not inserted
618
+ // Revert occupancy
619
+ occupancy_.fetch_sub(1, std::memory_order_relaxed);
564
620
  // Maybe fall back on standalone insert
565
621
  if (handle == nullptr) {
566
- revert_usage_fn();
622
+ // Revert usage
623
+ usage_.fetch_sub(total_charge, std::memory_order_relaxed);
624
+ // No underflow
625
+ assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2);
567
626
  // As if unrefed entry immdiately evicted
568
627
  proto.FreeData(allocator_);
569
628
  return Status::OK();
570
629
  }
630
+
631
+ use_standalone_insert = true;
571
632
  }
572
633
 
573
634
  // Run standalone insert
574
635
  assert(use_standalone_insert);
575
636
 
576
- *handle = StandaloneInsert(proto);
637
+ *handle = StandaloneInsert<HandleImpl>(proto);
577
638
 
578
639
  // The OkOverwritten status is used to count "redundant" insertions into
579
640
  // block cache. This implementation doesn't strictly check for redundant
@@ -583,37 +644,156 @@ Status HyperClockTable::Insert(const ClockHandleBasicData& proto,
583
644
  return Status::OkOverwritten();
584
645
  }
585
646
 
586
- HyperClockTable::HandleImpl* HyperClockTable::CreateStandalone(
587
- ClockHandleBasicData& proto, size_t capacity, bool strict_capacity_limit,
588
- bool allow_uncharged) {
589
- const size_t total_charge = proto.GetTotalCharge();
590
- if (strict_capacity_limit) {
591
- Status s = ChargeUsageMaybeEvictStrict(total_charge, capacity,
592
- /*need_evict_for_occupancy=*/false);
593
- if (!s.ok()) {
594
- if (allow_uncharged) {
595
- proto.total_charge = 0;
596
- } else {
597
- return nullptr;
598
- }
599
- }
600
- } else {
601
- // Case strict_capacity_limit == false
602
- bool success =
603
- ChargeUsageMaybeEvictNonStrict(total_charge, capacity,
604
- /*need_evict_for_occupancy=*/false);
605
- if (!success) {
606
- // Force the issue
607
- usage_.fetch_add(total_charge, std::memory_order_relaxed);
647
+ void BaseClockTable::Ref(ClockHandle& h) {
648
+ // Increment acquire counter
649
+ uint64_t old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement,
650
+ std::memory_order_acquire);
651
+
652
+ assert((old_meta >> ClockHandle::kStateShift) &
653
+ ClockHandle::kStateShareableBit);
654
+ // Must have already had a reference
655
+ assert(GetRefcount(old_meta) > 0);
656
+ (void)old_meta;
657
+ }
658
+
659
+ #ifndef NDEBUG
660
+ void BaseClockTable::TEST_RefN(ClockHandle& h, size_t n) {
661
+ // Increment acquire counter
662
+ uint64_t old_meta = h.meta.fetch_add(n * ClockHandle::kAcquireIncrement,
663
+ std::memory_order_acquire);
664
+
665
+ assert((old_meta >> ClockHandle::kStateShift) &
666
+ ClockHandle::kStateShareableBit);
667
+ (void)old_meta;
668
+ }
669
+
670
+ void BaseClockTable::TEST_ReleaseNMinus1(ClockHandle* h, size_t n) {
671
+ assert(n > 0);
672
+
673
+ // Like n-1 Releases, but assumes one more will happen in the caller to take
674
+ // care of anything like erasing an unreferenced, invisible entry.
675
+ uint64_t old_meta = h->meta.fetch_add(
676
+ (n - 1) * ClockHandle::kReleaseIncrement, std::memory_order_acquire);
677
+ assert((old_meta >> ClockHandle::kStateShift) &
678
+ ClockHandle::kStateShareableBit);
679
+ (void)old_meta;
680
+ }
681
+ #endif
682
+
683
+ FixedHyperClockTable::FixedHyperClockTable(
684
+ size_t capacity, bool /*strict_capacity_limit*/,
685
+ CacheMetadataChargePolicy metadata_charge_policy,
686
+ MemoryAllocator* allocator,
687
+ const Cache::EvictionCallback* eviction_callback, const uint32_t* hash_seed,
688
+ const Opts& opts)
689
+ : BaseClockTable(metadata_charge_policy, allocator, eviction_callback,
690
+ hash_seed),
691
+ length_bits_(CalcHashBits(capacity, opts.estimated_value_size,
692
+ metadata_charge_policy)),
693
+ length_bits_mask_((size_t{1} << length_bits_) - 1),
694
+ occupancy_limit_(static_cast<size_t>((uint64_t{1} << length_bits_) *
695
+ kStrictLoadFactor)),
696
+ array_(new HandleImpl[size_t{1} << length_bits_]) {
697
+ if (metadata_charge_policy ==
698
+ CacheMetadataChargePolicy::kFullChargeCacheMetadata) {
699
+ usage_ += size_t{GetTableSize()} * sizeof(HandleImpl);
700
+ }
701
+
702
+ static_assert(sizeof(HandleImpl) == 64U,
703
+ "Expecting size / alignment with common cache line size");
704
+ }
705
+
706
+ FixedHyperClockTable::~FixedHyperClockTable() {
707
+ // Assumes there are no references or active operations on any slot/element
708
+ // in the table.
709
+ for (size_t i = 0; i < GetTableSize(); i++) {
710
+ HandleImpl& h = array_[i];
711
+ switch (h.meta >> ClockHandle::kStateShift) {
712
+ case ClockHandle::kStateEmpty:
713
+ // noop
714
+ break;
715
+ case ClockHandle::kStateInvisible: // rare but possible
716
+ case ClockHandle::kStateVisible:
717
+ assert(GetRefcount(h.meta) == 0);
718
+ h.FreeData(allocator_);
719
+ #ifndef NDEBUG
720
+ Rollback(h.hashed_key, &h);
721
+ ReclaimEntryUsage(h.GetTotalCharge());
722
+ #endif
723
+ break;
724
+ // otherwise
725
+ default:
726
+ assert(false);
727
+ break;
608
728
  }
609
729
  }
610
730
 
611
- return StandaloneInsert(proto);
731
+ #ifndef NDEBUG
732
+ for (size_t i = 0; i < GetTableSize(); i++) {
733
+ assert(array_[i].displacements.load() == 0);
734
+ }
735
+ #endif
736
+
737
+ assert(usage_.load() == 0 ||
738
+ usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl));
739
+ assert(occupancy_ == 0);
740
+ }
741
+
742
+ void FixedHyperClockTable::StartInsert(InsertState&) {}
743
+
744
+ bool FixedHyperClockTable::GrowIfNeeded(size_t new_occupancy, InsertState&) {
745
+ return new_occupancy <= occupancy_limit_;
612
746
  }
613
747
 
614
- HyperClockTable::HandleImpl* HyperClockTable::Lookup(
748
+ FixedHyperClockTable::HandleImpl* FixedHyperClockTable::DoInsert(
749
+ const ClockHandleBasicData& proto, uint64_t initial_countdown,
750
+ bool keep_ref, InsertState&) {
751
+ bool already_matches = false;
752
+ HandleImpl* e = FindSlot(
753
+ proto.hashed_key,
754
+ [&](HandleImpl* h) {
755
+ return TryInsert(proto, *h, initial_countdown, keep_ref,
756
+ &already_matches);
757
+ },
758
+ [&](HandleImpl* h) {
759
+ if (already_matches) {
760
+ // Stop searching & roll back displacements
761
+ Rollback(proto.hashed_key, h);
762
+ return true;
763
+ } else {
764
+ // Keep going
765
+ return false;
766
+ }
767
+ },
768
+ [&](HandleImpl* h, bool is_last) {
769
+ if (is_last) {
770
+ // Search is ending. Roll back displacements
771
+ Rollback(proto.hashed_key, h);
772
+ } else {
773
+ h->displacements.fetch_add(1, std::memory_order_relaxed);
774
+ }
775
+ });
776
+ if (already_matches) {
777
+ // Insertion skipped
778
+ return nullptr;
779
+ }
780
+ if (e != nullptr) {
781
+ // Successfully inserted
782
+ return e;
783
+ }
784
+ // Else, no available slot found. Occupancy check should generally prevent
785
+ // this, except it's theoretically possible for other threads to evict and
786
+ // replace entries in the right order to hit every slot when it is populated.
787
+ // Assuming random hashing, the chance of that should be no higher than
788
+ // pow(kStrictLoadFactor, n) for n slots. That should be infeasible for
789
+ // roughly n >= 256, so if this assertion fails, that suggests something is
790
+ // going wrong.
791
+ assert(GetTableSize() < 256);
792
+ return nullptr;
793
+ }
794
+
795
+ FixedHyperClockTable::HandleImpl* FixedHyperClockTable::Lookup(
615
796
  const UniqueId64x2& hashed_key) {
616
- size_t probe = 0;
617
797
  HandleImpl* e = FindSlot(
618
798
  hashed_key,
619
799
  [&](HandleImpl* h) {
@@ -648,38 +828,38 @@ HyperClockTable::HandleImpl* HyperClockTable::Lookup(
648
828
  // Acquired a read reference
649
829
  if (h->hashed_key == hashed_key) {
650
830
  // Match
831
+ // Update the hit bit
832
+ if (eviction_callback_) {
833
+ h->meta.fetch_or(uint64_t{1} << ClockHandle::kHitBitShift,
834
+ std::memory_order_relaxed);
835
+ }
651
836
  return true;
652
837
  } else {
653
838
  // Mismatch. Pretend we never took the reference
654
- old_meta = h->meta.fetch_sub(ClockHandle::kAcquireIncrement,
655
- std::memory_order_release);
839
+ Unref(*h);
656
840
  }
657
841
  } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) ==
658
842
  ClockHandle::kStateInvisible)) {
659
843
  // Pretend we never took the reference
660
- // WART: there's a tiny chance we release last ref to invisible
661
- // entry here. If that happens, we let eviction take care of it.
662
- old_meta = h->meta.fetch_sub(ClockHandle::kAcquireIncrement,
663
- std::memory_order_release);
844
+ Unref(*h);
664
845
  } else {
665
846
  // For other states, incrementing the acquire counter has no effect
666
847
  // so we don't need to undo it. Furthermore, we cannot safely undo
667
848
  // it because we did not acquire a read reference to lock the
668
849
  // entry in a Shareable state.
669
850
  }
670
- (void)old_meta;
671
851
  return false;
672
852
  },
673
853
  [&](HandleImpl* h) {
674
854
  return h->displacements.load(std::memory_order_relaxed) == 0;
675
855
  },
676
- [&](HandleImpl* /*h*/) {}, probe);
856
+ [&](HandleImpl* /*h*/, bool /*is_last*/) {});
677
857
 
678
858
  return e;
679
859
  }
680
860
 
681
- bool HyperClockTable::Release(HandleImpl* h, bool useful,
682
- bool erase_if_last_ref) {
861
+ bool FixedHyperClockTable::Release(HandleImpl* h, bool useful,
862
+ bool erase_if_last_ref) {
683
863
  // In contrast with LRUCache's Release, this function won't delete the handle
684
864
  // when the cache is above capacity and the reference is the last one. Space
685
865
  // is only freed up by EvictFromClock (called by Insert when space is needed)
@@ -753,43 +933,19 @@ bool HyperClockTable::Release(HandleImpl* h, bool useful,
753
933
  }
754
934
  }
755
935
 
756
- void HyperClockTable::Ref(HandleImpl& h) {
757
- // Increment acquire counter
758
- uint64_t old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement,
759
- std::memory_order_acquire);
760
-
761
- assert((old_meta >> ClockHandle::kStateShift) &
762
- ClockHandle::kStateShareableBit);
763
- // Must have already had a reference
764
- assert(GetRefcount(old_meta) > 0);
765
- (void)old_meta;
766
- }
767
-
768
- void HyperClockTable::TEST_RefN(HandleImpl& h, size_t n) {
769
- // Increment acquire counter
770
- uint64_t old_meta = h.meta.fetch_add(n * ClockHandle::kAcquireIncrement,
771
- std::memory_order_acquire);
772
-
773
- assert((old_meta >> ClockHandle::kStateShift) &
774
- ClockHandle::kStateShareableBit);
775
- (void)old_meta;
776
- }
777
-
778
- void HyperClockTable::TEST_ReleaseN(HandleImpl* h, size_t n) {
936
+ #ifndef NDEBUG
937
+ void FixedHyperClockTable::TEST_ReleaseN(HandleImpl* h, size_t n) {
779
938
  if (n > 0) {
780
- // Split into n - 1 and 1 steps.
781
- uint64_t old_meta = h->meta.fetch_add(
782
- (n - 1) * ClockHandle::kReleaseIncrement, std::memory_order_acquire);
783
- assert((old_meta >> ClockHandle::kStateShift) &
784
- ClockHandle::kStateShareableBit);
785
- (void)old_meta;
939
+ // Do n-1 simple releases first
940
+ TEST_ReleaseNMinus1(h, n);
786
941
 
942
+ // Then the last release might be more involved
787
943
  Release(h, /*useful*/ true, /*erase_if_last_ref*/ false);
788
944
  }
789
945
  }
946
+ #endif
790
947
 
791
- void HyperClockTable::Erase(const UniqueId64x2& hashed_key) {
792
- size_t probe = 0;
948
+ void FixedHyperClockTable::Erase(const UniqueId64x2& hashed_key) {
793
949
  (void)FindSlot(
794
950
  hashed_key,
795
951
  [&](HandleImpl* h) {
@@ -816,8 +972,7 @@ void HyperClockTable::Erase(const UniqueId64x2& hashed_key) {
816
972
  if (refcount > 1) {
817
973
  // Not last ref at some point in time during this Erase call
818
974
  // Pretend we never took the reference
819
- h->meta.fetch_sub(ClockHandle::kAcquireIncrement,
820
- std::memory_order_release);
975
+ Unref(*h);
821
976
  break;
822
977
  } else if (h->meta.compare_exchange_weak(
823
978
  old_meta,
@@ -837,16 +992,12 @@ void HyperClockTable::Erase(const UniqueId64x2& hashed_key) {
837
992
  }
838
993
  } else {
839
994
  // Mismatch. Pretend we never took the reference
840
- h->meta.fetch_sub(ClockHandle::kAcquireIncrement,
841
- std::memory_order_release);
995
+ Unref(*h);
842
996
  }
843
997
  } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) ==
844
998
  ClockHandle::kStateInvisible)) {
845
999
  // Pretend we never took the reference
846
- // WART: there's a tiny chance we release last ref to invisible
847
- // entry here. If that happens, we let eviction take care of it.
848
- h->meta.fetch_sub(ClockHandle::kAcquireIncrement,
849
- std::memory_order_release);
1000
+ Unref(*h);
850
1001
  } else {
851
1002
  // For other states, incrementing the acquire counter has no effect
852
1003
  // so we don't need to undo it.
@@ -856,51 +1007,10 @@ void HyperClockTable::Erase(const UniqueId64x2& hashed_key) {
856
1007
  [&](HandleImpl* h) {
857
1008
  return h->displacements.load(std::memory_order_relaxed) == 0;
858
1009
  },
859
- [&](HandleImpl* /*h*/) {}, probe);
860
- }
861
-
862
- void HyperClockTable::ConstApplyToEntriesRange(
863
- std::function<void(const HandleImpl&)> func, size_t index_begin,
864
- size_t index_end, bool apply_if_will_be_deleted) const {
865
- uint64_t check_state_mask = ClockHandle::kStateShareableBit;
866
- if (!apply_if_will_be_deleted) {
867
- check_state_mask |= ClockHandle::kStateVisibleBit;
868
- }
869
-
870
- for (size_t i = index_begin; i < index_end; i++) {
871
- HandleImpl& h = array_[i];
872
-
873
- // Note: to avoid using compare_exchange, we have to be extra careful.
874
- uint64_t old_meta = h.meta.load(std::memory_order_relaxed);
875
- // Check if it's an entry visible to lookups
876
- if ((old_meta >> ClockHandle::kStateShift) & check_state_mask) {
877
- // Increment acquire counter. Note: it's possible that the entry has
878
- // completely changed since we loaded old_meta, but incrementing acquire
879
- // count is always safe. (Similar to optimistic Lookup here.)
880
- old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement,
881
- std::memory_order_acquire);
882
- // Check whether we actually acquired a reference.
883
- if ((old_meta >> ClockHandle::kStateShift) &
884
- ClockHandle::kStateShareableBit) {
885
- // Apply func if appropriate
886
- if ((old_meta >> ClockHandle::kStateShift) & check_state_mask) {
887
- func(h);
888
- }
889
- // Pretend we never took the reference
890
- h.meta.fetch_sub(ClockHandle::kAcquireIncrement,
891
- std::memory_order_release);
892
- // No net change, so don't need to check for overflow
893
- } else {
894
- // For other states, incrementing the acquire counter has no effect
895
- // so we don't need to undo it. Furthermore, we cannot safely undo
896
- // it because we did not acquire a read reference to lock the
897
- // entry in a Shareable state.
898
- }
899
- }
900
- }
1010
+ [&](HandleImpl* /*h*/, bool /*is_last*/) {});
901
1011
  }
902
1012
 
903
- void HyperClockTable::EraseUnRefEntries() {
1013
+ void FixedHyperClockTable::EraseUnRefEntries() {
904
1014
  for (size_t i = 0; i <= this->length_bits_mask_; i++) {
905
1015
  HandleImpl& h = array_[i];
906
1016
 
@@ -921,10 +1031,10 @@ void HyperClockTable::EraseUnRefEntries() {
921
1031
  }
922
1032
  }
923
1033
 
924
- inline HyperClockTable::HandleImpl* HyperClockTable::FindSlot(
925
- const UniqueId64x2& hashed_key, std::function<bool(HandleImpl*)> match_fn,
926
- std::function<bool(HandleImpl*)> abort_fn,
927
- std::function<void(HandleImpl*)> update_fn, size_t& probe) {
1034
+ template <typename MatchFn, typename AbortFn, typename UpdateFn>
1035
+ inline FixedHyperClockTable::HandleImpl* FixedHyperClockTable::FindSlot(
1036
+ const UniqueId64x2& hashed_key, const MatchFn& match_fn,
1037
+ const AbortFn& abort_fn, const UpdateFn& update_fn) {
928
1038
  // NOTE: upper 32 bits of hashed_key[0] is used for sharding
929
1039
  //
930
1040
  // We use double-hashing probing. Every probe in the sequence is a
@@ -938,26 +1048,27 @@ inline HyperClockTable::HandleImpl* HyperClockTable::FindSlot(
938
1048
  // TODO: we could also reconsider linear probing, though locality benefits
939
1049
  // are limited because each slot is a full cache line
940
1050
  size_t increment = static_cast<size_t>(hashed_key[0]) | 1U;
941
- size_t current = ModTableSize(base + probe * increment);
942
- while (probe <= length_bits_mask_) {
1051
+ size_t first = ModTableSize(base);
1052
+ size_t current = first;
1053
+ bool is_last;
1054
+ do {
943
1055
  HandleImpl* h = &array_[current];
944
1056
  if (match_fn(h)) {
945
- probe++;
946
1057
  return h;
947
1058
  }
948
1059
  if (abort_fn(h)) {
949
1060
  return nullptr;
950
1061
  }
951
- probe++;
952
- update_fn(h);
953
1062
  current = ModTableSize(current + increment);
954
- }
1063
+ is_last = current == first;
1064
+ update_fn(h, is_last);
1065
+ } while (!is_last);
955
1066
  // We looped back.
956
1067
  return nullptr;
957
1068
  }
958
1069
 
959
- inline void HyperClockTable::Rollback(const UniqueId64x2& hashed_key,
960
- const HandleImpl* h) {
1070
+ inline void FixedHyperClockTable::Rollback(const UniqueId64x2& hashed_key,
1071
+ const HandleImpl* h) {
961
1072
  size_t current = ModTableSize(hashed_key[1]);
962
1073
  size_t increment = static_cast<size_t>(hashed_key[0]) | 1U;
963
1074
  while (&array_[current] != h) {
@@ -966,7 +1077,7 @@ inline void HyperClockTable::Rollback(const UniqueId64x2& hashed_key,
966
1077
  }
967
1078
  }
968
1079
 
969
- inline void HyperClockTable::ReclaimEntryUsage(size_t total_charge) {
1080
+ inline void FixedHyperClockTable::ReclaimEntryUsage(size_t total_charge) {
970
1081
  auto old_occupancy = occupancy_.fetch_sub(1U, std::memory_order_release);
971
1082
  (void)old_occupancy;
972
1083
  // No underflow
@@ -977,8 +1088,8 @@ inline void HyperClockTable::ReclaimEntryUsage(size_t total_charge) {
977
1088
  assert(old_usage >= total_charge);
978
1089
  }
979
1090
 
980
- inline void HyperClockTable::Evict(size_t requested_charge,
981
- size_t* freed_charge, size_t* freed_count) {
1091
+ inline void FixedHyperClockTable::Evict(size_t requested_charge, InsertState&,
1092
+ EvictionData* data) {
982
1093
  // precondition
983
1094
  assert(requested_charge > 0);
984
1095
 
@@ -997,33 +1108,18 @@ inline void HyperClockTable::Evict(size_t requested_charge,
997
1108
  uint64_t max_clock_pointer =
998
1109
  old_clock_pointer + (ClockHandle::kMaxCountdown << length_bits_);
999
1110
 
1000
- // For key reconstructed from hash
1001
- UniqueId64x2 unhashed;
1002
-
1003
1111
  for (;;) {
1004
1112
  for (size_t i = 0; i < step_size; i++) {
1005
1113
  HandleImpl& h = array_[ModTableSize(Lower32of64(old_clock_pointer + i))];
1006
1114
  bool evicting = ClockUpdate(h);
1007
1115
  if (evicting) {
1008
1116
  Rollback(h.hashed_key, &h);
1009
- *freed_charge += h.GetTotalCharge();
1010
- *freed_count += 1;
1011
- bool took_ownership = false;
1012
- if (eviction_callback_) {
1013
- took_ownership =
1014
- eviction_callback_(ClockCacheShard<HyperClockTable>::ReverseHash(
1015
- h.GetHash(), &unhashed, hash_seed_),
1016
- reinterpret_cast<Cache::Handle*>(&h));
1017
- }
1018
- if (!took_ownership) {
1019
- h.FreeData(allocator_);
1020
- }
1021
- MarkEmpty(h);
1117
+ TrackAndReleaseEvictedEntry(&h, data);
1022
1118
  }
1023
1119
  }
1024
1120
 
1025
1121
  // Loop exit condition
1026
- if (*freed_charge >= requested_charge) {
1122
+ if (data->freed_charge >= requested_charge) {
1027
1123
  return;
1028
1124
  }
1029
1125
  if (old_clock_pointer >= max_clock_pointer) {
@@ -1063,38 +1159,35 @@ void ClockCacheShard<Table>::ApplyToSomeEntries(
1063
1159
  size_t charge,
1064
1160
  const Cache::CacheItemHelper* helper)>& callback,
1065
1161
  size_t average_entries_per_lock, size_t* state) {
1066
- // The state is essentially going to be the starting hash, which works
1067
- // nicely even if we resize between calls because we use upper-most
1068
- // hash bits for table indexes.
1069
- size_t length_bits = table_.GetLengthBits();
1162
+ // The state will be a simple index into the table. Even with a dynamic
1163
+ // hyper clock cache, entries will generally stay in their existing
1164
+ // slots, so we don't need to be aware of the high-level organization
1165
+ // that makes lookup efficient.
1070
1166
  size_t length = table_.GetTableSize();
1071
1167
 
1072
1168
  assert(average_entries_per_lock > 0);
1073
- // Assuming we are called with same average_entries_per_lock repeatedly,
1074
- // this simplifies some logic (index_end will not overflow).
1075
- assert(average_entries_per_lock < length || *state == 0);
1076
1169
 
1077
- size_t index_begin = *state >> (sizeof(size_t) * 8u - length_bits);
1170
+ size_t index_begin = *state;
1078
1171
  size_t index_end = index_begin + average_entries_per_lock;
1079
1172
  if (index_end >= length) {
1080
1173
  // Going to end.
1081
1174
  index_end = length;
1082
1175
  *state = SIZE_MAX;
1083
1176
  } else {
1084
- *state = index_end << (sizeof(size_t) * 8u - length_bits);
1177
+ *state = index_end;
1085
1178
  }
1086
1179
 
1087
1180
  auto hash_seed = table_.GetHashSeed();
1088
- table_.ConstApplyToEntriesRange(
1181
+ ConstApplyToEntriesRange(
1089
1182
  [callback, hash_seed](const HandleImpl& h) {
1090
1183
  UniqueId64x2 unhashed;
1091
1184
  callback(ReverseHash(h.hashed_key, &unhashed, hash_seed), h.value,
1092
1185
  h.GetTotalCharge(), h.helper);
1093
1186
  },
1094
- index_begin, index_end, false);
1187
+ table_.HandlePtr(index_begin), table_.HandlePtr(index_end), false);
1095
1188
  }
1096
1189
 
1097
- int HyperClockTable::CalcHashBits(
1190
+ int FixedHyperClockTable::CalcHashBits(
1098
1191
  size_t capacity, size_t estimated_value_size,
1099
1192
  CacheMetadataChargePolicy metadata_charge_policy) {
1100
1193
  double average_slot_charge = estimated_value_size * kLoadFactor;
@@ -1146,18 +1239,15 @@ Status ClockCacheShard<Table>::Insert(const Slice& key,
1146
1239
  proto.value = value;
1147
1240
  proto.helper = helper;
1148
1241
  proto.total_charge = charge;
1149
- return table_.Insert(proto, handle, priority,
1150
- capacity_.load(std::memory_order_relaxed),
1151
- strict_capacity_limit_.load(std::memory_order_relaxed));
1242
+ return table_.template Insert<Table>(
1243
+ proto, handle, priority, capacity_.load(std::memory_order_relaxed),
1244
+ strict_capacity_limit_.load(std::memory_order_relaxed));
1152
1245
  }
1153
1246
 
1154
1247
  template <class Table>
1155
- typename ClockCacheShard<Table>::HandleImpl*
1156
- ClockCacheShard<Table>::CreateStandalone(const Slice& key,
1157
- const UniqueId64x2& hashed_key,
1158
- Cache::ObjectPtr obj,
1159
- const Cache::CacheItemHelper* helper,
1160
- size_t charge, bool allow_uncharged) {
1248
+ typename Table::HandleImpl* ClockCacheShard<Table>::CreateStandalone(
1249
+ const Slice& key, const UniqueId64x2& hashed_key, Cache::ObjectPtr obj,
1250
+ const Cache::CacheItemHelper* helper, size_t charge, bool allow_uncharged) {
1161
1251
  if (UNLIKELY(key.size() != kCacheKeySize)) {
1162
1252
  return nullptr;
1163
1253
  }
@@ -1166,7 +1256,7 @@ ClockCacheShard<Table>::CreateStandalone(const Slice& key,
1166
1256
  proto.value = obj;
1167
1257
  proto.helper = helper;
1168
1258
  proto.total_charge = charge;
1169
- return table_.CreateStandalone(
1259
+ return table_.template CreateStandalone<Table>(
1170
1260
  proto, capacity_.load(std::memory_order_relaxed),
1171
1261
  strict_capacity_limit_.load(std::memory_order_relaxed), allow_uncharged);
1172
1262
  }
@@ -1198,6 +1288,7 @@ bool ClockCacheShard<Table>::Release(HandleImpl* handle, bool useful,
1198
1288
  return table_.Release(handle, useful, erase_if_last_ref);
1199
1289
  }
1200
1290
 
1291
+ #ifndef NDEBUG
1201
1292
  template <class Table>
1202
1293
  void ClockCacheShard<Table>::TEST_RefN(HandleImpl* h, size_t n) {
1203
1294
  table_.TEST_RefN(*h, n);
@@ -1207,6 +1298,7 @@ template <class Table>
1207
1298
  void ClockCacheShard<Table>::TEST_ReleaseN(HandleImpl* h, size_t n) {
1208
1299
  table_.TEST_ReleaseN(h, n);
1209
1300
  }
1301
+ #endif
1210
1302
 
1211
1303
  template <class Table>
1212
1304
  bool ClockCacheShard<Table>::Release(HandleImpl* handle,
@@ -1249,7 +1341,7 @@ size_t ClockCacheShard<Table>::GetPinnedUsage() const {
1249
1341
  size_t table_pinned_usage = 0;
1250
1342
  const bool charge_metadata =
1251
1343
  metadata_charge_policy_ == kFullChargeCacheMetadata;
1252
- table_.ConstApplyToEntriesRange(
1344
+ ConstApplyToEntriesRange(
1253
1345
  [&table_pinned_usage, charge_metadata](const HandleImpl& h) {
1254
1346
  uint64_t meta = h.meta.load(std::memory_order_relaxed);
1255
1347
  uint64_t refcount = GetRefcount(meta);
@@ -1262,7 +1354,7 @@ size_t ClockCacheShard<Table>::GetPinnedUsage() const {
1262
1354
  }
1263
1355
  }
1264
1356
  },
1265
- 0, table_.GetTableSize(), true);
1357
+ table_.HandlePtr(0), table_.HandlePtr(table_.GetTableSize()), true);
1266
1358
 
1267
1359
  return table_pinned_usage + table_.GetStandaloneUsage();
1268
1360
  }
@@ -1283,36 +1375,40 @@ size_t ClockCacheShard<Table>::GetTableAddressCount() const {
1283
1375
  }
1284
1376
 
1285
1377
  // Explicit instantiation
1286
- template class ClockCacheShard<HyperClockTable>;
1378
+ template class ClockCacheShard<FixedHyperClockTable>;
1379
+ template class ClockCacheShard<AutoHyperClockTable>;
1287
1380
 
1288
- HyperClockCache::HyperClockCache(const HyperClockCacheOptions& opts)
1289
- : ShardedCache(opts) {
1290
- assert(opts.estimated_entry_charge > 0 ||
1291
- opts.metadata_charge_policy != kDontChargeCacheMetadata);
1381
+ template <class Table>
1382
+ BaseHyperClockCache<Table>::BaseHyperClockCache(
1383
+ const HyperClockCacheOptions& opts)
1384
+ : ShardedCache<ClockCacheShard<Table>>(opts) {
1292
1385
  // TODO: should not need to go through two levels of pointer indirection to
1293
1386
  // get to table entries
1294
- size_t per_shard = GetPerShardCapacity();
1387
+ size_t per_shard = this->GetPerShardCapacity();
1295
1388
  MemoryAllocator* alloc = this->memory_allocator();
1296
- InitShards([&](Shard* cs) {
1297
- HyperClockTable::Opts table_opts;
1298
- table_opts.estimated_value_size = opts.estimated_entry_charge;
1389
+ this->InitShards([&](Shard* cs) {
1390
+ typename Table::Opts table_opts{opts};
1299
1391
  new (cs) Shard(per_shard, opts.strict_capacity_limit,
1300
- opts.metadata_charge_policy, alloc, &eviction_callback_,
1301
- &hash_seed_, table_opts);
1392
+ opts.metadata_charge_policy, alloc,
1393
+ &this->eviction_callback_, &this->hash_seed_, table_opts);
1302
1394
  });
1303
1395
  }
1304
1396
 
1305
- Cache::ObjectPtr HyperClockCache::Value(Handle* handle) {
1306
- return reinterpret_cast<const HandleImpl*>(handle)->value;
1397
+ template <class Table>
1398
+ Cache::ObjectPtr BaseHyperClockCache<Table>::Value(Handle* handle) {
1399
+ return reinterpret_cast<const typename Table::HandleImpl*>(handle)->value;
1307
1400
  }
1308
1401
 
1309
- size_t HyperClockCache::GetCharge(Handle* handle) const {
1310
- return reinterpret_cast<const HandleImpl*>(handle)->GetTotalCharge();
1402
+ template <class Table>
1403
+ size_t BaseHyperClockCache<Table>::GetCharge(Handle* handle) const {
1404
+ return reinterpret_cast<const typename Table::HandleImpl*>(handle)
1405
+ ->GetTotalCharge();
1311
1406
  }
1312
1407
 
1313
- const Cache::CacheItemHelper* HyperClockCache::GetCacheItemHelper(
1408
+ template <class Table>
1409
+ const Cache::CacheItemHelper* BaseHyperClockCache<Table>::GetCacheItemHelper(
1314
1410
  Handle* handle) const {
1315
- auto h = reinterpret_cast<const HandleImpl*>(handle);
1411
+ auto h = reinterpret_cast<const typename Table::HandleImpl*>(handle);
1316
1412
  return h->helper;
1317
1413
  }
1318
1414
 
@@ -1325,7 +1421,7 @@ namespace {
1325
1421
  // or actual occupancy very close to limit (>95% of limit).
1326
1422
  // Also, for each shard compute the recommended estimated_entry_charge,
1327
1423
  // and keep the minimum one for use as overall recommendation.
1328
- void AddShardEvaluation(const HyperClockCache::Shard& shard,
1424
+ void AddShardEvaluation(const FixedHyperClockCache::Shard& shard,
1329
1425
  std::vector<double>& predicted_load_factors,
1330
1426
  size_t& min_recommendation) {
1331
1427
  size_t usage = shard.GetUsage() - shard.GetStandaloneUsage();
@@ -1343,7 +1439,7 @@ void AddShardEvaluation(const HyperClockCache::Shard& shard,
1343
1439
  // If filled to capacity, what would the occupancy ratio be?
1344
1440
  double ratio = occ_ratio / usage_ratio;
1345
1441
  // Given max load factor, what that load factor be?
1346
- double lf = ratio * kStrictLoadFactor;
1442
+ double lf = ratio * FixedHyperClockTable::kStrictLoadFactor;
1347
1443
  predicted_load_factors.push_back(lf);
1348
1444
 
1349
1445
  // Update min_recommendation also
@@ -1351,17 +1447,87 @@ void AddShardEvaluation(const HyperClockCache::Shard& shard,
1351
1447
  min_recommendation = std::min(min_recommendation, recommendation);
1352
1448
  }
1353
1449
 
1450
+ bool IsSlotOccupied(const ClockHandle& h) {
1451
+ return (h.meta.load(std::memory_order_relaxed) >> ClockHandle::kStateShift) !=
1452
+ 0;
1453
+ }
1354
1454
  } // namespace
1355
1455
 
1356
- void HyperClockCache::ReportProblems(
1456
+ // NOTE: GCC might warn about subobject linkage if this is in anon namespace
1457
+ template <size_t N = 500>
1458
+ class LoadVarianceStats {
1459
+ public:
1460
+ std::string Report() const {
1461
+ return "Overall " + PercentStr(positive_count_, samples_) + " (" +
1462
+ std::to_string(positive_count_) + "/" + std::to_string(samples_) +
1463
+ "), Min/Max/Window = " + PercentStr(min_, N) + "/" +
1464
+ PercentStr(max_, N) + "/" + std::to_string(N) +
1465
+ ", MaxRun{Pos/Neg} = " + std::to_string(max_pos_run_) + "/" +
1466
+ std::to_string(max_neg_run_) + "\n";
1467
+ }
1468
+
1469
+ void Add(bool positive) {
1470
+ recent_[samples_ % N] = positive;
1471
+ if (positive) {
1472
+ ++positive_count_;
1473
+ ++cur_pos_run_;
1474
+ max_pos_run_ = std::max(max_pos_run_, cur_pos_run_);
1475
+ cur_neg_run_ = 0;
1476
+ } else {
1477
+ ++cur_neg_run_;
1478
+ max_neg_run_ = std::max(max_neg_run_, cur_neg_run_);
1479
+ cur_pos_run_ = 0;
1480
+ }
1481
+ ++samples_;
1482
+ if (samples_ >= N) {
1483
+ size_t count_set = recent_.count();
1484
+ max_ = std::max(max_, count_set);
1485
+ min_ = std::min(min_, count_set);
1486
+ }
1487
+ }
1488
+
1489
+ private:
1490
+ size_t max_ = 0;
1491
+ size_t min_ = N;
1492
+ size_t positive_count_ = 0;
1493
+ size_t samples_ = 0;
1494
+ size_t max_pos_run_ = 0;
1495
+ size_t cur_pos_run_ = 0;
1496
+ size_t max_neg_run_ = 0;
1497
+ size_t cur_neg_run_ = 0;
1498
+ std::bitset<N> recent_;
1499
+
1500
+ static std::string PercentStr(size_t a, size_t b) {
1501
+ return std::to_string(uint64_t{100} * a / b) + "%";
1502
+ }
1503
+ };
1504
+
1505
+ template <class Table>
1506
+ void BaseHyperClockCache<Table>::ReportProblems(
1507
+ const std::shared_ptr<Logger>& info_log) const {
1508
+ if (info_log->GetInfoLogLevel() <= InfoLogLevel::DEBUG_LEVEL) {
1509
+ LoadVarianceStats slot_stats;
1510
+ this->ForEachShard([&](const BaseHyperClockCache<Table>::Shard* shard) {
1511
+ size_t count = shard->GetTableAddressCount();
1512
+ for (size_t i = 0; i < count; ++i) {
1513
+ slot_stats.Add(IsSlotOccupied(*shard->GetTable().HandlePtr(i)));
1514
+ }
1515
+ });
1516
+ ROCKS_LOG_AT_LEVEL(info_log, InfoLogLevel::DEBUG_LEVEL,
1517
+ "Slot occupancy stats: %s", slot_stats.Report().c_str());
1518
+ }
1519
+ }
1520
+
1521
+ void FixedHyperClockCache::ReportProblems(
1357
1522
  const std::shared_ptr<Logger>& info_log) const {
1523
+ BaseHyperClockCache::ReportProblems(info_log);
1524
+
1358
1525
  uint32_t shard_count = GetNumShards();
1359
1526
  std::vector<double> predicted_load_factors;
1360
1527
  size_t min_recommendation = SIZE_MAX;
1361
- const_cast<HyperClockCache*>(this)->ForEachShard(
1362
- [&](HyperClockCache::Shard* shard) {
1363
- AddShardEvaluation(*shard, predicted_load_factors, min_recommendation);
1364
- });
1528
+ ForEachShard([&](const FixedHyperClockCache::Shard* shard) {
1529
+ AddShardEvaluation(*shard, predicted_load_factors, min_recommendation);
1530
+ });
1365
1531
 
1366
1532
  if (predicted_load_factors.empty()) {
1367
1533
  // None operating "at limit" -> nothing to report
@@ -1382,17 +1548,19 @@ void HyperClockCache::ReportProblems(
1382
1548
  predicted_load_factors.end(), 0.0) /
1383
1549
  shard_count;
1384
1550
 
1385
- constexpr double kLowSpecLoadFactor = kLoadFactor / 2;
1386
- constexpr double kMidSpecLoadFactor = kLoadFactor / 1.414;
1387
- if (average_load_factor > kLoadFactor) {
1551
+ constexpr double kLowSpecLoadFactor = FixedHyperClockTable::kLoadFactor / 2;
1552
+ constexpr double kMidSpecLoadFactor =
1553
+ FixedHyperClockTable::kLoadFactor / 1.414;
1554
+ if (average_load_factor > FixedHyperClockTable::kLoadFactor) {
1388
1555
  // Out of spec => Consider reporting load factor too high
1389
1556
  // Estimate effective overall capacity loss due to enforcing occupancy limit
1390
1557
  double lost_portion = 0.0;
1391
1558
  int over_count = 0;
1392
1559
  for (double lf : predicted_load_factors) {
1393
- if (lf > kStrictLoadFactor) {
1560
+ if (lf > FixedHyperClockTable::kStrictLoadFactor) {
1394
1561
  ++over_count;
1395
- lost_portion += (lf - kStrictLoadFactor) / lf / shard_count;
1562
+ lost_portion +=
1563
+ (lf - FixedHyperClockTable::kStrictLoadFactor) / lf / shard_count;
1396
1564
  }
1397
1565
  }
1398
1566
  // >= 20% loss -> error
@@ -1416,10 +1584,10 @@ void HyperClockCache::ReportProblems(
1416
1584
  if (report) {
1417
1585
  ROCKS_LOG_AT_LEVEL(
1418
1586
  info_log, level,
1419
- "HyperClockCache@%p unable to use estimated %.1f%% capacity because "
1420
- "of "
1421
- "full occupancy in %d/%u cache shards (estimated_entry_charge too "
1422
- "high). Recommend estimated_entry_charge=%zu",
1587
+ "FixedHyperClockCache@%p unable to use estimated %.1f%% capacity "
1588
+ "because of full occupancy in %d/%u cache shards "
1589
+ "(estimated_entry_charge too high). "
1590
+ "Recommend estimated_entry_charge=%zu",
1423
1591
  this, lost_portion * 100.0, over_count, (unsigned)shard_count,
1424
1592
  min_recommendation);
1425
1593
  }
@@ -1437,8 +1605,8 @@ void HyperClockCache::ReportProblems(
1437
1605
  }
1438
1606
  ROCKS_LOG_AT_LEVEL(
1439
1607
  info_log, level,
1440
- "HyperClockCache@%p table has low occupancy at full capacity. Higher "
1441
- "estimated_entry_charge (about %.1fx) would likely improve "
1608
+ "FixedHyperClockCache@%p table has low occupancy at full capacity. "
1609
+ "Higher estimated_entry_charge (about %.1fx) would likely improve "
1442
1610
  "performance. Recommend estimated_entry_charge=%zu",
1443
1611
  this, kMidSpecLoadFactor / average_load_factor, min_recommendation);
1444
1612
  }
@@ -1470,8 +1638,17 @@ std::shared_ptr<Cache> HyperClockCacheOptions::MakeSharedCache() const {
1470
1638
  opts.num_shard_bits =
1471
1639
  GetDefaultCacheShardBits(opts.capacity, min_shard_size);
1472
1640
  }
1473
- std::shared_ptr<Cache> cache =
1474
- std::make_shared<clock_cache::HyperClockCache>(opts);
1641
+ std::shared_ptr<Cache> cache;
1642
+ if (opts.estimated_entry_charge == 0) {
1643
+ // BEGIN placeholder logic to be removed
1644
+ // This is sufficient to get the placeholder Auto working in unit tests
1645
+ // much like the Fixed version.
1646
+ opts.estimated_entry_charge = opts.min_avg_entry_charge;
1647
+ // END placeholder logic to be removed
1648
+ cache = std::make_shared<clock_cache::AutoHyperClockCache>(opts);
1649
+ } else {
1650
+ cache = std::make_shared<clock_cache::FixedHyperClockCache>(opts);
1651
+ }
1475
1652
  if (opts.secondary_cache) {
1476
1653
  cache = std::make_shared<CacheWithSecondaryAdapter>(cache,
1477
1654
  opts.secondary_cache);