@nxtedition/rocksdb 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1088) hide show
  1. package/CHANGELOG.md +294 -0
  2. package/LICENSE +21 -0
  3. package/README.md +102 -0
  4. package/UPGRADING.md +91 -0
  5. package/binding.cc +1276 -0
  6. package/binding.gyp +73 -0
  7. package/binding.js +1 -0
  8. package/chained-batch.js +44 -0
  9. package/deps/rocksdb/build_version.cc +4 -0
  10. package/deps/rocksdb/rocksdb/CMakeLists.txt +1356 -0
  11. package/deps/rocksdb/rocksdb/COPYING +339 -0
  12. package/deps/rocksdb/rocksdb/LICENSE.Apache +202 -0
  13. package/deps/rocksdb/rocksdb/LICENSE.leveldb +29 -0
  14. package/deps/rocksdb/rocksdb/Makefile +2521 -0
  15. package/deps/rocksdb/rocksdb/TARGETS +2100 -0
  16. package/deps/rocksdb/rocksdb/cache/cache.cc +63 -0
  17. package/deps/rocksdb/rocksdb/cache/cache_bench.cc +381 -0
  18. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +114 -0
  19. package/deps/rocksdb/rocksdb/cache/cache_test.cc +775 -0
  20. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +769 -0
  21. package/deps/rocksdb/rocksdb/cache/clock_cache.h +16 -0
  22. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +574 -0
  23. package/deps/rocksdb/rocksdb/cache/lru_cache.h +339 -0
  24. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +199 -0
  25. package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +162 -0
  26. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +111 -0
  27. package/deps/rocksdb/rocksdb/cmake/RocksDBConfig.cmake.in +54 -0
  28. package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +7 -0
  29. package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +29 -0
  30. package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +29 -0
  31. package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +29 -0
  32. package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +33 -0
  33. package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +29 -0
  34. package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +29 -0
  35. package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +29 -0
  36. package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +10 -0
  37. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +108 -0
  38. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +115 -0
  39. package/deps/rocksdb/rocksdb/db/blob/blob_constants.h +16 -0
  40. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +154 -0
  41. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.h +67 -0
  42. package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +206 -0
  43. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +316 -0
  44. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +91 -0
  45. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +660 -0
  46. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +99 -0
  47. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +49 -0
  48. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +268 -0
  49. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage.cc +134 -0
  50. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage.h +57 -0
  51. package/deps/rocksdb/rocksdb/db/blob/blob_file_garbage_test.cc +173 -0
  52. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +55 -0
  53. package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +164 -0
  54. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +423 -0
  55. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +81 -0
  56. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +771 -0
  57. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +184 -0
  58. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.cc +145 -0
  59. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +148 -0
  60. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +132 -0
  61. package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +76 -0
  62. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +168 -0
  63. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +83 -0
  64. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +307 -0
  65. package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +464 -0
  66. package/deps/rocksdb/rocksdb/db/builder.cc +358 -0
  67. package/deps/rocksdb/rocksdb/db/builder.h +95 -0
  68. package/deps/rocksdb/rocksdb/db/c.cc +5281 -0
  69. package/deps/rocksdb/rocksdb/db/c_test.c +2883 -0
  70. package/deps/rocksdb/rocksdb/db/column_family.cc +1602 -0
  71. package/deps/rocksdb/rocksdb/db/column_family.h +787 -0
  72. package/deps/rocksdb/rocksdb/db/column_family_test.cc +3427 -0
  73. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +425 -0
  74. package/deps/rocksdb/rocksdb/db/compacted_db_impl.cc +169 -0
  75. package/deps/rocksdb/rocksdb/db/compacted_db_impl.h +118 -0
  76. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +591 -0
  77. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +389 -0
  78. package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +37 -0
  79. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +1023 -0
  80. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +353 -0
  81. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +1254 -0
  82. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1917 -0
  83. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +208 -0
  84. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +1037 -0
  85. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +1224 -0
  86. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +1135 -0
  87. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +318 -0
  88. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +255 -0
  89. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +57 -0
  90. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +510 -0
  91. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +33 -0
  92. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +2190 -0
  93. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +1103 -0
  94. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +32 -0
  95. package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +44 -0
  96. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +660 -0
  97. package/deps/rocksdb/rocksdb/db/convenience.cc +78 -0
  98. package/deps/rocksdb/rocksdb/db/corruption_test.cc +921 -0
  99. package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +359 -0
  100. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3820 -0
  101. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1058 -0
  102. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2128 -0
  103. package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +851 -0
  104. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +6292 -0
  105. package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +509 -0
  106. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +130 -0
  107. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +137 -0
  108. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1119 -0
  109. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +5057 -0
  110. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +2274 -0
  111. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +3421 -0
  112. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +298 -0
  113. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +151 -0
  114. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +967 -0
  115. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1806 -0
  116. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +270 -0
  117. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +146 -0
  118. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +683 -0
  119. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +333 -0
  120. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +2024 -0
  121. package/deps/rocksdb/rocksdb/db/db_impl/db_secondary_test.cc +932 -0
  122. package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +137 -0
  123. package/deps/rocksdb/rocksdb/db/db_info_dumper.h +15 -0
  124. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +178 -0
  125. package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +592 -0
  126. package/deps/rocksdb/rocksdb/db/db_iter.cc +1493 -0
  127. package/deps/rocksdb/rocksdb/db/db_iter.h +390 -0
  128. package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +657 -0
  129. package/deps/rocksdb/rocksdb/db/db_iter_test.cc +3268 -0
  130. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +3197 -0
  131. package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +299 -0
  132. package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +513 -0
  133. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +329 -0
  134. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +241 -0
  135. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +671 -0
  136. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1022 -0
  137. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +1723 -0
  138. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1694 -0
  139. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1261 -0
  140. package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +164 -0
  141. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +488 -0
  142. package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +567 -0
  143. package/deps/rocksdb/rocksdb/db/db_test.cc +6736 -0
  144. package/deps/rocksdb/rocksdb/db/db_test2.cc +5408 -0
  145. package/deps/rocksdb/rocksdb/db/db_test_util.cc +1633 -0
  146. package/deps/rocksdb/rocksdb/db/db_test_util.h +1194 -0
  147. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +2235 -0
  148. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +1780 -0
  149. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +2520 -0
  150. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +119 -0
  151. package/deps/rocksdb/rocksdb/db/db_write_test.cc +465 -0
  152. package/deps/rocksdb/rocksdb/db/dbformat.cc +222 -0
  153. package/deps/rocksdb/rocksdb/db/dbformat.h +786 -0
  154. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +206 -0
  155. package/deps/rocksdb/rocksdb/db/deletefile_test.cc +580 -0
  156. package/deps/rocksdb/rocksdb/db/error_handler.cc +726 -0
  157. package/deps/rocksdb/rocksdb/db/error_handler.h +117 -0
  158. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +2598 -0
  159. package/deps/rocksdb/rocksdb/db/event_helpers.cc +233 -0
  160. package/deps/rocksdb/rocksdb/db/event_helpers.h +57 -0
  161. package/deps/rocksdb/rocksdb/db/experimental.cc +50 -0
  162. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1559 -0
  163. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +910 -0
  164. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +195 -0
  165. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +2936 -0
  166. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +556 -0
  167. package/deps/rocksdb/rocksdb/db/file_indexer.cc +216 -0
  168. package/deps/rocksdb/rocksdb/db/file_indexer.h +142 -0
  169. package/deps/rocksdb/rocksdb/db/file_indexer_test.cc +350 -0
  170. package/deps/rocksdb/rocksdb/db/filename_test.cc +179 -0
  171. package/deps/rocksdb/rocksdb/db/flush_job.cc +514 -0
  172. package/deps/rocksdb/rocksdb/db/flush_job.h +169 -0
  173. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +647 -0
  174. package/deps/rocksdb/rocksdb/db/flush_scheduler.cc +86 -0
  175. package/deps/rocksdb/rocksdb/db/flush_scheduler.h +54 -0
  176. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +1023 -0
  177. package/deps/rocksdb/rocksdb/db/forward_iterator.h +163 -0
  178. package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +377 -0
  179. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +282 -0
  180. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +75 -0
  181. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +632 -0
  182. package/deps/rocksdb/rocksdb/db/internal_stats.cc +1461 -0
  183. package/deps/rocksdb/rocksdb/db/internal_stats.h +712 -0
  184. package/deps/rocksdb/rocksdb/db/job_context.h +226 -0
  185. package/deps/rocksdb/rocksdb/db/listener_test.cc +1118 -0
  186. package/deps/rocksdb/rocksdb/db/log_format.h +48 -0
  187. package/deps/rocksdb/rocksdb/db/log_reader.cc +654 -0
  188. package/deps/rocksdb/rocksdb/db/log_reader.h +192 -0
  189. package/deps/rocksdb/rocksdb/db/log_test.cc +901 -0
  190. package/deps/rocksdb/rocksdb/db/log_writer.cc +164 -0
  191. package/deps/rocksdb/rocksdb/db/log_writer.h +115 -0
  192. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.cc +67 -0
  193. package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +63 -0
  194. package/deps/rocksdb/rocksdb/db/lookup_key.h +66 -0
  195. package/deps/rocksdb/rocksdb/db/malloc_stats.cc +54 -0
  196. package/deps/rocksdb/rocksdb/db/malloc_stats.h +24 -0
  197. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +296 -0
  198. package/deps/rocksdb/rocksdb/db/memtable.cc +1169 -0
  199. package/deps/rocksdb/rocksdb/db/memtable.h +554 -0
  200. package/deps/rocksdb/rocksdb/db/memtable_list.cc +888 -0
  201. package/deps/rocksdb/rocksdb/db/memtable_list.h +438 -0
  202. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +935 -0
  203. package/deps/rocksdb/rocksdb/db/merge_context.h +134 -0
  204. package/deps/rocksdb/rocksdb/db/merge_helper.cc +421 -0
  205. package/deps/rocksdb/rocksdb/db/merge_helper.h +197 -0
  206. package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +290 -0
  207. package/deps/rocksdb/rocksdb/db/merge_operator.cc +86 -0
  208. package/deps/rocksdb/rocksdb/db/merge_test.cc +608 -0
  209. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +338 -0
  210. package/deps/rocksdb/rocksdb/db/options_file_test.cc +119 -0
  211. package/deps/rocksdb/rocksdb/db/output_validator.cc +30 -0
  212. package/deps/rocksdb/rocksdb/db/output_validator.h +47 -0
  213. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +993 -0
  214. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +113 -0
  215. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +76 -0
  216. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +231 -0
  217. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +87 -0
  218. package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1374 -0
  219. package/deps/rocksdb/rocksdb/db/pre_release_callback.h +38 -0
  220. package/deps/rocksdb/rocksdb/db/prefix_test.cc +910 -0
  221. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +489 -0
  222. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +446 -0
  223. package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +260 -0
  224. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +709 -0
  225. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +439 -0
  226. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +256 -0
  227. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +552 -0
  228. package/deps/rocksdb/rocksdb/db/read_callback.h +53 -0
  229. package/deps/rocksdb/rocksdb/db/repair.cc +722 -0
  230. package/deps/rocksdb/rocksdb/db/repair_test.cc +390 -0
  231. package/deps/rocksdb/rocksdb/db/snapshot_checker.h +61 -0
  232. package/deps/rocksdb/rocksdb/db/snapshot_impl.cc +26 -0
  233. package/deps/rocksdb/rocksdb/db/snapshot_impl.h +167 -0
  234. package/deps/rocksdb/rocksdb/db/table_cache.cc +704 -0
  235. package/deps/rocksdb/rocksdb/db/table_cache.h +233 -0
  236. package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +75 -0
  237. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +107 -0
  238. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +517 -0
  239. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +318 -0
  240. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +128 -0
  241. package/deps/rocksdb/rocksdb/db/trim_history_scheduler.cc +54 -0
  242. package/deps/rocksdb/rocksdb/db/trim_history_scheduler.h +44 -0
  243. package/deps/rocksdb/rocksdb/db/version_builder.cc +1078 -0
  244. package/deps/rocksdb/rocksdb/db/version_builder.h +69 -0
  245. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +1551 -0
  246. package/deps/rocksdb/rocksdb/db/version_edit.cc +955 -0
  247. package/deps/rocksdb/rocksdb/db/version_edit.h +609 -0
  248. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +699 -0
  249. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +252 -0
  250. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +597 -0
  251. package/deps/rocksdb/rocksdb/db/version_set.cc +6333 -0
  252. package/deps/rocksdb/rocksdb/db/version_set.h +1485 -0
  253. package/deps/rocksdb/rocksdb/db/version_set_test.cc +3035 -0
  254. package/deps/rocksdb/rocksdb/db/wal_edit.cc +204 -0
  255. package/deps/rocksdb/rocksdb/db/wal_edit.h +166 -0
  256. package/deps/rocksdb/rocksdb/db/wal_edit_test.cc +214 -0
  257. package/deps/rocksdb/rocksdb/db/wal_manager.cc +517 -0
  258. package/deps/rocksdb/rocksdb/db/wal_manager.h +119 -0
  259. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +340 -0
  260. package/deps/rocksdb/rocksdb/db/write_batch.cc +2174 -0
  261. package/deps/rocksdb/rocksdb/db/write_batch_base.cc +94 -0
  262. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +250 -0
  263. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +907 -0
  264. package/deps/rocksdb/rocksdb/db/write_callback.h +27 -0
  265. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +457 -0
  266. package/deps/rocksdb/rocksdb/db/write_controller.cc +128 -0
  267. package/deps/rocksdb/rocksdb/db/write_controller.h +144 -0
  268. package/deps/rocksdb/rocksdb/db/write_controller_test.cc +135 -0
  269. package/deps/rocksdb/rocksdb/db/write_thread.cc +796 -0
  270. package/deps/rocksdb/rocksdb/db/write_thread.h +433 -0
  271. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +14 -0
  272. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +341 -0
  273. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +520 -0
  274. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +23 -0
  275. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +337 -0
  276. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +554 -0
  277. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +79 -0
  278. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +173 -0
  279. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.h +17 -0
  280. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +38 -0
  281. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +763 -0
  282. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +222 -0
  283. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.cc +27 -0
  284. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +428 -0
  285. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +218 -0
  286. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +64 -0
  287. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +2430 -0
  288. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +237 -0
  289. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +343 -0
  290. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +800 -0
  291. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +920 -0
  292. package/deps/rocksdb/rocksdb/env/env.cc +733 -0
  293. package/deps/rocksdb/rocksdb/env/env_basic_test.cc +352 -0
  294. package/deps/rocksdb/rocksdb/env/env_chroot.cc +346 -0
  295. package/deps/rocksdb/rocksdb/env/env_chroot.h +22 -0
  296. package/deps/rocksdb/rocksdb/env/env_encryption.cc +1148 -0
  297. package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +137 -0
  298. package/deps/rocksdb/rocksdb/env/env_hdfs.cc +648 -0
  299. package/deps/rocksdb/rocksdb/env/env_posix.cc +514 -0
  300. package/deps/rocksdb/rocksdb/env/env_test.cc +2230 -0
  301. package/deps/rocksdb/rocksdb/env/file_system.cc +132 -0
  302. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +448 -0
  303. package/deps/rocksdb/rocksdb/env/file_system_tracer.h +415 -0
  304. package/deps/rocksdb/rocksdb/env/fs_posix.cc +1086 -0
  305. package/deps/rocksdb/rocksdb/env/io_posix.cc +1499 -0
  306. package/deps/rocksdb/rocksdb/env/io_posix.h +402 -0
  307. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +140 -0
  308. package/deps/rocksdb/rocksdb/env/mock_env.cc +1066 -0
  309. package/deps/rocksdb/rocksdb/env/mock_env.h +41 -0
  310. package/deps/rocksdb/rocksdb/env/mock_env_test.cc +85 -0
  311. package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +402 -0
  312. package/deps/rocksdb/rocksdb/file/delete_scheduler.h +150 -0
  313. package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +717 -0
  314. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +156 -0
  315. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +99 -0
  316. package/deps/rocksdb/rocksdb/file/file_util.cc +268 -0
  317. package/deps/rocksdb/rocksdb/file/file_util.h +96 -0
  318. package/deps/rocksdb/rocksdb/file/filename.cc +473 -0
  319. package/deps/rocksdb/rocksdb/file/filename.h +182 -0
  320. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +188 -0
  321. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +315 -0
  322. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +142 -0
  323. package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +482 -0
  324. package/deps/rocksdb/rocksdb/file/read_write_util.cc +67 -0
  325. package/deps/rocksdb/rocksdb/file/read_write_util.h +34 -0
  326. package/deps/rocksdb/rocksdb/file/readahead_raf.cc +169 -0
  327. package/deps/rocksdb/rocksdb/file/readahead_raf.h +29 -0
  328. package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +237 -0
  329. package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +63 -0
  330. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +552 -0
  331. package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +203 -0
  332. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +523 -0
  333. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +251 -0
  334. package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +386 -0
  335. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +839 -0
  336. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +2218 -0
  337. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +294 -0
  338. package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +71 -0
  339. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +214 -0
  340. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +98 -0
  341. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +137 -0
  342. package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +40 -0
  343. package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +46 -0
  344. package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +359 -0
  345. package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +499 -0
  346. package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +138 -0
  347. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +1697 -0
  348. package/deps/rocksdb/rocksdb/include/rocksdb/db_bench_tool.h +11 -0
  349. package/deps/rocksdb/rocksdb/include/rocksdb/db_dump_tool.h +45 -0
  350. package/deps/rocksdb/rocksdb/include/rocksdb/db_stress_tool.h +11 -0
  351. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1671 -0
  352. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +405 -0
  353. package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +29 -0
  354. package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +129 -0
  355. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1472 -0
  356. package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +238 -0
  357. package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +61 -0
  358. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +269 -0
  359. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +56 -0
  360. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +128 -0
  361. package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +43 -0
  362. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +556 -0
  363. package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +77 -0
  364. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +385 -0
  365. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +257 -0
  366. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +155 -0
  367. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +1702 -0
  368. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +237 -0
  369. package/deps/rocksdb/rocksdb/include/rocksdb/perf_level.h +35 -0
  370. package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +73 -0
  371. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +139 -0
  372. package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +10 -0
  373. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +269 -0
  374. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +103 -0
  375. package/deps/rocksdb/rocksdb/include/rocksdb/snapshot.h +48 -0
  376. package/deps/rocksdb/rocksdb/include/rocksdb/sst_dump_tool.h +19 -0
  377. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +136 -0
  378. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +47 -0
  379. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +145 -0
  380. package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +135 -0
  381. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +592 -0
  382. package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +69 -0
  383. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +608 -0
  384. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +711 -0
  385. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +280 -0
  386. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +188 -0
  387. package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +58 -0
  388. package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +48 -0
  389. package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +121 -0
  390. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +74 -0
  391. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +86 -0
  392. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +535 -0
  393. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +61 -0
  394. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/convenience.h +10 -0
  395. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +72 -0
  396. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/debug.h +49 -0
  397. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +175 -0
  398. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_mirror.h +180 -0
  399. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/info_log_finder.h +19 -0
  400. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +288 -0
  401. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +71 -0
  402. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/leveldb_options.h +145 -0
  403. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +43 -0
  404. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +55 -0
  405. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +50 -0
  406. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +205 -0
  407. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +100 -0
  408. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +19 -0
  409. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +876 -0
  410. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +128 -0
  411. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +94 -0
  412. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +504 -0
  413. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +95 -0
  414. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +626 -0
  415. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +432 -0
  416. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +92 -0
  417. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +34 -0
  418. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +279 -0
  419. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +16 -0
  420. package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +102 -0
  421. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +377 -0
  422. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +127 -0
  423. package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +106 -0
  424. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +300 -0
  425. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +165 -0
  426. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +684 -0
  427. package/deps/rocksdb/rocksdb/logging/env_logger.h +165 -0
  428. package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +162 -0
  429. package/deps/rocksdb/rocksdb/logging/event_logger.cc +70 -0
  430. package/deps/rocksdb/rocksdb/logging/event_logger.h +203 -0
  431. package/deps/rocksdb/rocksdb/logging/event_logger_test.cc +43 -0
  432. package/deps/rocksdb/rocksdb/logging/log_buffer.cc +92 -0
  433. package/deps/rocksdb/rocksdb/logging/log_buffer.h +56 -0
  434. package/deps/rocksdb/rocksdb/logging/logging.h +68 -0
  435. package/deps/rocksdb/rocksdb/logging/posix_logger.h +185 -0
  436. package/deps/rocksdb/rocksdb/memory/allocator.h +57 -0
  437. package/deps/rocksdb/rocksdb/memory/arena.cc +233 -0
  438. package/deps/rocksdb/rocksdb/memory/arena.h +141 -0
  439. package/deps/rocksdb/rocksdb/memory/arena_test.cc +204 -0
  440. package/deps/rocksdb/rocksdb/memory/concurrent_arena.cc +47 -0
  441. package/deps/rocksdb/rocksdb/memory/concurrent_arena.h +218 -0
  442. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +206 -0
  443. package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +78 -0
  444. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +33 -0
  445. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +27 -0
  446. package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +102 -0
  447. package/deps/rocksdb/rocksdb/memory/memory_allocator.h +38 -0
  448. package/deps/rocksdb/rocksdb/memory/memory_usage.h +25 -0
  449. package/deps/rocksdb/rocksdb/memtable/alloc_tracker.cc +62 -0
  450. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +844 -0
  451. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +49 -0
  452. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +349 -0
  453. package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +44 -0
  454. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +997 -0
  455. package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +663 -0
  456. package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +677 -0
  457. package/deps/rocksdb/rocksdb/memtable/skiplist.h +496 -0
  458. package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +388 -0
  459. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +280 -0
  460. package/deps/rocksdb/rocksdb/memtable/stl_wrappers.h +33 -0
  461. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +301 -0
  462. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -0
  463. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +203 -0
  464. package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +23 -0
  465. package/deps/rocksdb/rocksdb/monitoring/histogram.cc +287 -0
  466. package/deps/rocksdb/rocksdb/monitoring/histogram.h +149 -0
  467. package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +231 -0
  468. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +200 -0
  469. package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +84 -0
  470. package/deps/rocksdb/rocksdb/monitoring/in_memory_stats_history.cc +49 -0
  471. package/deps/rocksdb/rocksdb/monitoring/in_memory_stats_history.h +74 -0
  472. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +71 -0
  473. package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +98 -0
  474. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +62 -0
  475. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +60 -0
  476. package/deps/rocksdb/rocksdb/monitoring/iostats_context_test.cc +29 -0
  477. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +566 -0
  478. package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +97 -0
  479. package/deps/rocksdb/rocksdb/monitoring/perf_level.cc +28 -0
  480. package/deps/rocksdb/rocksdb/monitoring/perf_level_imp.h +18 -0
  481. package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +79 -0
  482. package/deps/rocksdb/rocksdb/monitoring/persistent_stats_history.cc +169 -0
  483. package/deps/rocksdb/rocksdb/monitoring/persistent_stats_history.h +83 -0
  484. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +431 -0
  485. package/deps/rocksdb/rocksdb/monitoring/statistics.h +138 -0
  486. package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -0
  487. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +652 -0
  488. package/deps/rocksdb/rocksdb/monitoring/thread_status_impl.cc +163 -0
  489. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +314 -0
  490. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +233 -0
  491. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater_debug.cc +43 -0
  492. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +206 -0
  493. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +134 -0
  494. package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +32 -0
  495. package/deps/rocksdb/rocksdb/options/cf_options.cc +1026 -0
  496. package/deps/rocksdb/rocksdb/options/cf_options.h +308 -0
  497. package/deps/rocksdb/rocksdb/options/configurable.cc +681 -0
  498. package/deps/rocksdb/rocksdb/options/configurable_helper.h +251 -0
  499. package/deps/rocksdb/rocksdb/options/configurable_test.cc +757 -0
  500. package/deps/rocksdb/rocksdb/options/configurable_test.h +127 -0
  501. package/deps/rocksdb/rocksdb/options/customizable.cc +77 -0
  502. package/deps/rocksdb/rocksdb/options/customizable_helper.h +216 -0
  503. package/deps/rocksdb/rocksdb/options/customizable_test.cc +625 -0
  504. package/deps/rocksdb/rocksdb/options/db_options.cc +835 -0
  505. package/deps/rocksdb/rocksdb/options/db_options.h +126 -0
  506. package/deps/rocksdb/rocksdb/options/options.cc +664 -0
  507. package/deps/rocksdb/rocksdb/options/options_helper.cc +1391 -0
  508. package/deps/rocksdb/rocksdb/options/options_helper.h +118 -0
  509. package/deps/rocksdb/rocksdb/options/options_parser.cc +721 -0
  510. package/deps/rocksdb/rocksdb/options/options_parser.h +151 -0
  511. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +583 -0
  512. package/deps/rocksdb/rocksdb/options/options_test.cc +3794 -0
  513. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +106 -0
  514. package/deps/rocksdb/rocksdb/port/lang.h +16 -0
  515. package/deps/rocksdb/rocksdb/port/likely.h +18 -0
  516. package/deps/rocksdb/rocksdb/port/malloc.h +17 -0
  517. package/deps/rocksdb/rocksdb/port/port.h +21 -0
  518. package/deps/rocksdb/rocksdb/port/port_dirent.h +44 -0
  519. package/deps/rocksdb/rocksdb/port/port_example.h +101 -0
  520. package/deps/rocksdb/rocksdb/port/port_posix.cc +266 -0
  521. package/deps/rocksdb/rocksdb/port/port_posix.h +223 -0
  522. package/deps/rocksdb/rocksdb/port/stack_trace.cc +179 -0
  523. package/deps/rocksdb/rocksdb/port/stack_trace.h +28 -0
  524. package/deps/rocksdb/rocksdb/port/sys_time.h +47 -0
  525. package/deps/rocksdb/rocksdb/port/util_logger.h +20 -0
  526. package/deps/rocksdb/rocksdb/port/win/env_default.cc +45 -0
  527. package/deps/rocksdb/rocksdb/port/win/env_win.cc +1449 -0
  528. package/deps/rocksdb/rocksdb/port/win/env_win.h +294 -0
  529. package/deps/rocksdb/rocksdb/port/win/io_win.cc +1084 -0
  530. package/deps/rocksdb/rocksdb/port/win/io_win.h +494 -0
  531. package/deps/rocksdb/rocksdb/port/win/port_win.cc +283 -0
  532. package/deps/rocksdb/rocksdb/port/win/port_win.h +411 -0
  533. package/deps/rocksdb/rocksdb/port/win/win_jemalloc.cc +79 -0
  534. package/deps/rocksdb/rocksdb/port/win/win_logger.cc +194 -0
  535. package/deps/rocksdb/rocksdb/port/win/win_logger.h +67 -0
  536. package/deps/rocksdb/rocksdb/port/win/win_thread.cc +183 -0
  537. package/deps/rocksdb/rocksdb/port/win/win_thread.h +122 -0
  538. package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +221 -0
  539. package/deps/rocksdb/rocksdb/port/win/xpress_win.h +26 -0
  540. package/deps/rocksdb/rocksdb/port/xpress.h +17 -0
  541. package/deps/rocksdb/rocksdb/src.mk +631 -0
  542. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +126 -0
  543. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +57 -0
  544. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +73 -0
  545. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.h +48 -0
  546. package/deps/rocksdb/rocksdb/table/block_based/block.cc +1049 -0
  547. package/deps/rocksdb/rocksdb/table/block_based/block.h +720 -0
  548. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +348 -0
  549. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +119 -0
  550. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +434 -0
  551. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1835 -0
  552. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +193 -0
  553. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +839 -0
  554. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +95 -0
  555. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +383 -0
  556. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +251 -0
  557. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +3563 -0
  558. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +681 -0
  559. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +190 -0
  560. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +347 -0
  561. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +201 -0
  562. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +78 -0
  563. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +66 -0
  564. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +32 -0
  565. package/deps/rocksdb/rocksdb/table/block_based/block_prefix_index.cc +232 -0
  566. package/deps/rocksdb/rocksdb/table/block_based/block_prefix_index.h +66 -0
  567. package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +623 -0
  568. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
  569. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +220 -0
  570. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +59 -0
  571. package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +25 -0
  572. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.cc +93 -0
  573. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +136 -0
  574. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +717 -0
  575. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +180 -0
  576. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +102 -0
  577. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +55 -0
  578. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1407 -0
  579. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +168 -0
  580. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +88 -0
  581. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +41 -0
  582. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +344 -0
  583. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +139 -0
  584. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +333 -0
  585. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +147 -0
  586. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.h +49 -0
  587. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +248 -0
  588. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +444 -0
  589. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +54 -0
  590. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +85 -0
  591. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +56 -0
  592. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +22 -0
  593. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +40 -0
  594. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +521 -0
  595. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +144 -0
  596. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +424 -0
  597. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +163 -0
  598. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +142 -0
  599. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +186 -0
  600. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +51 -0
  601. package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +64 -0
  602. package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +38 -0
  603. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +120 -0
  604. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +59 -0
  605. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +324 -0
  606. package/deps/rocksdb/rocksdb/table/block_fetcher.h +129 -0
  607. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +534 -0
  608. package/deps/rocksdb/rocksdb/table/cleanable_test.cc +277 -0
  609. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +543 -0
  610. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +136 -0
  611. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +663 -0
  612. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +107 -0
  613. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +81 -0
  614. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +404 -0
  615. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +101 -0
  616. package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +585 -0
  617. package/deps/rocksdb/rocksdb/table/format.cc +422 -0
  618. package/deps/rocksdb/rocksdb/table/format.h +348 -0
  619. package/deps/rocksdb/rocksdb/table/get_context.cc +408 -0
  620. package/deps/rocksdb/rocksdb/table/get_context.h +212 -0
  621. package/deps/rocksdb/rocksdb/table/internal_iterator.h +205 -0
  622. package/deps/rocksdb/rocksdb/table/iter_heap.h +42 -0
  623. package/deps/rocksdb/rocksdb/table/iterator.cc +210 -0
  624. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +180 -0
  625. package/deps/rocksdb/rocksdb/table/merger_test.cc +180 -0
  626. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +481 -0
  627. package/deps/rocksdb/rocksdb/table/merging_iterator.h +64 -0
  628. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +541 -0
  629. package/deps/rocksdb/rocksdb/table/meta_blocks.h +154 -0
  630. package/deps/rocksdb/rocksdb/table/mock_table.cc +328 -0
  631. package/deps/rocksdb/rocksdb/table/mock_table.h +89 -0
  632. package/deps/rocksdb/rocksdb/table/multiget_context.h +282 -0
  633. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +116 -0
  634. package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +44 -0
  635. package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +34 -0
  636. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.cc +78 -0
  637. package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +135 -0
  638. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +332 -0
  639. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +153 -0
  640. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +263 -0
  641. package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +182 -0
  642. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +211 -0
  643. package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +249 -0
  644. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +506 -0
  645. package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +201 -0
  646. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +781 -0
  647. package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +247 -0
  648. package/deps/rocksdb/rocksdb/table/scoped_arena_iterator.h +61 -0
  649. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +502 -0
  650. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +96 -0
  651. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +98 -0
  652. package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +228 -0
  653. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +340 -0
  654. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +94 -0
  655. package/deps/rocksdb/rocksdb/table/table_builder.h +203 -0
  656. package/deps/rocksdb/rocksdb/table/table_factory.cc +38 -0
  657. package/deps/rocksdb/rocksdb/table/table_properties.cc +300 -0
  658. package/deps/rocksdb/rocksdb/table/table_properties_internal.h +30 -0
  659. package/deps/rocksdb/rocksdb/table/table_reader.h +147 -0
  660. package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +347 -0
  661. package/deps/rocksdb/rocksdb/table/table_reader_caller.h +39 -0
  662. package/deps/rocksdb/rocksdb/table/table_test.cc +4769 -0
  663. package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +215 -0
  664. package/deps/rocksdb/rocksdb/table/two_level_iterator.h +43 -0
  665. package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +38 -0
  666. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +74 -0
  667. package/deps/rocksdb/rocksdb/test_util/sync_point.cc +93 -0
  668. package/deps/rocksdb/rocksdb/test_util/sync_point.h +161 -0
  669. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +129 -0
  670. package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +74 -0
  671. package/deps/rocksdb/rocksdb/test_util/testharness.cc +56 -0
  672. package/deps/rocksdb/rocksdb/test_util/testharness.h +53 -0
  673. package/deps/rocksdb/rocksdb/test_util/testutil.cc +566 -0
  674. package/deps/rocksdb/rocksdb/test_util/testutil.h +887 -0
  675. package/deps/rocksdb/rocksdb/test_util/testutil_test.cc +43 -0
  676. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +388 -0
  677. package/deps/rocksdb/rocksdb/test_util/transaction_test_util.h +132 -0
  678. package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +27 -0
  679. package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +45 -0
  680. package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +166 -0
  681. package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +570 -0
  682. package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +92 -0
  683. package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +54 -0
  684. package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +152 -0
  685. package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +59 -0
  686. package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +141 -0
  687. package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +33 -0
  688. package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +74 -0
  689. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +117 -0
  690. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +263 -0
  691. package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +96 -0
  692. package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +40 -0
  693. package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +29 -0
  694. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +144 -0
  695. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +30 -0
  696. package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +51 -0
  697. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +28 -0
  698. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +10 -0
  699. package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +26 -0
  700. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +138 -0
  701. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +23 -0
  702. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +57 -0
  703. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +260 -0
  704. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +52 -0
  705. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +328 -0
  706. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +1703 -0
  707. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +16 -0
  708. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +304 -0
  709. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +39 -0
  710. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +26 -0
  711. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +318 -0
  712. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.cpp +12 -0
  713. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +57 -0
  714. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +219 -0
  715. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +207 -0
  716. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +164 -0
  717. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +57 -0
  718. package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +77 -0
  719. package/deps/rocksdb/rocksdb/third-party/gcc/ppc-asm.h +390 -0
  720. package/deps/rocksdb/rocksdb/thirdparty.inc +268 -0
  721. package/deps/rocksdb/rocksdb/tools/CMakeLists.txt +30 -0
  722. package/deps/rocksdb/rocksdb/tools/blob_dump.cc +110 -0
  723. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/__init__.py +2 -0
  724. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +2000 -0
  725. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.sh +156 -0
  726. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +734 -0
  727. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +2307 -0
  728. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +395 -0
  729. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +721 -0
  730. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +719 -0
  731. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_tool.cc +25 -0
  732. package/deps/rocksdb/rocksdb/tools/db_bench.cc +21 -0
  733. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +7416 -0
  734. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +328 -0
  735. package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +130 -0
  736. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +297 -0
  737. package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +259 -0
  738. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_dump.cc +63 -0
  739. package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +62 -0
  740. package/deps/rocksdb/rocksdb/tools/io_tracer_parser.cc +25 -0
  741. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +187 -0
  742. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +120 -0
  743. package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.h +40 -0
  744. package/deps/rocksdb/rocksdb/tools/ldb.cc +21 -0
  745. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3609 -0
  746. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +665 -0
  747. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +746 -0
  748. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +159 -0
  749. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +221 -0
  750. package/deps/rocksdb/rocksdb/tools/sst_dump.cc +20 -0
  751. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +427 -0
  752. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +541 -0
  753. package/deps/rocksdb/rocksdb/tools/trace_analyzer.cc +25 -0
  754. package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +752 -0
  755. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +2001 -0
  756. package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +292 -0
  757. package/deps/rocksdb/rocksdb/tools/write_stress.cc +305 -0
  758. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +496 -0
  759. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +294 -0
  760. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +379 -0
  761. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +229 -0
  762. package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +174 -0
  763. package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +215 -0
  764. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +491 -0
  765. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +195 -0
  766. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +255 -0
  767. package/deps/rocksdb/rocksdb/util/autovector.h +367 -0
  768. package/deps/rocksdb/rocksdb/util/autovector_test.cc +330 -0
  769. package/deps/rocksdb/rocksdb/util/bloom_impl.h +485 -0
  770. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1191 -0
  771. package/deps/rocksdb/rocksdb/util/build_version.cc.in +5 -0
  772. package/deps/rocksdb/rocksdb/util/build_version.h +15 -0
  773. package/deps/rocksdb/rocksdb/util/cast_util.h +20 -0
  774. package/deps/rocksdb/rocksdb/util/channel.h +67 -0
  775. package/deps/rocksdb/rocksdb/util/coding.cc +89 -0
  776. package/deps/rocksdb/rocksdb/util/coding.h +419 -0
  777. package/deps/rocksdb/rocksdb/util/coding_lean.h +101 -0
  778. package/deps/rocksdb/rocksdb/util/coding_test.cc +217 -0
  779. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +92 -0
  780. package/deps/rocksdb/rocksdb/util/comparator.cc +219 -0
  781. package/deps/rocksdb/rocksdb/util/compression.h +1529 -0
  782. package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +108 -0
  783. package/deps/rocksdb/rocksdb/util/compression_context_cache.h +47 -0
  784. package/deps/rocksdb/rocksdb/util/concurrent_task_limiter_impl.cc +67 -0
  785. package/deps/rocksdb/rocksdb/util/concurrent_task_limiter_impl.h +67 -0
  786. package/deps/rocksdb/rocksdb/util/core_local.h +83 -0
  787. package/deps/rocksdb/rocksdb/util/crc32c.cc +1283 -0
  788. package/deps/rocksdb/rocksdb/util/crc32c.h +51 -0
  789. package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +169 -0
  790. package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +50 -0
  791. package/deps/rocksdb/rocksdb/util/crc32c_ppc.c +94 -0
  792. package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +19 -0
  793. package/deps/rocksdb/rocksdb/util/crc32c_ppc_asm.S +756 -0
  794. package/deps/rocksdb/rocksdb/util/crc32c_ppc_constants.h +900 -0
  795. package/deps/rocksdb/rocksdb/util/crc32c_test.cc +180 -0
  796. package/deps/rocksdb/rocksdb/util/defer.h +52 -0
  797. package/deps/rocksdb/rocksdb/util/defer_test.cc +39 -0
  798. package/deps/rocksdb/rocksdb/util/duplicate_detector.h +68 -0
  799. package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +70 -0
  800. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +214 -0
  801. package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +323 -0
  802. package/deps/rocksdb/rocksdb/util/fastrange.h +112 -0
  803. package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +136 -0
  804. package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +98 -0
  805. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +449 -0
  806. package/deps/rocksdb/rocksdb/util/filelock_test.cc +152 -0
  807. package/deps/rocksdb/rocksdb/util/filter_bench.cc +781 -0
  808. package/deps/rocksdb/rocksdb/util/gflags_compat.h +20 -0
  809. package/deps/rocksdb/rocksdb/util/hash.cc +83 -0
  810. package/deps/rocksdb/rocksdb/util/hash.h +107 -0
  811. package/deps/rocksdb/rocksdb/util/hash_map.h +67 -0
  812. package/deps/rocksdb/rocksdb/util/hash_test.cc +593 -0
  813. package/deps/rocksdb/rocksdb/util/heap.h +166 -0
  814. package/deps/rocksdb/rocksdb/util/heap_test.cc +139 -0
  815. package/deps/rocksdb/rocksdb/util/kv_map.h +33 -0
  816. package/deps/rocksdb/rocksdb/util/log_write_bench.cc +86 -0
  817. package/deps/rocksdb/rocksdb/util/math.h +186 -0
  818. package/deps/rocksdb/rocksdb/util/math128.h +298 -0
  819. package/deps/rocksdb/rocksdb/util/murmurhash.cc +191 -0
  820. package/deps/rocksdb/rocksdb/util/murmurhash.h +42 -0
  821. package/deps/rocksdb/rocksdb/util/mutexlock.h +186 -0
  822. package/deps/rocksdb/rocksdb/util/ppc-opcode.h +27 -0
  823. package/deps/rocksdb/rocksdb/util/random.cc +56 -0
  824. package/deps/rocksdb/rocksdb/util/random.h +186 -0
  825. package/deps/rocksdb/rocksdb/util/random_test.cc +105 -0
  826. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +340 -0
  827. package/deps/rocksdb/rocksdb/util/rate_limiter.h +113 -0
  828. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +251 -0
  829. package/deps/rocksdb/rocksdb/util/repeatable_thread.h +151 -0
  830. package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +107 -0
  831. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +1201 -0
  832. package/deps/rocksdb/rocksdb/util/ribbon_impl.h +1062 -0
  833. package/deps/rocksdb/rocksdb/util/ribbon_test.cc +931 -0
  834. package/deps/rocksdb/rocksdb/util/set_comparator.h +22 -0
  835. package/deps/rocksdb/rocksdb/util/slice.cc +243 -0
  836. package/deps/rocksdb/rocksdb/util/slice_test.cc +163 -0
  837. package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +153 -0
  838. package/deps/rocksdb/rocksdb/util/status.cc +149 -0
  839. package/deps/rocksdb/rocksdb/util/stderr_logger.h +31 -0
  840. package/deps/rocksdb/rocksdb/util/stop_watch.h +118 -0
  841. package/deps/rocksdb/rocksdb/util/string_util.cc +422 -0
  842. package/deps/rocksdb/rocksdb/util/string_util.h +144 -0
  843. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +354 -0
  844. package/deps/rocksdb/rocksdb/util/thread_local.cc +554 -0
  845. package/deps/rocksdb/rocksdb/util/thread_local.h +101 -0
  846. package/deps/rocksdb/rocksdb/util/thread_local_test.cc +583 -0
  847. package/deps/rocksdb/rocksdb/util/thread_operation.h +121 -0
  848. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +506 -0
  849. package/deps/rocksdb/rocksdb/util/threadpool_imp.h +112 -0
  850. package/deps/rocksdb/rocksdb/util/timer.h +331 -0
  851. package/deps/rocksdb/rocksdb/util/timer_queue.h +230 -0
  852. package/deps/rocksdb/rocksdb/util/timer_queue_test.cc +72 -0
  853. package/deps/rocksdb/rocksdb/util/timer_test.cc +399 -0
  854. package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +80 -0
  855. package/deps/rocksdb/rocksdb/util/vector_iterator.h +101 -0
  856. package/deps/rocksdb/rocksdb/util/work_queue.h +148 -0
  857. package/deps/rocksdb/rocksdb/util/work_queue_test.cc +268 -0
  858. package/deps/rocksdb/rocksdb/util/xxh3p.h +1392 -0
  859. package/deps/rocksdb/rocksdb/util/xxhash.cc +1158 -0
  860. package/deps/rocksdb/rocksdb/util/xxhash.h +598 -0
  861. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +2354 -0
  862. package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db_test.cc +2955 -0
  863. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +488 -0
  864. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +199 -0
  865. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +112 -0
  866. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +266 -0
  867. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +52 -0
  868. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +2167 -0
  869. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +500 -0
  870. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +113 -0
  871. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +147 -0
  872. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +66 -0
  873. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +2386 -0
  874. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +281 -0
  875. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.h +58 -0
  876. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +314 -0
  877. package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +244 -0
  878. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +47 -0
  879. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +42 -0
  880. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_format_test.cc +375 -0
  881. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +327 -0
  882. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_row_merge_test.cc +114 -0
  883. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_serialize_test.cc +187 -0
  884. package/deps/rocksdb/rocksdb/utilities/cassandra/format.cc +390 -0
  885. package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +184 -0
  886. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +67 -0
  887. package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +44 -0
  888. package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +75 -0
  889. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +72 -0
  890. package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +43 -0
  891. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +588 -0
  892. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +82 -0
  893. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +821 -0
  894. package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +37 -0
  895. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +29 -0
  896. package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +27 -0
  897. package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +25 -0
  898. package/deps/rocksdb/rocksdb/utilities/debug.cc +82 -0
  899. package/deps/rocksdb/rocksdb/utilities/env_librados.cc +1497 -0
  900. package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +1146 -0
  901. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +262 -0
  902. package/deps/rocksdb/rocksdb/utilities/env_mirror_test.cc +223 -0
  903. package/deps/rocksdb/rocksdb/utilities/env_timed.cc +145 -0
  904. package/deps/rocksdb/rocksdb/utilities/env_timed_test.cc +44 -0
  905. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +490 -0
  906. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +242 -0
  907. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +581 -0
  908. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +437 -0
  909. package/deps/rocksdb/rocksdb/utilities/leveldb_options/leveldb_options.cc +56 -0
  910. package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +275 -0
  911. package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +52 -0
  912. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.cc +59 -0
  913. package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +39 -0
  914. package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +77 -0
  915. package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +83 -0
  916. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +97 -0
  917. package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +38 -0
  918. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +59 -0
  919. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +31 -0
  920. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +117 -0
  921. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +49 -0
  922. package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +598 -0
  923. package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +69 -0
  924. package/deps/rocksdb/rocksdb/utilities/merge_operators.h +55 -0
  925. package/deps/rocksdb/rocksdb/utilities/object_registry.cc +87 -0
  926. package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +174 -0
  927. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +168 -0
  928. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +431 -0
  929. package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +159 -0
  930. package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +655 -0
  931. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +425 -0
  932. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +156 -0
  933. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +609 -0
  934. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +296 -0
  935. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file_buffer.h +127 -0
  936. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.cc +86 -0
  937. package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +125 -0
  938. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table.h +238 -0
  939. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_bench.cc +308 -0
  940. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +168 -0
  941. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +160 -0
  942. package/deps/rocksdb/rocksdb/utilities/persistent_cache/lrulist.h +174 -0
  943. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +360 -0
  944. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +456 -0
  945. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.h +286 -0
  946. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.cc +167 -0
  947. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +339 -0
  948. package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_util.h +67 -0
  949. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +140 -0
  950. package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +142 -0
  951. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +285 -0
  952. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.h +231 -0
  953. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +494 -0
  954. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +356 -0
  955. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +224 -0
  956. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +122 -0
  957. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.h +72 -0
  958. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +244 -0
  959. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +125 -0
  960. package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +48 -0
  961. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +29 -0
  962. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.h +82 -0
  963. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_tracker.h +209 -0
  964. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +720 -0
  965. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +223 -0
  966. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +181 -0
  967. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +319 -0
  968. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_tracker.cc +270 -0
  969. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_tracker.h +99 -0
  970. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_lock_manager.h +30 -0
  971. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +306 -0
  972. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.AGPLv3 +661 -0
  973. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.APACHEv2 +174 -0
  974. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.GPLv2 +339 -0
  975. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/db.h +76 -0
  976. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/ft/comparator.h +138 -0
  977. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/ft/ft-status.h +102 -0
  978. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc +139 -0
  979. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.h +174 -0
  980. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc +222 -0
  981. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.h +141 -0
  982. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc +525 -0
  983. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h +253 -0
  984. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +1007 -0
  985. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +560 -0
  986. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc +527 -0
  987. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc +265 -0
  988. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.h +178 -0
  989. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc +520 -0
  990. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.h +302 -0
  991. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc +120 -0
  992. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.h +92 -0
  993. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc +213 -0
  994. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.h +124 -0
  995. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/memory.h +215 -0
  996. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h +39 -0
  997. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_atomic.h +130 -0
  998. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +82 -0
  999. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_instrumentation.h +286 -0
  1000. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_portability.h +87 -0
  1001. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_pthread.h +520 -0
  1002. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_race_tools.h +179 -0
  1003. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +172 -0
  1004. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h +27 -0
  1005. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc +132 -0
  1006. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc +153 -0
  1007. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.h +98 -0
  1008. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +144 -0
  1009. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc +201 -0
  1010. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.h +141 -0
  1011. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt.h +794 -0
  1012. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt_impl.h +1295 -0
  1013. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h +165 -0
  1014. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/status.h +76 -0
  1015. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +479 -0
  1016. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +130 -0
  1017. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.cc +156 -0
  1018. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h +146 -0
  1019. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +196 -0
  1020. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.h +101 -0
  1021. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +111 -0
  1022. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +87 -0
  1023. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1418 -0
  1024. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +752 -0
  1025. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +232 -0
  1026. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +628 -0
  1027. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +228 -0
  1028. package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +49 -0
  1029. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +678 -0
  1030. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +373 -0
  1031. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +135 -0
  1032. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.h +26 -0
  1033. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +6350 -0
  1034. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +522 -0
  1035. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +188 -0
  1036. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +80 -0
  1037. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +3531 -0
  1038. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +483 -0
  1039. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +119 -0
  1040. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +999 -0
  1041. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +1109 -0
  1042. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +786 -0
  1043. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +1039 -0
  1044. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +341 -0
  1045. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +470 -0
  1046. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +108 -0
  1047. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +332 -0
  1048. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +353 -0
  1049. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +703 -0
  1050. package/deps/rocksdb/rocksdb/utilities/util_merge_operators_test.cc +99 -0
  1051. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +617 -0
  1052. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +345 -0
  1053. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +569 -0
  1054. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1867 -0
  1055. package/deps/rocksdb/rocksdb.gyp +475 -0
  1056. package/deps/snappy/freebsd/config.h +135 -0
  1057. package/deps/snappy/freebsd/snappy-stubs-public.h +100 -0
  1058. package/deps/snappy/linux/config.h +135 -0
  1059. package/deps/snappy/linux/snappy-stubs-public.h +100 -0
  1060. package/deps/snappy/mac/config.h +137 -0
  1061. package/deps/snappy/mac/snappy-stubs-public.h +100 -0
  1062. package/deps/snappy/openbsd/config.h +135 -0
  1063. package/deps/snappy/openbsd/snappy-stubs-public.h +100 -0
  1064. package/deps/snappy/snappy-1.1.7/COPYING +54 -0
  1065. package/deps/snappy/snappy-1.1.7/cmake/SnappyConfig.cmake +1 -0
  1066. package/deps/snappy/snappy-1.1.7/cmake/config.h.in +62 -0
  1067. package/deps/snappy/snappy-1.1.7/snappy-c.cc +90 -0
  1068. package/deps/snappy/snappy-1.1.7/snappy-c.h +138 -0
  1069. package/deps/snappy/snappy-1.1.7/snappy-internal.h +224 -0
  1070. package/deps/snappy/snappy-1.1.7/snappy-sinksource.cc +104 -0
  1071. package/deps/snappy/snappy-1.1.7/snappy-sinksource.h +182 -0
  1072. package/deps/snappy/snappy-1.1.7/snappy-stubs-internal.cc +42 -0
  1073. package/deps/snappy/snappy-1.1.7/snappy-stubs-internal.h +561 -0
  1074. package/deps/snappy/snappy-1.1.7/snappy-stubs-public.h.in +94 -0
  1075. package/deps/snappy/snappy-1.1.7/snappy-test.cc +612 -0
  1076. package/deps/snappy/snappy-1.1.7/snappy-test.h +573 -0
  1077. package/deps/snappy/snappy-1.1.7/snappy.cc +1515 -0
  1078. package/deps/snappy/snappy-1.1.7/snappy.h +203 -0
  1079. package/deps/snappy/snappy-1.1.7/snappy_unittest.cc +1410 -0
  1080. package/deps/snappy/snappy.gyp +90 -0
  1081. package/deps/snappy/solaris/config.h +135 -0
  1082. package/deps/snappy/solaris/snappy-stubs-public.h +100 -0
  1083. package/deps/snappy/win32/config.h +29 -0
  1084. package/deps/snappy/win32/snappy-stubs-public.h +100 -0
  1085. package/iterator.js +55 -0
  1086. package/leveldown.js +113 -0
  1087. package/package-lock.json +23687 -0
  1088. package/package.json +70 -0
@@ -0,0 +1,2024 @@
1
+ // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+ //
6
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7
+ // Use of this source code is governed by a BSD-style license that can be
8
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
+ #include <cinttypes>
10
+
11
+ #include "db/db_impl/db_impl.h"
12
+ #include "db/error_handler.h"
13
+ #include "db/event_helpers.h"
14
+ #include "monitoring/perf_context_imp.h"
15
+ #include "options/options_helper.h"
16
+ #include "test_util/sync_point.h"
17
+ #include "util/cast_util.h"
18
+
19
+ namespace ROCKSDB_NAMESPACE {
20
+ // Convenience methods
21
+ Status DBImpl::Put(const WriteOptions& o, ColumnFamilyHandle* column_family,
22
+ const Slice& key, const Slice& val) {
23
+ return DB::Put(o, column_family, key, val);
24
+ }
25
+
26
+ Status DBImpl::Merge(const WriteOptions& o, ColumnFamilyHandle* column_family,
27
+ const Slice& key, const Slice& val) {
28
+ auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
29
+ if (!cfh->cfd()->ioptions()->merge_operator) {
30
+ return Status::NotSupported("Provide a merge_operator when opening DB");
31
+ } else {
32
+ return DB::Merge(o, column_family, key, val);
33
+ }
34
+ }
35
+
36
+ Status DBImpl::Delete(const WriteOptions& write_options,
37
+ ColumnFamilyHandle* column_family, const Slice& key) {
38
+ return DB::Delete(write_options, column_family, key);
39
+ }
40
+
41
+ Status DBImpl::SingleDelete(const WriteOptions& write_options,
42
+ ColumnFamilyHandle* column_family,
43
+ const Slice& key) {
44
+ return DB::SingleDelete(write_options, column_family, key);
45
+ }
46
+
47
+ void DBImpl::SetRecoverableStatePreReleaseCallback(
48
+ PreReleaseCallback* callback) {
49
+ recoverable_state_pre_release_callback_.reset(callback);
50
+ }
51
+
52
+ Status DBImpl::Write(const WriteOptions& write_options, WriteBatch* my_batch) {
53
+ return WriteImpl(write_options, my_batch, nullptr, nullptr);
54
+ }
55
+
56
+ #ifndef ROCKSDB_LITE
57
+ Status DBImpl::WriteWithCallback(const WriteOptions& write_options,
58
+ WriteBatch* my_batch,
59
+ WriteCallback* callback) {
60
+ return WriteImpl(write_options, my_batch, callback, nullptr);
61
+ }
62
+ #endif // ROCKSDB_LITE
63
+
64
+ // The main write queue. This is the only write queue that updates LastSequence.
65
+ // When using one write queue, the same sequence also indicates the last
66
+ // published sequence.
67
+ Status DBImpl::WriteImpl(const WriteOptions& write_options,
68
+ WriteBatch* my_batch, WriteCallback* callback,
69
+ uint64_t* log_used, uint64_t log_ref,
70
+ bool disable_memtable, uint64_t* seq_used,
71
+ size_t batch_cnt,
72
+ PreReleaseCallback* pre_release_callback) {
73
+ assert(!seq_per_batch_ || batch_cnt != 0);
74
+ if (my_batch == nullptr) {
75
+ return Status::Corruption("Batch is nullptr!");
76
+ }
77
+ if (tracer_) {
78
+ InstrumentedMutexLock lock(&trace_mutex_);
79
+ if (tracer_) {
80
+ // TODO: maybe handle the tracing status?
81
+ tracer_->Write(my_batch).PermitUncheckedError();
82
+ }
83
+ }
84
+ if (write_options.sync && write_options.disableWAL) {
85
+ return Status::InvalidArgument("Sync writes has to enable WAL.");
86
+ }
87
+ if (two_write_queues_ && immutable_db_options_.enable_pipelined_write) {
88
+ return Status::NotSupported(
89
+ "pipelined_writes is not compatible with concurrent prepares");
90
+ }
91
+ if (seq_per_batch_ && immutable_db_options_.enable_pipelined_write) {
92
+ // TODO(yiwu): update pipeline write with seq_per_batch and batch_cnt
93
+ return Status::NotSupported(
94
+ "pipelined_writes is not compatible with seq_per_batch");
95
+ }
96
+ if (immutable_db_options_.unordered_write &&
97
+ immutable_db_options_.enable_pipelined_write) {
98
+ return Status::NotSupported(
99
+ "pipelined_writes is not compatible with unordered_write");
100
+ }
101
+ // Otherwise IsLatestPersistentState optimization does not make sense
102
+ assert(!WriteBatchInternal::IsLatestPersistentState(my_batch) ||
103
+ disable_memtable);
104
+
105
+ if (write_options.low_pri) {
106
+ Status s = ThrottleLowPriWritesIfNeeded(write_options, my_batch);
107
+ if (!s.ok()) {
108
+ return s;
109
+ }
110
+ }
111
+
112
+ if (two_write_queues_ && disable_memtable) {
113
+ AssignOrder assign_order =
114
+ seq_per_batch_ ? kDoAssignOrder : kDontAssignOrder;
115
+ // Otherwise it is WAL-only Prepare batches in WriteCommitted policy and
116
+ // they don't consume sequence.
117
+ return WriteImplWALOnly(&nonmem_write_thread_, write_options, my_batch,
118
+ callback, log_used, log_ref, seq_used, batch_cnt,
119
+ pre_release_callback, assign_order,
120
+ kDontPublishLastSeq, disable_memtable);
121
+ }
122
+
123
+ if (immutable_db_options_.unordered_write) {
124
+ const size_t sub_batch_cnt = batch_cnt != 0
125
+ ? batch_cnt
126
+ // every key is a sub-batch consuming a seq
127
+ : WriteBatchInternal::Count(my_batch);
128
+ uint64_t seq = 0;
129
+ // Use a write thread to i) optimize for WAL write, ii) publish last
130
+ // sequence in increasing order, iii) call pre_release_callback serially
131
+ Status status = WriteImplWALOnly(
132
+ &write_thread_, write_options, my_batch, callback, log_used, log_ref,
133
+ &seq, sub_batch_cnt, pre_release_callback, kDoAssignOrder,
134
+ kDoPublishLastSeq, disable_memtable);
135
+ TEST_SYNC_POINT("DBImpl::WriteImpl:UnorderedWriteAfterWriteWAL");
136
+ if (!status.ok()) {
137
+ return status;
138
+ }
139
+ if (seq_used) {
140
+ *seq_used = seq;
141
+ }
142
+ if (!disable_memtable) {
143
+ TEST_SYNC_POINT("DBImpl::WriteImpl:BeforeUnorderedWriteMemtable");
144
+ status = UnorderedWriteMemtable(write_options, my_batch, callback,
145
+ log_ref, seq, sub_batch_cnt);
146
+ }
147
+ return status;
148
+ }
149
+
150
+ if (immutable_db_options_.enable_pipelined_write) {
151
+ return PipelinedWriteImpl(write_options, my_batch, callback, log_used,
152
+ log_ref, disable_memtable, seq_used);
153
+ }
154
+
155
+ PERF_TIMER_GUARD(write_pre_and_post_process_time);
156
+ WriteThread::Writer w(write_options, my_batch, callback, log_ref,
157
+ disable_memtable, batch_cnt, pre_release_callback);
158
+
159
+ if (!write_options.disableWAL) {
160
+ RecordTick(stats_, WRITE_WITH_WAL);
161
+ }
162
+
163
+ StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);
164
+
165
+ write_thread_.JoinBatchGroup(&w);
166
+ if (w.state == WriteThread::STATE_PARALLEL_MEMTABLE_WRITER) {
167
+ // we are a non-leader in a parallel group
168
+
169
+ if (w.ShouldWriteToMemtable()) {
170
+ PERF_TIMER_STOP(write_pre_and_post_process_time);
171
+ PERF_TIMER_GUARD(write_memtable_time);
172
+
173
+ ColumnFamilyMemTablesImpl column_family_memtables(
174
+ versions_->GetColumnFamilySet());
175
+ w.status = WriteBatchInternal::InsertInto(
176
+ &w, w.sequence, &column_family_memtables, &flush_scheduler_,
177
+ &trim_history_scheduler_,
178
+ write_options.ignore_missing_column_families, 0 /*log_number*/, this,
179
+ true /*concurrent_memtable_writes*/, seq_per_batch_, w.batch_cnt,
180
+ batch_per_txn_, write_options.memtable_insert_hint_per_batch);
181
+
182
+ PERF_TIMER_START(write_pre_and_post_process_time);
183
+ }
184
+
185
+ if (write_thread_.CompleteParallelMemTableWriter(&w)) {
186
+ // we're responsible for exit batch group
187
+ // TODO(myabandeh): propagate status to write_group
188
+ auto last_sequence = w.write_group->last_sequence;
189
+ versions_->SetLastSequence(last_sequence);
190
+ MemTableInsertStatusCheck(w.status);
191
+ write_thread_.ExitAsBatchGroupFollower(&w);
192
+ }
193
+ assert(w.state == WriteThread::STATE_COMPLETED);
194
+ // STATE_COMPLETED conditional below handles exit
195
+ }
196
+ if (w.state == WriteThread::STATE_COMPLETED) {
197
+ if (log_used != nullptr) {
198
+ *log_used = w.log_used;
199
+ }
200
+ if (seq_used != nullptr) {
201
+ *seq_used = w.sequence;
202
+ }
203
+ // write is complete and leader has updated sequence
204
+ return w.FinalStatus();
205
+ }
206
+ // else we are the leader of the write batch group
207
+ assert(w.state == WriteThread::STATE_GROUP_LEADER);
208
+ Status status;
209
+ // Once reaches this point, the current writer "w" will try to do its write
210
+ // job. It may also pick up some of the remaining writers in the "writers_"
211
+ // when it finds suitable, and finish them in the same write batch.
212
+ // This is how a write job could be done by the other writer.
213
+ WriteContext write_context;
214
+ WriteThread::WriteGroup write_group;
215
+ bool in_parallel_group = false;
216
+ uint64_t last_sequence = kMaxSequenceNumber;
217
+
218
+ mutex_.Lock();
219
+
220
+ bool need_log_sync = write_options.sync;
221
+ bool need_log_dir_sync = need_log_sync && !log_dir_synced_;
222
+ if (!two_write_queues_ || !disable_memtable) {
223
+ // With concurrent writes we do preprocess only in the write thread that
224
+ // also does write to memtable to avoid sync issue on shared data structure
225
+ // with the other thread
226
+
227
+ // PreprocessWrite does its own perf timing.
228
+ PERF_TIMER_STOP(write_pre_and_post_process_time);
229
+
230
+ status = PreprocessWrite(write_options, &need_log_sync, &write_context);
231
+ if (!two_write_queues_) {
232
+ // Assign it after ::PreprocessWrite since the sequence might advance
233
+ // inside it by WriteRecoverableState
234
+ last_sequence = versions_->LastSequence();
235
+ }
236
+
237
+ PERF_TIMER_START(write_pre_and_post_process_time);
238
+ }
239
+ log::Writer* log_writer = logs_.back().writer;
240
+
241
+ mutex_.Unlock();
242
+
243
+ // Add to log and apply to memtable. We can release the lock
244
+ // during this phase since &w is currently responsible for logging
245
+ // and protects against concurrent loggers and concurrent writes
246
+ // into memtables
247
+
248
+ TEST_SYNC_POINT("DBImpl::WriteImpl:BeforeLeaderEnters");
249
+ last_batch_group_size_ =
250
+ write_thread_.EnterAsBatchGroupLeader(&w, &write_group);
251
+
252
+ IOStatus io_s;
253
+ if (status.ok()) {
254
+ // Rules for when we can update the memtable concurrently
255
+ // 1. supported by memtable
256
+ // 2. Puts are not okay if inplace_update_support
257
+ // 3. Merges are not okay
258
+ //
259
+ // Rules 1..2 are enforced by checking the options
260
+ // during startup (CheckConcurrentWritesSupported), so if
261
+ // options.allow_concurrent_memtable_write is true then they can be
262
+ // assumed to be true. Rule 3 is checked for each batch. We could
263
+ // relax rules 2 if we could prevent write batches from referring
264
+ // more than once to a particular key.
265
+ bool parallel = immutable_db_options_.allow_concurrent_memtable_write &&
266
+ write_group.size > 1;
267
+ size_t total_count = 0;
268
+ size_t valid_batches = 0;
269
+ size_t total_byte_size = 0;
270
+ size_t pre_release_callback_cnt = 0;
271
+ for (auto* writer : write_group) {
272
+ if (writer->CheckCallback(this)) {
273
+ valid_batches += writer->batch_cnt;
274
+ if (writer->ShouldWriteToMemtable()) {
275
+ total_count += WriteBatchInternal::Count(writer->batch);
276
+ parallel = parallel && !writer->batch->HasMerge();
277
+ }
278
+ total_byte_size = WriteBatchInternal::AppendedByteSize(
279
+ total_byte_size, WriteBatchInternal::ByteSize(writer->batch));
280
+ if (writer->pre_release_callback) {
281
+ pre_release_callback_cnt++;
282
+ }
283
+ }
284
+ }
285
+ // Note about seq_per_batch_: either disableWAL is set for the entire write
286
+ // group or not. In either case we inc seq for each write batch with no
287
+ // failed callback. This means that there could be a batch with
288
+ // disable_memtable in between; although we do not write this batch to
289
+ // memtable it still consumes a seq. Otherwise, if !seq_per_batch_, we inc
290
+ // the seq per valid written key to mem.
291
+ size_t seq_inc = seq_per_batch_ ? valid_batches : total_count;
292
+
293
+ const bool concurrent_update = two_write_queues_;
294
+ // Update stats while we are an exclusive group leader, so we know
295
+ // that nobody else can be writing to these particular stats.
296
+ // We're optimistic, updating the stats before we successfully
297
+ // commit. That lets us release our leader status early.
298
+ auto stats = default_cf_internal_stats_;
299
+ stats->AddDBStats(InternalStats::kIntStatsNumKeysWritten, total_count,
300
+ concurrent_update);
301
+ RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
302
+ stats->AddDBStats(InternalStats::kIntStatsBytesWritten, total_byte_size,
303
+ concurrent_update);
304
+ RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
305
+ stats->AddDBStats(InternalStats::kIntStatsWriteDoneBySelf, 1,
306
+ concurrent_update);
307
+ RecordTick(stats_, WRITE_DONE_BY_SELF);
308
+ auto write_done_by_other = write_group.size - 1;
309
+ if (write_done_by_other > 0) {
310
+ stats->AddDBStats(InternalStats::kIntStatsWriteDoneByOther,
311
+ write_done_by_other, concurrent_update);
312
+ RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other);
313
+ }
314
+ RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);
315
+
316
+ if (write_options.disableWAL) {
317
+ has_unpersisted_data_.store(true, std::memory_order_relaxed);
318
+ }
319
+
320
+ PERF_TIMER_STOP(write_pre_and_post_process_time);
321
+
322
+ if (!two_write_queues_) {
323
+ if (status.ok() && !write_options.disableWAL) {
324
+ PERF_TIMER_GUARD(write_wal_time);
325
+ io_s = WriteToWAL(write_group, log_writer, log_used, need_log_sync,
326
+ need_log_dir_sync, last_sequence + 1);
327
+ }
328
+ } else {
329
+ if (status.ok() && !write_options.disableWAL) {
330
+ PERF_TIMER_GUARD(write_wal_time);
331
+ // LastAllocatedSequence is increased inside WriteToWAL under
332
+ // wal_write_mutex_ to ensure ordered events in WAL
333
+ io_s = ConcurrentWriteToWAL(write_group, log_used, &last_sequence,
334
+ seq_inc);
335
+ } else {
336
+ // Otherwise we inc seq number for memtable writes
337
+ last_sequence = versions_->FetchAddLastAllocatedSequence(seq_inc);
338
+ }
339
+ }
340
+ status = io_s;
341
+ assert(last_sequence != kMaxSequenceNumber);
342
+ const SequenceNumber current_sequence = last_sequence + 1;
343
+ last_sequence += seq_inc;
344
+
345
+ // PreReleaseCallback is called after WAL write and before memtable write
346
+ if (status.ok()) {
347
+ SequenceNumber next_sequence = current_sequence;
348
+ size_t index = 0;
349
+ // Note: the logic for advancing seq here must be consistent with the
350
+ // logic in WriteBatchInternal::InsertInto(write_group...) as well as
351
+ // with WriteBatchInternal::InsertInto(write_batch...) that is called on
352
+ // the merged batch during recovery from the WAL.
353
+ for (auto* writer : write_group) {
354
+ if (writer->CallbackFailed()) {
355
+ continue;
356
+ }
357
+ writer->sequence = next_sequence;
358
+ if (writer->pre_release_callback) {
359
+ Status ws = writer->pre_release_callback->Callback(
360
+ writer->sequence, disable_memtable, writer->log_used, index++,
361
+ pre_release_callback_cnt);
362
+ if (!ws.ok()) {
363
+ status = ws;
364
+ break;
365
+ }
366
+ }
367
+ if (seq_per_batch_) {
368
+ assert(writer->batch_cnt);
369
+ next_sequence += writer->batch_cnt;
370
+ } else if (writer->ShouldWriteToMemtable()) {
371
+ next_sequence += WriteBatchInternal::Count(writer->batch);
372
+ }
373
+ }
374
+ }
375
+
376
+ if (status.ok()) {
377
+ PERF_TIMER_GUARD(write_memtable_time);
378
+
379
+ if (!parallel) {
380
+ // w.sequence will be set inside InsertInto
381
+ w.status = WriteBatchInternal::InsertInto(
382
+ write_group, current_sequence, column_family_memtables_.get(),
383
+ &flush_scheduler_, &trim_history_scheduler_,
384
+ write_options.ignore_missing_column_families,
385
+ 0 /*recovery_log_number*/, this, parallel, seq_per_batch_,
386
+ batch_per_txn_);
387
+ } else {
388
+ write_group.last_sequence = last_sequence;
389
+ write_thread_.LaunchParallelMemTableWriters(&write_group);
390
+ in_parallel_group = true;
391
+
392
+ // Each parallel follower is doing each own writes. The leader should
393
+ // also do its own.
394
+ if (w.ShouldWriteToMemtable()) {
395
+ ColumnFamilyMemTablesImpl column_family_memtables(
396
+ versions_->GetColumnFamilySet());
397
+ assert(w.sequence == current_sequence);
398
+ w.status = WriteBatchInternal::InsertInto(
399
+ &w, w.sequence, &column_family_memtables, &flush_scheduler_,
400
+ &trim_history_scheduler_,
401
+ write_options.ignore_missing_column_families, 0 /*log_number*/,
402
+ this, true /*concurrent_memtable_writes*/, seq_per_batch_,
403
+ w.batch_cnt, batch_per_txn_,
404
+ write_options.memtable_insert_hint_per_batch);
405
+ }
406
+ }
407
+ if (seq_used != nullptr) {
408
+ *seq_used = w.sequence;
409
+ }
410
+ }
411
+ }
412
+ PERF_TIMER_START(write_pre_and_post_process_time);
413
+
414
+ if (!w.CallbackFailed()) {
415
+ if (!io_s.ok()) {
416
+ IOStatusCheck(io_s);
417
+ } else {
418
+ WriteStatusCheck(status);
419
+ }
420
+ }
421
+
422
+ if (need_log_sync) {
423
+ mutex_.Lock();
424
+ if (status.ok()) {
425
+ status = MarkLogsSynced(logfile_number_, need_log_dir_sync);
426
+ } else {
427
+ MarkLogsNotSynced(logfile_number_);
428
+ }
429
+ mutex_.Unlock();
430
+ // Requesting sync with two_write_queues_ is expected to be very rare. We
431
+ // hence provide a simple implementation that is not necessarily efficient.
432
+ if (two_write_queues_) {
433
+ if (manual_wal_flush_) {
434
+ status = FlushWAL(true);
435
+ } else {
436
+ status = SyncWAL();
437
+ }
438
+ }
439
+ }
440
+
441
+ bool should_exit_batch_group = true;
442
+ if (in_parallel_group) {
443
+ // CompleteParallelWorker returns true if this thread should
444
+ // handle exit, false means somebody else did
445
+ should_exit_batch_group = write_thread_.CompleteParallelMemTableWriter(&w);
446
+ }
447
+ if (should_exit_batch_group) {
448
+ if (status.ok()) {
449
+ // Note: if we are to resume after non-OK statuses we need to revisit how
450
+ // we react to non-OK statuses here.
451
+ versions_->SetLastSequence(last_sequence);
452
+ }
453
+ MemTableInsertStatusCheck(w.status);
454
+ write_thread_.ExitAsBatchGroupLeader(write_group, status);
455
+ }
456
+
457
+ if (status.ok()) {
458
+ status = w.FinalStatus();
459
+ }
460
+ return status;
461
+ }
462
+
463
// Write path in which the WAL write and the memtable insert are handled as
// two separate stages. After JoinBatchGroup the calling thread acts according
// to w.state:
//  - STATE_GROUP_LEADER: preprocesses the write, assigns sequence numbers to
//    the WAL write group, writes the group to the WAL and exits as leader.
//  - STATE_MEMTABLE_WRITER_LEADER: inserts the whole memtable write group
//    itself, or launches parallel memtable writers.
//  - STATE_PARALLEL_MEMTABLE_WRITER: inserts only its own batch.
// The three tests are independent `if`s, so one thread may pass through more
// than one of them.
//
// On return *seq_used (if non-null) receives w.sequence. The result is
// w.FinalStatus().
+ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
464
+ WriteBatch* my_batch, WriteCallback* callback,
465
+ uint64_t* log_used, uint64_t log_ref,
466
+ bool disable_memtable, uint64_t* seq_used) {
467
+ PERF_TIMER_GUARD(write_pre_and_post_process_time);
468
+ StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);
469
+
470
+ WriteContext write_context;
471
+
472
+ WriteThread::Writer w(write_options, my_batch, callback, log_ref,
473
+ disable_memtable);
474
+ write_thread_.JoinBatchGroup(&w);
475
+ TEST_SYNC_POINT("DBImplWrite::PipelinedWriteImpl:AfterJoinBatchGroup");
// Stage 1 (WAL): only the thread that became group leader runs this section.
476
+ if (w.state == WriteThread::STATE_GROUP_LEADER) {
477
+ WriteThread::WriteGroup wal_write_group;
478
+ if (w.callback && !w.callback->AllowWriteBatching()) {
479
+ write_thread_.WaitForMemTableWriters();
480
+ }
481
+ mutex_.Lock();
482
+ bool need_log_sync = !write_options.disableWAL && write_options.sync;
483
+ bool need_log_dir_sync = need_log_sync && !log_dir_synced_;
484
+ // PreprocessWrite does its own perf timing.
485
+ PERF_TIMER_STOP(write_pre_and_post_process_time);
486
+ w.status = PreprocessWrite(write_options, &need_log_sync, &write_context);
487
+ PERF_TIMER_START(write_pre_and_post_process_time);
488
+ log::Writer* log_writer = logs_.back().writer;
489
+ mutex_.Unlock();
490
+
491
+ // This can set non-OK status if callback fail.
492
+ last_batch_group_size_ =
493
+ write_thread_.EnterAsBatchGroupLeader(&w, &wal_write_group);
494
+ const SequenceNumber current_sequence =
495
+ write_thread_.UpdateLastSequence(versions_->LastSequence()) + 1;
496
+ size_t total_count = 0;
497
+ size_t total_byte_size = 0;
498
+
// Sequence assignment is skipped entirely when preprocessing failed.
499
+ if (w.status.ok()) {
500
+ SequenceNumber next_sequence = current_sequence;
501
+ for (auto writer : wal_write_group) {
502
+ if (writer->CheckCallback(this)) {
// Only batches that go to the memtable consume sequence numbers; the byte
// size below is accumulated for every batch whose callback passed.
503
+ if (writer->ShouldWriteToMemtable()) {
504
+ writer->sequence = next_sequence;
505
+ size_t count = WriteBatchInternal::Count(writer->batch);
506
+ next_sequence += count;
507
+ total_count += count;
508
+ }
509
+ total_byte_size = WriteBatchInternal::AppendedByteSize(
510
+ total_byte_size, WriteBatchInternal::ByteSize(writer->batch));
511
+ }
512
+ }
513
+ if (w.disable_wal) {
514
+ has_unpersisted_data_.store(true, std::memory_order_relaxed);
515
+ }
516
+ write_thread_.UpdateLastSequence(current_sequence + total_count - 1);
517
+ }
518
+
// Stats are recorded even when w.status is not OK; both totals are then 0.
519
+ auto stats = default_cf_internal_stats_;
520
+ stats->AddDBStats(InternalStats::kIntStatsNumKeysWritten, total_count);
521
+ RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
522
+ stats->AddDBStats(InternalStats::kIntStatsBytesWritten, total_byte_size);
523
+ RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
524
+ RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);
525
+
526
+ PERF_TIMER_STOP(write_pre_and_post_process_time);
527
+
528
+ IOStatus io_s;
529
+ io_s.PermitUncheckedError(); // Allow io_s to be uninitialized
530
+
531
+ if (w.status.ok() && !write_options.disableWAL) {
532
+ PERF_TIMER_GUARD(write_wal_time);
533
+ stats->AddDBStats(InternalStats::kIntStatsWriteDoneBySelf, 1);
534
+ RecordTick(stats_, WRITE_DONE_BY_SELF, 1);
535
+ if (wal_write_group.size > 1) {
536
+ stats->AddDBStats(InternalStats::kIntStatsWriteDoneByOther,
537
+ wal_write_group.size - 1);
538
+ RecordTick(stats_, WRITE_DONE_BY_OTHER, wal_write_group.size - 1);
539
+ }
540
+ io_s = WriteToWAL(wal_write_group, log_writer, log_used, need_log_sync,
541
+ need_log_dir_sync, current_sequence);
542
+ w.status = io_s;
543
+ }
544
+
// An I/O failure is reported through IOStatusCheck; any other status goes
// through WriteStatusCheck. Neither runs if the leader's callback failed.
545
+ if (!w.CallbackFailed()) {
546
+ if (!io_s.ok()) {
547
+ IOStatusCheck(io_s);
548
+ } else {
549
+ WriteStatusCheck(w.status);
550
+ }
551
+ }
552
+
// need_log_sync may have been cleared by PreprocessWrite. When it is still
// set, the outcome of the sync is recorded under mutex_.
553
+ if (need_log_sync) {
554
+ mutex_.Lock();
555
+ if (w.status.ok()) {
556
+ w.status = MarkLogsSynced(logfile_number_, need_log_dir_sync);
557
+ } else {
558
+ MarkLogsNotSynced(logfile_number_);
559
+ }
560
+ mutex_.Unlock();
561
+ }
562
+
563
+ write_thread_.ExitAsBatchGroupLeader(wal_write_group, w.status);
564
+ }
565
+
566
+ // NOTE: the memtable_write_group is declared before the following
567
+ // `if` statement because its lifetime needs to be longer
568
+ // than the inner context of the `if` as a reference to it
569
+ // may be used further below within the outer _write_thread
570
+ WriteThread::WriteGroup memtable_write_group;
571
+
// Stage 2 (memtable): w.state is re-read here.
// NOTE(review): presumably ExitAsBatchGroupLeader / JoinBatchGroup move the
// writer into one of the memtable-writer states - confirm in WriteThread.
572
+ if (w.state == WriteThread::STATE_MEMTABLE_WRITER_LEADER) {
573
+ PERF_TIMER_GUARD(write_memtable_time);
574
+ assert(w.ShouldWriteToMemtable());
575
+ write_thread_.EnterAsMemTableWriter(&w, &memtable_write_group);
576
+ if (memtable_write_group.size > 1 &&
577
+ immutable_db_options_.allow_concurrent_memtable_write) {
578
+ write_thread_.LaunchParallelMemTableWriters(&memtable_write_group);
579
+ } else {
// Serial path: this thread inserts the whole group, publishes the group's
// last sequence and exits the memtable writer group.
580
+ memtable_write_group.status = WriteBatchInternal::InsertInto(
581
+ memtable_write_group, w.sequence, column_family_memtables_.get(),
582
+ &flush_scheduler_, &trim_history_scheduler_,
583
+ write_options.ignore_missing_column_families, 0 /*log_number*/, this,
584
+ false /*concurrent_memtable_writes*/, seq_per_batch_, batch_per_txn_);
585
+ versions_->SetLastSequence(memtable_write_group.last_sequence);
586
+ write_thread_.ExitAsMemTableWriter(&w, memtable_write_group);
587
+ }
588
+ } else {
589
+ // NOTE: the memtable_write_group is never really used,
590
+ // so we need to set its status to pass ASSERT_STATUS_CHECKED
591
+ memtable_write_group.status.PermitUncheckedError();
592
+ }
593
+
// A parallel memtable writer inserts only its own batch. The writer for which
// CompleteParallelMemTableWriter returns true publishes the group's last
// sequence and exits the memtable writer group.
// NOTE(review): seq_per_batch / batch_cnt / batch_per_txn are hard-coded in
// this call while the serial path above passes seq_per_batch_ and
// batch_per_txn_ - presumably pipelined writes are never combined with
// seq_per_batch_; confirm.
594
+ if (w.state == WriteThread::STATE_PARALLEL_MEMTABLE_WRITER) {
595
+ assert(w.ShouldWriteToMemtable());
596
+ ColumnFamilyMemTablesImpl column_family_memtables(
597
+ versions_->GetColumnFamilySet());
598
+ w.status = WriteBatchInternal::InsertInto(
599
+ &w, w.sequence, &column_family_memtables, &flush_scheduler_,
600
+ &trim_history_scheduler_, write_options.ignore_missing_column_families,
601
+ 0 /*log_number*/, this, true /*concurrent_memtable_writes*/,
602
+ false /*seq_per_batch*/, 0 /*batch_cnt*/, true /*batch_per_txn*/,
603
+ write_options.memtable_insert_hint_per_batch);
604
+ if (write_thread_.CompleteParallelMemTableWriter(&w)) {
605
+ MemTableInsertStatusCheck(w.status);
606
+ versions_->SetLastSequence(w.write_group->last_sequence);
607
+ write_thread_.ExitAsMemTableWriter(&w, *w.write_group);
608
+ }
609
+ }
610
+ if (seq_used != nullptr) {
611
+ *seq_used = w.sequence;
612
+ }
613
+
614
+ assert(w.state == WriteThread::STATE_COMPLETED);
615
+ return w.FinalStatus();
616
+ }
617
+
618
+ Status DBImpl::UnorderedWriteMemtable(const WriteOptions& write_options,
619
+ WriteBatch* my_batch,
620
+ WriteCallback* callback, uint64_t log_ref,
621
+ SequenceNumber seq,
622
+ const size_t sub_batch_cnt) {
623
+ PERF_TIMER_GUARD(write_pre_and_post_process_time);
624
+ StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);
625
+
626
+ WriteThread::Writer w(write_options, my_batch, callback, log_ref,
627
+ false /*disable_memtable*/);
628
+
629
+ if (w.CheckCallback(this) && w.ShouldWriteToMemtable()) {
630
+ w.sequence = seq;
631
+ size_t total_count = WriteBatchInternal::Count(my_batch);
632
+ InternalStats* stats = default_cf_internal_stats_;
633
+ stats->AddDBStats(InternalStats::kIntStatsNumKeysWritten, total_count);
634
+ RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
635
+
636
+ ColumnFamilyMemTablesImpl column_family_memtables(
637
+ versions_->GetColumnFamilySet());
638
+ w.status = WriteBatchInternal::InsertInto(
639
+ &w, w.sequence, &column_family_memtables, &flush_scheduler_,
640
+ &trim_history_scheduler_, write_options.ignore_missing_column_families,
641
+ 0 /*log_number*/, this, true /*concurrent_memtable_writes*/,
642
+ seq_per_batch_, sub_batch_cnt, true /*batch_per_txn*/,
643
+ write_options.memtable_insert_hint_per_batch);
644
+ if (write_options.disableWAL) {
645
+ has_unpersisted_data_.store(true, std::memory_order_relaxed);
646
+ }
647
+ }
648
+
649
+ size_t pending_cnt = pending_memtable_writes_.fetch_sub(1) - 1;
650
+ if (pending_cnt == 0) {
651
+ // switch_cv_ waits until pending_memtable_writes_ = 0. Locking its mutex
652
+ // before notify ensures that cv is in waiting state when it is notified
653
+ // thus not missing the update to pending_memtable_writes_ even though it is
654
+ // not modified under the mutex.
655
+ std::lock_guard<std::mutex> lck(switch_mutex_);
656
+ switch_cv_.notify_all();
657
+ }
658
+ WriteStatusCheck(w.status);
659
+
660
+ if (!w.FinalStatus().ok()) {
661
+ return w.FinalStatus();
662
+ }
663
+ return Status::OK();
664
+ }
665
+
666
// WAL-only write path driven by the caller-supplied `write_thread`.
//
// A writer that joins the batch group and finds itself STATE_COMPLETED only
// reports the leader's results (*log_used, *seq_used, FinalStatus()). The
// group leader optionally preprocesses the write (kDoPublishLastSeq only),
// allocates sequence numbers, writes the group to the WAL, optionally syncs,
// runs the pre-release callbacks and exits the group.
//
// assign_order:     with kDoAssignOrder each writer whose callback passed
//                   consumes batch_cnt sequence numbers; otherwise seq_inc
//                   is 0.
// publish_last_seq: with kDoPublishLastSeq the last sequence is published
//                   through versions_->SetLastSequence.
+ // The 2nd write queue. If enabled it will be used only for WAL-only writes.
667
+ // This is the only queue that updates LastPublishedSequence which is only
668
+ // applicable in a two-queue setting.
669
+ Status DBImpl::WriteImplWALOnly(
670
+ WriteThread* write_thread, const WriteOptions& write_options,
671
+ WriteBatch* my_batch, WriteCallback* callback, uint64_t* log_used,
672
+ const uint64_t log_ref, uint64_t* seq_used, const size_t sub_batch_cnt,
673
+ PreReleaseCallback* pre_release_callback, const AssignOrder assign_order,
674
+ const PublishLastSeq publish_last_seq, const bool disable_memtable) {
675
+ PERF_TIMER_GUARD(write_pre_and_post_process_time);
676
+ WriteThread::Writer w(write_options, my_batch, callback, log_ref,
677
+ disable_memtable, sub_batch_cnt, pre_release_callback);
678
+ RecordTick(stats_, WRITE_WITH_WAL);
679
+ StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);
680
+
681
+ write_thread->JoinBatchGroup(&w);
682
+ assert(w.state != WriteThread::STATE_PARALLEL_MEMTABLE_WRITER);
// The write was already completed on this writer's behalf: just report the
// results stored in w.
683
+ if (w.state == WriteThread::STATE_COMPLETED) {
684
+ if (log_used != nullptr) {
685
+ *log_used = w.log_used;
686
+ }
687
+ if (seq_used != nullptr) {
688
+ *seq_used = w.sequence;
689
+ }
690
+ return w.FinalStatus();
691
+ }
692
+ // else we are the leader of the write batch group
693
+ assert(w.state == WriteThread::STATE_GROUP_LEADER);
694
+
695
+ if (publish_last_seq == kDoPublishLastSeq) {
696
+ Status status;
697
+
698
+ // Currently we only use kDoPublishLastSeq in unordered_write
699
+ assert(immutable_db_options_.unordered_write);
700
+ WriteContext write_context;
701
+ if (error_handler_.IsDBStopped()) {
702
+ status = error_handler_.GetBGError();
703
+ }
704
+ // TODO(myabandeh): Make preliminary checks thread-safe so we could do them
705
+ // without paying the cost of obtaining the mutex.
706
+ if (status.ok()) {
707
+ InstrumentedMutexLock l(&mutex_);
708
+ bool need_log_sync = false;
709
+ status = PreprocessWrite(write_options, &need_log_sync, &write_context);
710
+ WriteStatusCheckOnLocked(status);
711
+ }
// On failure the group is still entered and exited so that it completes with
// the error status. Note that *log_used and *seq_used are not written on
// this early return.
712
+ if (!status.ok()) {
713
+ WriteThread::WriteGroup write_group;
714
+ write_thread->EnterAsBatchGroupLeader(&w, &write_group);
715
+ write_thread->ExitAsBatchGroupLeader(write_group, status);
716
+ return status;
717
+ }
718
+ }
719
+
720
+ WriteThread::WriteGroup write_group;
// last_sequence is assigned on both branches of the disableWAL test below.
721
+ uint64_t last_sequence;
722
+ write_thread->EnterAsBatchGroupLeader(&w, &write_group);
723
+ // Note: no need to update last_batch_group_size_ here since the batch writes
724
+ // to WAL only
725
+
726
+ size_t pre_release_callback_cnt = 0;
727
+ size_t total_byte_size = 0;
728
+ for (auto* writer : write_group) {
729
+ if (writer->CheckCallback(this)) {
730
+ total_byte_size = WriteBatchInternal::AppendedByteSize(
731
+ total_byte_size, WriteBatchInternal::ByteSize(writer->batch));
732
+ if (writer->pre_release_callback) {
733
+ pre_release_callback_cnt++;
734
+ }
735
+ }
736
+ }
737
+
738
+ const bool concurrent_update = true;
739
+ // Update stats while we are an exclusive group leader, so we know
740
+ // that nobody else can be writing to these particular stats.
741
+ // We're optimistic, updating the stats before we successfully
742
+ // commit. That lets us release our leader status early.
743
+ auto stats = default_cf_internal_stats_;
744
+ stats->AddDBStats(InternalStats::kIntStatsBytesWritten, total_byte_size,
745
+ concurrent_update);
746
+ RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
747
+ stats->AddDBStats(InternalStats::kIntStatsWriteDoneBySelf, 1,
748
+ concurrent_update);
749
+ RecordTick(stats_, WRITE_DONE_BY_SELF);
750
+ auto write_done_by_other = write_group.size - 1;
751
+ if (write_done_by_other > 0) {
752
+ stats->AddDBStats(InternalStats::kIntStatsWriteDoneByOther,
753
+ write_done_by_other, concurrent_update);
754
+ RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other);
755
+ }
756
+ RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);
757
+
758
+ PERF_TIMER_STOP(write_pre_and_post_process_time);
759
+
760
+ PERF_TIMER_GUARD(write_wal_time);
761
+ // LastAllocatedSequence is increased inside WriteToWAL under
762
+ // wal_write_mutex_ to ensure ordered events in WAL
763
+ size_t seq_inc = 0 /* total_count */;
764
+ if (assign_order == kDoAssignOrder) {
765
+ size_t total_batch_cnt = 0;
766
+ for (auto* writer : write_group) {
767
+ assert(writer->batch_cnt || !seq_per_batch_);
768
+ if (!writer->CallbackFailed()) {
769
+ total_batch_cnt += writer->batch_cnt;
770
+ }
771
+ }
772
+ seq_inc = total_batch_cnt;
773
+ }
774
+ Status status;
775
+ IOStatus io_s;
776
+ io_s.PermitUncheckedError(); // Allow io_s to be uninitialized
777
+ if (!write_options.disableWAL) {
778
+ io_s = ConcurrentWriteToWAL(write_group, log_used, &last_sequence, seq_inc);
779
+ status = io_s;
780
+ } else {
781
+ // Otherwise we inc seq number to do solely the seq allocation
782
+ last_sequence = versions_->FetchAddLastAllocatedSequence(seq_inc);
783
+ }
784
+
// Hand out sequence numbers to the writers whose callback passed. Without
// kDoAssignOrder curr_seq never advances, so all of them get the same value.
785
+ size_t memtable_write_cnt = 0;
786
+ auto curr_seq = last_sequence + 1;
787
+ for (auto* writer : write_group) {
788
+ if (writer->CallbackFailed()) {
789
+ continue;
790
+ }
791
+ writer->sequence = curr_seq;
792
+ if (assign_order == kDoAssignOrder) {
793
+ assert(writer->batch_cnt || !seq_per_batch_);
794
+ curr_seq += writer->batch_cnt;
795
+ }
796
+ if (!writer->disable_memtable) {
797
+ memtable_write_cnt++;
798
+ }
799
+ // else seq advances only by memtable writes
800
+ }
801
+ if (status.ok() && write_options.sync) {
802
+ assert(!write_options.disableWAL);
803
+ // Requesting sync with two_write_queues_ is expected to be very rare. We
804
+ // hence provide a simple implementation that is not necessarily efficient.
805
+ if (manual_wal_flush_) {
806
+ status = FlushWAL(true);
807
+ } else {
808
+ status = SyncWAL();
809
+ }
810
+ }
811
+ PERF_TIMER_START(write_pre_and_post_process_time);
812
+
813
+ if (!w.CallbackFailed()) {
814
+ if (!io_s.ok()) {
815
+ IOStatusCheck(io_s);
816
+ } else {
817
+ WriteStatusCheck(status);
818
+ }
819
+ }
// Run the pre-release callbacks in group order and stop at the first failure.
// NOTE(review): the second argument is this call's `disable_memtable`
// parameter, not writer->disable_memtable - confirm this is intended for
// followers.
820
+ if (status.ok()) {
821
+ size_t index = 0;
822
+ for (auto* writer : write_group) {
823
+ if (!writer->CallbackFailed() && writer->pre_release_callback) {
824
+ assert(writer->sequence != kMaxSequenceNumber);
825
+ Status ws = writer->pre_release_callback->Callback(
826
+ writer->sequence, disable_memtable, writer->log_used, index++,
827
+ pre_release_callback_cnt);
828
+ if (!ws.ok()) {
829
+ status = ws;
830
+ break;
831
+ }
832
+ }
833
+ }
834
+ }
// The last sequence is published regardless of `status`.
835
+ if (publish_last_seq == kDoPublishLastSeq) {
836
+ versions_->SetLastSequence(last_sequence + seq_inc);
837
+ // Currently we only use kDoPublishLastSeq in unordered_write
838
+ assert(immutable_db_options_.unordered_write);
839
+ }
// Each counted writer is expected to decrement this counter later (see
// UnorderedWriteMemtable).
840
+ if (immutable_db_options_.unordered_write && status.ok()) {
841
+ pending_memtable_writes_ += memtable_write_cnt;
842
+ }
843
+ write_thread->ExitAsBatchGroupLeader(write_group, status);
844
+ if (status.ok()) {
845
+ status = w.FinalStatus();
846
+ }
847
+ if (seq_used != nullptr) {
848
+ *seq_used = w.sequence;
849
+ }
850
+ return status;
851
+ }
852
+
853
+ void DBImpl::WriteStatusCheckOnLocked(const Status& status) {
854
+ // Is setting bg_error_ enough here? This will at least stop
855
+ // compaction and fail any further writes.
856
+ // Caller must hold mutex_.
857
+ assert(!status.IsIOFenced() || !error_handler_.GetBGError().ok());
858
+ mutex_.AssertHeld();
859
+ if (immutable_db_options_.paranoid_checks && !status.ok() &&
860
+ !status.IsBusy() && !status.IsIncomplete()) {
861
+ // Maybe change the return status to void?
862
+ error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback);
863
+ }
864
+ }
865
+
866
+ void DBImpl::WriteStatusCheck(const Status& status) {
867
+ // Is setting bg_error_ enough here? This will at least stop
868
+ // compaction and fail any further writes.
869
+ assert(!status.IsIOFenced() || !error_handler_.GetBGError().ok());
870
+ if (immutable_db_options_.paranoid_checks && !status.ok() &&
871
+ !status.IsBusy() && !status.IsIncomplete()) {
872
+ mutex_.Lock();
873
+ // Maybe change the return status to void?
874
+ error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback);
875
+ mutex_.Unlock();
876
+ }
877
+ }
878
+
879
+ void DBImpl::IOStatusCheck(const IOStatus& io_status) {
880
+ // Is setting bg_error_ enough here? This will at least stop
881
+ // compaction and fail any further writes.
882
+ if ((immutable_db_options_.paranoid_checks && !io_status.ok() &&
883
+ !io_status.IsBusy() && !io_status.IsIncomplete()) ||
884
+ io_status.IsIOFenced()) {
885
+ mutex_.Lock();
886
+ // Maybe change the return status to void?
887
+ error_handler_.SetBGError(io_status, BackgroundErrorReason::kWriteCallback);
888
+ mutex_.Unlock();
889
+ }
890
+ }
891
+
892
+ void DBImpl::MemTableInsertStatusCheck(const Status& status) {
893
+ // A non-OK status here indicates that the state implied by the
894
+ // WAL has diverged from the in-memory state. This could be
895
+ // because of a corrupt write_batch (very bad), or because the
896
+ // client specified an invalid column family and didn't specify
897
+ // ignore_missing_column_families.
898
+ if (!status.ok()) {
899
+ mutex_.Lock();
900
+ assert(!error_handler_.IsBGWorkStopped());
901
+ // Maybe change the return status to void?
902
+ error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable)
903
+ .PermitUncheckedError();
904
+ mutex_.Unlock();
905
+ }
906
+ }
907
+
908
// Work done under mutex_ before a write group is formed. Each step below runs
// only while `status` is still OK, in this order:
//   1. fail fast if the DB is stopped by a background error,
//   2. switch the WAL when total_log_size_ exceeds GetMaxTotalWalSize()
//      (not in single column family mode),
//   3. handle a full write buffer,
//   4. trim memtable history,
//   5. schedule pending flushes,
//   6. delay the write if the write controller is stopped or needs a delay,
//   7. if *need_log_sync is set, wait for in-progress syncs and mark every
//      log as getting_synced.
//
// need_log_sync: in/out. Cleared to false when `status` is not OK or when it
//                was false on entry.
// write_context: passed through to the switch/flush helpers.
// Returns the first non-OK status produced by the steps above, or OK.
+ Status DBImpl::PreprocessWrite(const WriteOptions& write_options,
909
+ bool* need_log_sync,
910
+ WriteContext* write_context) {
911
+ mutex_.AssertHeld();
912
+ assert(write_context != nullptr && need_log_sync != nullptr);
913
+ Status status;
914
+
915
+ if (error_handler_.IsDBStopped()) {
916
+ status = error_handler_.GetBGError();
917
+ }
918
+
919
+ PERF_TIMER_GUARD(write_scheduling_flushes_compactions_time);
920
+
921
+ assert(!single_column_family_mode_ ||
922
+ versions_->GetColumnFamilySet()->NumberOfColumnFamilies() == 1);
923
+ if (UNLIKELY(status.ok() && !single_column_family_mode_ &&
924
+ total_log_size_ > GetMaxTotalWalSize())) {
925
+ WaitForPendingWrites();
926
+ status = SwitchWAL(write_context);
927
+ }
928
+
929
+ if (UNLIKELY(status.ok() && write_buffer_manager_->ShouldFlush())) {
930
+ // Before a new memtable is added in SwitchMemtable(),
931
+ // write_buffer_manager_->ShouldFlush() will keep returning true. If another
932
+ // thread is writing to another DB with the same write buffer, they may also
933
+ // be flushed. We may end up with flushing much more DBs than needed. It's
934
+ // suboptimal but still correct.
935
+ WaitForPendingWrites();
936
+ status = HandleWriteBufferFull(write_context);
937
+ }
938
+
939
+ if (UNLIKELY(status.ok() && !trim_history_scheduler_.Empty())) {
940
+ status = TrimMemtableHistory(write_context);
941
+ }
942
+
943
+ if (UNLIKELY(status.ok() && !flush_scheduler_.Empty())) {
944
+ WaitForPendingWrites();
945
+ status = ScheduleFlushes(write_context);
946
+ }
947
+
// The scheduling timer is stopped here and the pre/post-process timer takes
// over for the rest of the function.
948
+ PERF_TIMER_STOP(write_scheduling_flushes_compactions_time);
949
+ PERF_TIMER_GUARD(write_pre_and_post_process_time);
950
+
951
+ if (UNLIKELY(status.ok() && (write_controller_.IsStopped() ||
952
+ write_controller_.NeedsDelay()))) {
// The time spent in DelayWrite is accounted to write_delay_time, not to the
// pre/post-process timer, which is paused around the call.
953
+ PERF_TIMER_STOP(write_pre_and_post_process_time);
954
+ PERF_TIMER_GUARD(write_delay_time);
955
+ // We don't know size of current batch so that we always use the size
956
+ // for previous one. It might create a fairness issue that expiration
957
+ // might happen for smaller writes but larger writes can go through.
958
+ // Can optimize it if it is an issue.
959
+ status = DelayWrite(last_batch_group_size_, write_options);
960
+ PERF_TIMER_START(write_pre_and_post_process_time);
961
+ }
962
+
963
+ if (status.ok() && *need_log_sync) {
964
+ // Wait until the parallel syncs are finished. Any sync process has to sync
965
+ // the front log too so it is enough to check the status of front()
966
+ // We do a while loop since log_sync_cv_ is signalled when any sync is
967
+ // finished
968
+ // Note: there does not seem to be a reason to wait for parallel sync at
969
+ // this early step but it is not important since parallel sync (SyncWAL) and
970
+ // need_log_sync are usually not used together.
971
+ while (logs_.front().getting_synced) {
972
+ log_sync_cv_.Wait();
973
+ }
974
+ for (auto& log : logs_) {
975
+ assert(!log.getting_synced);
976
+ // This is just to prevent the logs to be synced by a parallel SyncWAL
977
+ // call. We will do the actual syncing later after we will write to the
978
+ // WAL.
979
+ // Note: there does not seem to be a reason to set this early before we
980
+ // actually write to the WAL
981
+ log.getting_synced = true;
982
+ }
983
+ } else {
// Either an earlier step failed or no sync was requested: tell the caller
// not to sync.
984
+ *need_log_sync = false;
985
+ }
986
+
987
+ return status;
988
+ }
989
+
990
+ WriteBatch* DBImpl::MergeBatch(const WriteThread::WriteGroup& write_group,
991
+ WriteBatch* tmp_batch, size_t* write_with_wal,
992
+ WriteBatch** to_be_cached_state) {
993
+ assert(write_with_wal != nullptr);
994
+ assert(tmp_batch != nullptr);
995
+ assert(*to_be_cached_state == nullptr);
996
+ WriteBatch* merged_batch = nullptr;
997
+ *write_with_wal = 0;
998
+ auto* leader = write_group.leader;
999
+ assert(!leader->disable_wal); // Same holds for all in the batch group
1000
+ if (write_group.size == 1 && !leader->CallbackFailed() &&
1001
+ leader->batch->GetWalTerminationPoint().is_cleared()) {
1002
+ // we simply write the first WriteBatch to WAL if the group only
1003
+ // contains one batch, that batch should be written to the WAL,
1004
+ // and the batch is not wanting to be truncated
1005
+ merged_batch = leader->batch;
1006
+ if (WriteBatchInternal::IsLatestPersistentState(merged_batch)) {
1007
+ *to_be_cached_state = merged_batch;
1008
+ }
1009
+ *write_with_wal = 1;
1010
+ } else {
1011
+ // WAL needs all of the batches flattened into a single batch.
1012
+ // We could avoid copying here with an iov-like AddRecord
1013
+ // interface
1014
+ merged_batch = tmp_batch;
1015
+ for (auto writer : write_group) {
1016
+ if (!writer->CallbackFailed()) {
1017
+ Status s = WriteBatchInternal::Append(merged_batch, writer->batch,
1018
+ /*WAL_only*/ true);
1019
+ // Always returns Status::OK.
1020
+ assert(s.ok());
1021
+ if (WriteBatchInternal::IsLatestPersistentState(writer->batch)) {
1022
+ // We only need to cache the last of such write batch
1023
+ *to_be_cached_state = writer->batch;
1024
+ }
1025
+ (*write_with_wal)++;
1026
+ }
1027
+ }
1028
+ }
1029
+ return merged_batch;
1030
+ }
1031
+
1032
+ // When two_write_queues_ is disabled, this function is called from the only
1033
+ // write thread. Otherwise this must be called holding log_write_mutex_.
1034
+ IOStatus DBImpl::WriteToWAL(const WriteBatch& merged_batch,
1035
+ log::Writer* log_writer, uint64_t* log_used,
1036
+ uint64_t* log_size) {
1037
+ assert(log_size != nullptr);
1038
+ Slice log_entry = WriteBatchInternal::Contents(&merged_batch);
1039
+ *log_size = log_entry.size();
1040
+ // When two_write_queues_ WriteToWAL has to be protected from concurretn calls
1041
+ // from the two queues anyway and log_write_mutex_ is already held. Otherwise
1042
+ // if manual_wal_flush_ is enabled we need to protect log_writer->AddRecord
1043
+ // from possible concurrent calls via the FlushWAL by the application.
1044
+ const bool needs_locking = manual_wal_flush_ && !two_write_queues_;
1045
+ // Due to performance cocerns of missed branch prediction penalize the new
1046
+ // manual_wal_flush_ feature (by UNLIKELY) instead of the more common case
1047
+ // when we do not need any locking.
1048
+ if (UNLIKELY(needs_locking)) {
1049
+ log_write_mutex_.Lock();
1050
+ }
1051
+ IOStatus io_s = log_writer->AddRecord(log_entry);
1052
+
1053
+ if (UNLIKELY(needs_locking)) {
1054
+ log_write_mutex_.Unlock();
1055
+ }
1056
+ if (log_used != nullptr) {
1057
+ *log_used = logfile_number_;
1058
+ }
1059
+ total_log_size_ += log_entry.size();
1060
+ // TODO(myabandeh): it might be unsafe to access alive_log_files_.back() here
1061
+ // since alive_log_files_ might be modified concurrently
1062
+ alive_log_files_.back().AddSize(log_entry.size());
1063
+ log_empty_ = false;
1064
+ return io_s;
1065
+ }
1066
+
1067
// Writes a whole write group to the WAL as a single record and optionally
// syncs the logs.
//
// write_group:       group to write; the leader must have the WAL enabled.
// log_writer:        writer of the log file that receives the record.
// log_used:          optional out, forwarded to the single-batch WriteToWAL.
// need_log_sync:     when true and the append succeeded, every log in logs_
//                    is synced.
// need_log_dir_sync: when true and all log syncs succeeded, the WAL directory
//                    is fsynced as well.
// sequence:          sequence number stamped on the merged batch.
// Returns the first failing IOStatus from the append or the syncs, or OK.
// WAL stats are recorded only on success.
+ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group,
1068
+ log::Writer* log_writer, uint64_t* log_used,
1069
+ bool need_log_sync, bool need_log_dir_sync,
1070
+ SequenceNumber sequence) {
1071
+ IOStatus io_s;
1072
+ assert(!write_group.leader->disable_wal);
1073
+ // Same holds for all in the batch group
1074
+ size_t write_with_wal = 0;
1075
+ WriteBatch* to_be_cached_state = nullptr;
1076
+ WriteBatch* merged_batch = MergeBatch(write_group, &tmp_batch_,
1077
+ &write_with_wal, &to_be_cached_state);
// Record which log file the writers went to. If the leader's batch is written
// as-is, only the leader is updated. Otherwise every writer in the group is
// updated, but only when more than one batch was merged.
1078
+ if (merged_batch == write_group.leader->batch) {
1079
+ write_group.leader->log_used = logfile_number_;
1080
+ } else if (write_with_wal > 1) {
1081
+ for (auto writer : write_group) {
1082
+ writer->log_used = logfile_number_;
1083
+ }
1084
+ }
1085
+
1086
+ WriteBatchInternal::SetSequence(merged_batch, sequence);
1087
+
1088
+ uint64_t log_size;
1089
+ io_s = WriteToWAL(*merged_batch, log_writer, log_used, &log_size);
// The cached recoverable state is replaced whether or not the append above
// succeeded.
1090
+ if (to_be_cached_state) {
1091
+ cached_recoverable_state_ = *to_be_cached_state;
1092
+ cached_recoverable_state_empty_ = false;
1093
+ }
1094
+
1095
+ if (io_s.ok() && need_log_sync) {
1096
+ StopWatch sw(env_, stats_, WAL_FILE_SYNC_MICROS);
1097
+ // It's safe to access logs_ with unlocked mutex_ here because:
1098
+ // - we've set getting_synced=true for all logs,
1099
+ // so other threads won't pop from logs_ while we're here,
1100
+ // - only writer thread can push to logs_, and we're in
1101
+ // writer thread, so no one will push to logs_,
1102
+ // - as long as other threads don't modify it, it's safe to read
1103
+ // from std::deque from multiple threads concurrently.
1104
+ for (auto& log : logs_) {
1105
+ io_s = log.writer->file()->Sync(immutable_db_options_.use_fsync);
1106
+ if (!io_s.ok()) {
1107
+ break;
1108
+ }
1109
+ }
1110
+
1111
+ if (io_s.ok() && need_log_dir_sync) {
1112
+ // We only sync WAL directory the first time WAL syncing is
1113
+ // requested, so that in case users never turn on WAL sync,
1114
+ // we can avoid the disk I/O in the write code path.
1115
+ io_s = directories_.GetWalDir()->Fsync(IOOptions(), nullptr);
1116
+ }
1117
+ }
1118
+
// tmp_batch_ is a member reused across calls, so it is emptied once its
// contents have been written.
1119
+ if (merged_batch == &tmp_batch_) {
1120
+ tmp_batch_.Clear();
1121
+ }
1122
+ if (io_s.ok()) {
1123
+ auto stats = default_cf_internal_stats_;
1124
+ if (need_log_sync) {
1125
+ stats->AddDBStats(InternalStats::kIntStatsWalFileSynced, 1);
1126
+ RecordTick(stats_, WAL_FILE_SYNCED);
1127
+ }
1128
+ stats->AddDBStats(InternalStats::kIntStatsWalFileBytes, log_size);
1129
+ RecordTick(stats_, WAL_FILE_BYTES, log_size);
1130
+ stats->AddDBStats(InternalStats::kIntStatsWriteWithWal, write_with_wal);
1131
+ RecordTick(stats_, WRITE_WITH_WAL, write_with_wal);
1132
+ }
1133
+ return io_s;
1134
+ }
1135
+
1136
+ IOStatus DBImpl::ConcurrentWriteToWAL(
1137
+ const WriteThread::WriteGroup& write_group, uint64_t* log_used,
1138
+ SequenceNumber* last_sequence, size_t seq_inc) {
1139
+ IOStatus io_s;
1140
+
1141
+ assert(!write_group.leader->disable_wal);
1142
+ // Same holds for all in the batch group
1143
+ WriteBatch tmp_batch;
1144
+ size_t write_with_wal = 0;
1145
+ WriteBatch* to_be_cached_state = nullptr;
1146
+ WriteBatch* merged_batch =
1147
+ MergeBatch(write_group, &tmp_batch, &write_with_wal, &to_be_cached_state);
1148
+
1149
+ // We need to lock log_write_mutex_ since logs_ and alive_log_files might be
1150
+ // pushed back concurrently
1151
+ log_write_mutex_.Lock();
1152
+ if (merged_batch == write_group.leader->batch) {
1153
+ write_group.leader->log_used = logfile_number_;
1154
+ } else if (write_with_wal > 1) {
1155
+ for (auto writer : write_group) {
1156
+ writer->log_used = logfile_number_;
1157
+ }
1158
+ }
1159
+ *last_sequence = versions_->FetchAddLastAllocatedSequence(seq_inc);
1160
+ auto sequence = *last_sequence + 1;
1161
+ WriteBatchInternal::SetSequence(merged_batch, sequence);
1162
+
1163
+ log::Writer* log_writer = logs_.back().writer;
1164
+ uint64_t log_size;
1165
+ io_s = WriteToWAL(*merged_batch, log_writer, log_used, &log_size);
1166
+ if (to_be_cached_state) {
1167
+ cached_recoverable_state_ = *to_be_cached_state;
1168
+ cached_recoverable_state_empty_ = false;
1169
+ }
1170
+ log_write_mutex_.Unlock();
1171
+
1172
+ if (io_s.ok()) {
1173
+ const bool concurrent = true;
1174
+ auto stats = default_cf_internal_stats_;
1175
+ stats->AddDBStats(InternalStats::kIntStatsWalFileBytes, log_size,
1176
+ concurrent);
1177
+ RecordTick(stats_, WAL_FILE_BYTES, log_size);
1178
+ stats->AddDBStats(InternalStats::kIntStatsWriteWithWal, write_with_wal,
1179
+ concurrent);
1180
+ RecordTick(stats_, WRITE_WITH_WAL, write_with_wal);
1181
+ }
1182
+ return io_s;
1183
+ }
1184
+
1185
+ Status DBImpl::WriteRecoverableState() {
1186
+ mutex_.AssertHeld();
1187
+ if (!cached_recoverable_state_empty_) {
1188
+ bool dont_care_bool;
1189
+ SequenceNumber next_seq;
1190
+ if (two_write_queues_) {
1191
+ log_write_mutex_.Lock();
1192
+ }
1193
+ SequenceNumber seq;
1194
+ if (two_write_queues_) {
1195
+ seq = versions_->FetchAddLastAllocatedSequence(0);
1196
+ } else {
1197
+ seq = versions_->LastSequence();
1198
+ }
1199
+ WriteBatchInternal::SetSequence(&cached_recoverable_state_, seq + 1);
1200
+ auto status = WriteBatchInternal::InsertInto(
1201
+ &cached_recoverable_state_, column_family_memtables_.get(),
1202
+ &flush_scheduler_, &trim_history_scheduler_, true,
1203
+ 0 /*recovery_log_number*/, this, false /* concurrent_memtable_writes */,
1204
+ &next_seq, &dont_care_bool, seq_per_batch_);
1205
+ auto last_seq = next_seq - 1;
1206
+ if (two_write_queues_) {
1207
+ versions_->FetchAddLastAllocatedSequence(last_seq - seq);
1208
+ versions_->SetLastPublishedSequence(last_seq);
1209
+ }
1210
+ versions_->SetLastSequence(last_seq);
1211
+ if (two_write_queues_) {
1212
+ log_write_mutex_.Unlock();
1213
+ }
1214
+ if (status.ok() && recoverable_state_pre_release_callback_) {
1215
+ const bool DISABLE_MEMTABLE = true;
1216
+ for (uint64_t sub_batch_seq = seq + 1;
1217
+ sub_batch_seq < next_seq && status.ok(); sub_batch_seq++) {
1218
+ uint64_t const no_log_num = 0;
1219
+ // Unlock it since the callback might end up locking mutex. e.g.,
1220
+ // AddCommitted -> AdvanceMaxEvictedSeq -> GetSnapshotListFromDB
1221
+ mutex_.Unlock();
1222
+ status = recoverable_state_pre_release_callback_->Callback(
1223
+ sub_batch_seq, !DISABLE_MEMTABLE, no_log_num, 0, 1);
1224
+ mutex_.Lock();
1225
+ }
1226
+ }
1227
+ if (status.ok()) {
1228
+ cached_recoverable_state_.Clear();
1229
+ cached_recoverable_state_empty_ = true;
1230
+ }
1231
+ return status;
1232
+ }
1233
+ return Status::OK();
1234
+ }
1235
+
1236
+ void DBImpl::SelectColumnFamiliesForAtomicFlush(
1237
+ autovector<ColumnFamilyData*>* cfds) {
1238
+ for (ColumnFamilyData* cfd : *versions_->GetColumnFamilySet()) {
1239
+ if (cfd->IsDropped()) {
1240
+ continue;
1241
+ }
1242
+ if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() ||
1243
+ !cached_recoverable_state_empty_.load()) {
1244
+ cfds->push_back(cfd);
1245
+ }
1246
+ }
1247
+ }
1248
+
1249
+ // Assign sequence number for atomic flush.
1250
+ void DBImpl::AssignAtomicFlushSeq(const autovector<ColumnFamilyData*>& cfds) {
1251
+ assert(immutable_db_options_.atomic_flush);
1252
+ auto seq = versions_->LastSequence();
1253
+ for (auto cfd : cfds) {
1254
+ cfd->imm()->AssignAtomicFlushSeq(seq);
1255
+ }
1256
+ }
1257
+
1258
+ Status DBImpl::SwitchWAL(WriteContext* write_context) {
1259
+ mutex_.AssertHeld();
1260
+ assert(write_context != nullptr);
1261
+ Status status;
1262
+
1263
+ if (alive_log_files_.begin()->getting_flushed) {
1264
+ return status;
1265
+ }
1266
+
1267
+ auto oldest_alive_log = alive_log_files_.begin()->number;
1268
+ bool flush_wont_release_oldest_log = false;
1269
+ if (allow_2pc()) {
1270
+ auto oldest_log_with_uncommitted_prep =
1271
+ logs_with_prep_tracker_.FindMinLogContainingOutstandingPrep();
1272
+
1273
+ assert(oldest_log_with_uncommitted_prep == 0 ||
1274
+ oldest_log_with_uncommitted_prep >= oldest_alive_log);
1275
+ if (oldest_log_with_uncommitted_prep > 0 &&
1276
+ oldest_log_with_uncommitted_prep == oldest_alive_log) {
1277
+ if (unable_to_release_oldest_log_) {
1278
+ // we already attempted to flush all column families dependent on
1279
+ // the oldest alive log but the log still contained uncommitted
1280
+ // transactions so there is still nothing that we can do.
1281
+ return status;
1282
+ } else {
1283
+ ROCKS_LOG_WARN(
1284
+ immutable_db_options_.info_log,
1285
+ "Unable to release oldest log due to uncommitted transaction");
1286
+ unable_to_release_oldest_log_ = true;
1287
+ flush_wont_release_oldest_log = true;
1288
+ }
1289
+ }
1290
+ }
1291
+ if (!flush_wont_release_oldest_log) {
1292
+ // we only mark this log as getting flushed if we have successfully
1293
+ // flushed all data in this log. If this log contains outstanding prepared
1294
+ // transactions then we cannot flush this log until those transactions are
1295
+ // commited.
1296
+ unable_to_release_oldest_log_ = false;
1297
+ alive_log_files_.begin()->getting_flushed = true;
1298
+ }
1299
+
1300
+ ROCKS_LOG_INFO(
1301
+ immutable_db_options_.info_log,
1302
+ "Flushing all column families with data in WAL number %" PRIu64
1303
+ ". Total log size is %" PRIu64 " while max_total_wal_size is %" PRIu64,
1304
+ oldest_alive_log, total_log_size_.load(), GetMaxTotalWalSize());
1305
+ // no need to refcount because drop is happening in write thread, so can't
1306
+ // happen while we're in the write thread
1307
+ autovector<ColumnFamilyData*> cfds;
1308
+ if (immutable_db_options_.atomic_flush) {
1309
+ SelectColumnFamiliesForAtomicFlush(&cfds);
1310
+ } else {
1311
+ for (auto cfd : *versions_->GetColumnFamilySet()) {
1312
+ if (cfd->IsDropped()) {
1313
+ continue;
1314
+ }
1315
+ if (cfd->OldestLogToKeep() <= oldest_alive_log) {
1316
+ cfds.push_back(cfd);
1317
+ }
1318
+ }
1319
+ MaybeFlushStatsCF(&cfds);
1320
+ }
1321
+ WriteThread::Writer nonmem_w;
1322
+ if (two_write_queues_) {
1323
+ nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
1324
+ }
1325
+
1326
+ for (const auto cfd : cfds) {
1327
+ cfd->Ref();
1328
+ status = SwitchMemtable(cfd, write_context);
1329
+ cfd->UnrefAndTryDelete();
1330
+ if (!status.ok()) {
1331
+ break;
1332
+ }
1333
+ }
1334
+ if (two_write_queues_) {
1335
+ nonmem_write_thread_.ExitUnbatched(&nonmem_w);
1336
+ }
1337
+
1338
+ if (status.ok()) {
1339
+ if (immutable_db_options_.atomic_flush) {
1340
+ AssignAtomicFlushSeq(cfds);
1341
+ }
1342
+ for (auto cfd : cfds) {
1343
+ cfd->imm()->FlushRequested();
1344
+ if (!immutable_db_options_.atomic_flush) {
1345
+ FlushRequest flush_req;
1346
+ GenerateFlushRequest({cfd}, &flush_req);
1347
+ SchedulePendingFlush(flush_req, FlushReason::kWriteBufferManager);
1348
+ }
1349
+ }
1350
+ if (immutable_db_options_.atomic_flush) {
1351
+ FlushRequest flush_req;
1352
+ GenerateFlushRequest(cfds, &flush_req);
1353
+ SchedulePendingFlush(flush_req, FlushReason::kWriteBufferManager);
1354
+ }
1355
+ MaybeScheduleFlushOrCompaction();
1356
+ }
1357
+ return status;
1358
+ }
1359
+
1360
+ Status DBImpl::HandleWriteBufferFull(WriteContext* write_context) {
1361
+ mutex_.AssertHeld();
1362
+ assert(write_context != nullptr);
1363
+ Status status;
1364
+
1365
+ // Before a new memtable is added in SwitchMemtable(),
1366
+ // write_buffer_manager_->ShouldFlush() will keep returning true. If another
1367
+ // thread is writing to another DB with the same write buffer, they may also
1368
+ // be flushed. We may end up with flushing much more DBs than needed. It's
1369
+ // suboptimal but still correct.
1370
+ ROCKS_LOG_INFO(
1371
+ immutable_db_options_.info_log,
1372
+ "Flushing column family with oldest memtable entry. Write buffer is "
1373
+ "using %" ROCKSDB_PRIszt " bytes out of a total of %" ROCKSDB_PRIszt ".",
1374
+ write_buffer_manager_->memory_usage(),
1375
+ write_buffer_manager_->buffer_size());
1376
+ // no need to refcount because drop is happening in write thread, so can't
1377
+ // happen while we're in the write thread
1378
+ autovector<ColumnFamilyData*> cfds;
1379
+ if (immutable_db_options_.atomic_flush) {
1380
+ SelectColumnFamiliesForAtomicFlush(&cfds);
1381
+ } else {
1382
+ ColumnFamilyData* cfd_picked = nullptr;
1383
+ SequenceNumber seq_num_for_cf_picked = kMaxSequenceNumber;
1384
+
1385
+ for (auto cfd : *versions_->GetColumnFamilySet()) {
1386
+ if (cfd->IsDropped()) {
1387
+ continue;
1388
+ }
1389
+ if (!cfd->mem()->IsEmpty()) {
1390
+ // We only consider active mem table, hoping immutable memtable is
1391
+ // already in the process of flushing.
1392
+ uint64_t seq = cfd->mem()->GetCreationSeq();
1393
+ if (cfd_picked == nullptr || seq < seq_num_for_cf_picked) {
1394
+ cfd_picked = cfd;
1395
+ seq_num_for_cf_picked = seq;
1396
+ }
1397
+ }
1398
+ }
1399
+ if (cfd_picked != nullptr) {
1400
+ cfds.push_back(cfd_picked);
1401
+ }
1402
+ MaybeFlushStatsCF(&cfds);
1403
+ }
1404
+
1405
+ WriteThread::Writer nonmem_w;
1406
+ if (two_write_queues_) {
1407
+ nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
1408
+ }
1409
+ for (const auto cfd : cfds) {
1410
+ if (cfd->mem()->IsEmpty()) {
1411
+ continue;
1412
+ }
1413
+ cfd->Ref();
1414
+ status = SwitchMemtable(cfd, write_context);
1415
+ cfd->UnrefAndTryDelete();
1416
+ if (!status.ok()) {
1417
+ break;
1418
+ }
1419
+ }
1420
+ if (two_write_queues_) {
1421
+ nonmem_write_thread_.ExitUnbatched(&nonmem_w);
1422
+ }
1423
+
1424
+ if (status.ok()) {
1425
+ if (immutable_db_options_.atomic_flush) {
1426
+ AssignAtomicFlushSeq(cfds);
1427
+ }
1428
+ for (const auto cfd : cfds) {
1429
+ cfd->imm()->FlushRequested();
1430
+ if (!immutable_db_options_.atomic_flush) {
1431
+ FlushRequest flush_req;
1432
+ GenerateFlushRequest({cfd}, &flush_req);
1433
+ SchedulePendingFlush(flush_req, FlushReason::kWriteBufferFull);
1434
+ }
1435
+ }
1436
+ if (immutable_db_options_.atomic_flush) {
1437
+ FlushRequest flush_req;
1438
+ GenerateFlushRequest(cfds, &flush_req);
1439
+ SchedulePendingFlush(flush_req, FlushReason::kWriteBufferFull);
1440
+ }
1441
+ MaybeScheduleFlushOrCompaction();
1442
+ }
1443
+ return status;
1444
+ }
1445
+
1446
+ uint64_t DBImpl::GetMaxTotalWalSize() const {
1447
+ mutex_.AssertHeld();
1448
+ return mutable_db_options_.max_total_wal_size == 0
1449
+ ? 4 * max_total_in_memory_state_
1450
+ : mutable_db_options_.max_total_wal_size;
1451
+ }
1452
+
1453
+ // REQUIRES: mutex_ is held
1454
+ // REQUIRES: this thread is currently at the front of the writer queue
1455
+ Status DBImpl::DelayWrite(uint64_t num_bytes,
1456
+ const WriteOptions& write_options) {
1457
+ uint64_t time_delayed = 0;
1458
+ bool delayed = false;
1459
+ {
1460
+ StopWatch sw(env_, stats_, WRITE_STALL, &time_delayed);
1461
+ uint64_t delay = write_controller_.GetDelay(env_, num_bytes);
1462
+ if (delay > 0) {
1463
+ if (write_options.no_slowdown) {
1464
+ return Status::Incomplete("Write stall");
1465
+ }
1466
+ TEST_SYNC_POINT("DBImpl::DelayWrite:Sleep");
1467
+
1468
+ // Notify write_thread_ about the stall so it can setup a barrier and
1469
+ // fail any pending writers with no_slowdown
1470
+ write_thread_.BeginWriteStall();
1471
+ TEST_SYNC_POINT("DBImpl::DelayWrite:BeginWriteStallDone");
1472
+ mutex_.Unlock();
1473
+ // We will delay the write until we have slept for delay ms or
1474
+ // we don't need a delay anymore
1475
+ const uint64_t kDelayInterval = 1000;
1476
+ uint64_t stall_end = sw.start_time() + delay;
1477
+ while (write_controller_.NeedsDelay()) {
1478
+ if (env_->NowMicros() >= stall_end) {
1479
+ // We already delayed this write `delay` microseconds
1480
+ break;
1481
+ }
1482
+
1483
+ delayed = true;
1484
+ // Sleep for 0.001 seconds
1485
+ env_->SleepForMicroseconds(kDelayInterval);
1486
+ }
1487
+ mutex_.Lock();
1488
+ write_thread_.EndWriteStall();
1489
+ }
1490
+
1491
+ // Don't wait if there's a background error, even if its a soft error. We
1492
+ // might wait here indefinitely as the background compaction may never
1493
+ // finish successfully, resulting in the stall condition lasting
1494
+ // indefinitely
1495
+ while (error_handler_.GetBGError().ok() && write_controller_.IsStopped()) {
1496
+ if (write_options.no_slowdown) {
1497
+ return Status::Incomplete("Write stall");
1498
+ }
1499
+ delayed = true;
1500
+
1501
+ // Notify write_thread_ about the stall so it can setup a barrier and
1502
+ // fail any pending writers with no_slowdown
1503
+ write_thread_.BeginWriteStall();
1504
+ TEST_SYNC_POINT("DBImpl::DelayWrite:Wait");
1505
+ bg_cv_.Wait();
1506
+ write_thread_.EndWriteStall();
1507
+ }
1508
+ }
1509
+ assert(!delayed || !write_options.no_slowdown);
1510
+ if (delayed) {
1511
+ default_cf_internal_stats_->AddDBStats(
1512
+ InternalStats::kIntStatsWriteStallMicros, time_delayed);
1513
+ RecordTick(stats_, STALL_MICROS, time_delayed);
1514
+ }
1515
+
1516
+ // If DB is not in read-only mode and write_controller is not stopping
1517
+ // writes, we can ignore any background errors and allow the write to
1518
+ // proceed
1519
+ Status s;
1520
+ if (write_controller_.IsStopped()) {
1521
+ // If writes are still stopped, it means we bailed due to a background
1522
+ // error
1523
+ s = Status::Incomplete(error_handler_.GetBGError().ToString());
1524
+ }
1525
+ if (error_handler_.IsDBStopped()) {
1526
+ s = error_handler_.GetBGError();
1527
+ }
1528
+ return s;
1529
+ }
1530
+
1531
+ Status DBImpl::ThrottleLowPriWritesIfNeeded(const WriteOptions& write_options,
1532
+ WriteBatch* my_batch) {
1533
+ assert(write_options.low_pri);
1534
+ // This is called outside the DB mutex. Although it is safe to make the call,
1535
+ // the consistency condition is not guaranteed to hold. It's OK to live with
1536
+ // it in this case.
1537
+ // If we need to speed compaction, it means the compaction is left behind
1538
+ // and we start to limit low pri writes to a limit.
1539
+ if (write_controller_.NeedSpeedupCompaction()) {
1540
+ if (allow_2pc() && (my_batch->HasCommit() || my_batch->HasRollback())) {
1541
+ // For 2PC, we only rate limit prepare, not commit.
1542
+ return Status::OK();
1543
+ }
1544
+ if (write_options.no_slowdown) {
1545
+ return Status::Incomplete("Low priority write stall");
1546
+ } else {
1547
+ assert(my_batch != nullptr);
1548
+ // Rate limit those writes. The reason that we don't completely wait
1549
+ // is that in case the write is heavy, low pri writes may never have
1550
+ // a chance to run. Now we guarantee we are still slowly making
1551
+ // progress.
1552
+ PERF_TIMER_GUARD(write_delay_time);
1553
+ write_controller_.low_pri_rate_limiter()->Request(
1554
+ my_batch->GetDataSize(), Env::IO_HIGH, nullptr /* stats */,
1555
+ RateLimiter::OpType::kWrite);
1556
+ }
1557
+ }
1558
+ return Status::OK();
1559
+ }
1560
+
1561
+ void DBImpl::MaybeFlushStatsCF(autovector<ColumnFamilyData*>* cfds) {
1562
+ assert(cfds != nullptr);
1563
+ if (!cfds->empty() && immutable_db_options_.persist_stats_to_disk) {
1564
+ ColumnFamilyData* cfd_stats =
1565
+ versions_->GetColumnFamilySet()->GetColumnFamily(
1566
+ kPersistentStatsColumnFamilyName);
1567
+ if (cfd_stats != nullptr && !cfd_stats->mem()->IsEmpty()) {
1568
+ for (ColumnFamilyData* cfd : *cfds) {
1569
+ if (cfd == cfd_stats) {
1570
+ // stats CF already included in cfds
1571
+ return;
1572
+ }
1573
+ }
1574
+ // force flush stats CF when its log number is less than all other CF's
1575
+ // log numbers
1576
+ bool force_flush_stats_cf = true;
1577
+ for (auto* loop_cfd : *versions_->GetColumnFamilySet()) {
1578
+ if (loop_cfd == cfd_stats) {
1579
+ continue;
1580
+ }
1581
+ if (loop_cfd->GetLogNumber() <= cfd_stats->GetLogNumber()) {
1582
+ force_flush_stats_cf = false;
1583
+ }
1584
+ }
1585
+ if (force_flush_stats_cf) {
1586
+ cfds->push_back(cfd_stats);
1587
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1588
+ "Force flushing stats CF with automated flush "
1589
+ "to avoid holding old logs");
1590
+ }
1591
+ }
1592
+ }
1593
+ }
1594
+
1595
+ Status DBImpl::TrimMemtableHistory(WriteContext* context) {
1596
+ autovector<ColumnFamilyData*> cfds;
1597
+ ColumnFamilyData* tmp_cfd;
1598
+ while ((tmp_cfd = trim_history_scheduler_.TakeNextColumnFamily()) !=
1599
+ nullptr) {
1600
+ cfds.push_back(tmp_cfd);
1601
+ }
1602
+ for (auto& cfd : cfds) {
1603
+ autovector<MemTable*> to_delete;
1604
+ bool trimmed = cfd->imm()->TrimHistory(
1605
+ &to_delete, cfd->mem()->ApproximateMemoryUsage());
1606
+ if (!to_delete.empty()) {
1607
+ for (auto m : to_delete) {
1608
+ delete m;
1609
+ }
1610
+ }
1611
+ if (trimmed) {
1612
+ context->superversion_context.NewSuperVersion();
1613
+ assert(context->superversion_context.new_superversion.get() != nullptr);
1614
+ cfd->InstallSuperVersion(&context->superversion_context, &mutex_);
1615
+ }
1616
+
1617
+ if (cfd->UnrefAndTryDelete()) {
1618
+ cfd = nullptr;
1619
+ }
1620
+ }
1621
+ return Status::OK();
1622
+ }
1623
+
1624
+ Status DBImpl::ScheduleFlushes(WriteContext* context) {
1625
+ autovector<ColumnFamilyData*> cfds;
1626
+ if (immutable_db_options_.atomic_flush) {
1627
+ SelectColumnFamiliesForAtomicFlush(&cfds);
1628
+ for (auto cfd : cfds) {
1629
+ cfd->Ref();
1630
+ }
1631
+ flush_scheduler_.Clear();
1632
+ } else {
1633
+ ColumnFamilyData* tmp_cfd;
1634
+ while ((tmp_cfd = flush_scheduler_.TakeNextColumnFamily()) != nullptr) {
1635
+ cfds.push_back(tmp_cfd);
1636
+ }
1637
+ MaybeFlushStatsCF(&cfds);
1638
+ }
1639
+ Status status;
1640
+ WriteThread::Writer nonmem_w;
1641
+ if (two_write_queues_) {
1642
+ nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
1643
+ }
1644
+
1645
+ for (auto& cfd : cfds) {
1646
+ if (!cfd->mem()->IsEmpty()) {
1647
+ status = SwitchMemtable(cfd, context);
1648
+ }
1649
+ if (cfd->UnrefAndTryDelete()) {
1650
+ cfd = nullptr;
1651
+ }
1652
+ if (!status.ok()) {
1653
+ break;
1654
+ }
1655
+ }
1656
+
1657
+ if (two_write_queues_) {
1658
+ nonmem_write_thread_.ExitUnbatched(&nonmem_w);
1659
+ }
1660
+
1661
+ if (status.ok()) {
1662
+ if (immutable_db_options_.atomic_flush) {
1663
+ AssignAtomicFlushSeq(cfds);
1664
+ FlushRequest flush_req;
1665
+ GenerateFlushRequest(cfds, &flush_req);
1666
+ SchedulePendingFlush(flush_req, FlushReason::kWriteBufferFull);
1667
+ } else {
1668
+ for (auto* cfd : cfds) {
1669
+ FlushRequest flush_req;
1670
+ GenerateFlushRequest({cfd}, &flush_req);
1671
+ SchedulePendingFlush(flush_req, FlushReason::kWriteBufferFull);
1672
+ }
1673
+ }
1674
+ MaybeScheduleFlushOrCompaction();
1675
+ }
1676
+ return status;
1677
+ }
1678
+
1679
+ #ifndef ROCKSDB_LITE
1680
+ void DBImpl::NotifyOnMemTableSealed(ColumnFamilyData* /*cfd*/,
1681
+ const MemTableInfo& mem_table_info) {
1682
+ if (immutable_db_options_.listeners.size() == 0U) {
1683
+ return;
1684
+ }
1685
+ if (shutting_down_.load(std::memory_order_acquire)) {
1686
+ return;
1687
+ }
1688
+
1689
+ for (auto listener : immutable_db_options_.listeners) {
1690
+ listener->OnMemTableSealed(mem_table_info);
1691
+ }
1692
+ }
1693
+ #endif // ROCKSDB_LITE
1694
+
1695
+ // REQUIRES: mutex_ is held
1696
+ // REQUIRES: this thread is currently at the front of the writer queue
1697
+ // REQUIRES: this thread is currently at the front of the 2nd writer queue if
1698
+ // two_write_queues_ is true (This is to simplify the reasoning.)
1699
+ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
1700
+ mutex_.AssertHeld();
1701
+ WriteThread::Writer nonmem_w;
1702
+ std::unique_ptr<WritableFile> lfile;
1703
+ log::Writer* new_log = nullptr;
1704
+ MemTable* new_mem = nullptr;
1705
+ IOStatus io_s;
1706
+
1707
+ // Recoverable state is persisted in WAL. After memtable switch, WAL might
1708
+ // be deleted, so we write the state to memtable to be persisted as well.
1709
+ Status s = WriteRecoverableState();
1710
+ if (!s.ok()) {
1711
+ return s;
1712
+ }
1713
+
1714
+ // Attempt to switch to a new memtable and trigger flush of old.
1715
+ // Do this without holding the dbmutex lock.
1716
+ assert(versions_->prev_log_number() == 0);
1717
+ if (two_write_queues_) {
1718
+ log_write_mutex_.Lock();
1719
+ }
1720
+ bool creating_new_log = !log_empty_;
1721
+ if (two_write_queues_) {
1722
+ log_write_mutex_.Unlock();
1723
+ }
1724
+ uint64_t recycle_log_number = 0;
1725
+ if (creating_new_log && immutable_db_options_.recycle_log_file_num &&
1726
+ !log_recycle_files_.empty()) {
1727
+ recycle_log_number = log_recycle_files_.front();
1728
+ }
1729
+ uint64_t new_log_number =
1730
+ creating_new_log ? versions_->NewFileNumber() : logfile_number_;
1731
+ const MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions();
1732
+
1733
+ // Set memtable_info for memtable sealed callback
1734
+ #ifndef ROCKSDB_LITE
1735
+ MemTableInfo memtable_info;
1736
+ memtable_info.cf_name = cfd->GetName();
1737
+ memtable_info.first_seqno = cfd->mem()->GetFirstSequenceNumber();
1738
+ memtable_info.earliest_seqno = cfd->mem()->GetEarliestSequenceNumber();
1739
+ memtable_info.num_entries = cfd->mem()->num_entries();
1740
+ memtable_info.num_deletes = cfd->mem()->num_deletes();
1741
+ #endif // ROCKSDB_LITE
1742
+ // Log this later after lock release. It may be outdated, e.g., if background
1743
+ // flush happens before logging, but that should be ok.
1744
+ int num_imm_unflushed = cfd->imm()->NumNotFlushed();
1745
+ const auto preallocate_block_size =
1746
+ GetWalPreallocateBlockSize(mutable_cf_options.write_buffer_size);
1747
+ mutex_.Unlock();
1748
+ if (creating_new_log) {
1749
+ // TODO: Write buffer size passed in should be max of all CF's instead
1750
+ // of mutable_cf_options.write_buffer_size.
1751
+ io_s = CreateWAL(new_log_number, recycle_log_number, preallocate_block_size,
1752
+ &new_log);
1753
+ if (s.ok()) {
1754
+ s = io_s;
1755
+ }
1756
+ }
1757
+ if (s.ok()) {
1758
+ SequenceNumber seq = versions_->LastSequence();
1759
+ new_mem = cfd->ConstructNewMemtable(mutable_cf_options, seq);
1760
+ context->superversion_context.NewSuperVersion();
1761
+ }
1762
+ ROCKS_LOG_INFO(immutable_db_options_.info_log,
1763
+ "[%s] New memtable created with log file: #%" PRIu64
1764
+ ". Immutable memtables: %d.\n",
1765
+ cfd->GetName().c_str(), new_log_number, num_imm_unflushed);
1766
+ mutex_.Lock();
1767
+ if (recycle_log_number != 0) {
1768
+ // Since renaming the file is done outside DB mutex, we need to ensure
1769
+ // concurrent full purges don't delete the file while we're recycling it.
1770
+ // To achieve that we hold the old log number in the recyclable list until
1771
+ // after it has been renamed.
1772
+ assert(log_recycle_files_.front() == recycle_log_number);
1773
+ log_recycle_files_.pop_front();
1774
+ }
1775
+ if (s.ok() && creating_new_log) {
1776
+ log_write_mutex_.Lock();
1777
+ assert(new_log != nullptr);
1778
+ if (!logs_.empty()) {
1779
+ // Alway flush the buffer of the last log before switching to a new one
1780
+ log::Writer* cur_log_writer = logs_.back().writer;
1781
+ io_s = cur_log_writer->WriteBuffer();
1782
+ if (s.ok()) {
1783
+ s = io_s;
1784
+ }
1785
+ if (!s.ok()) {
1786
+ ROCKS_LOG_WARN(immutable_db_options_.info_log,
1787
+ "[%s] Failed to switch from #%" PRIu64 " to #%" PRIu64
1788
+ " WAL file\n",
1789
+ cfd->GetName().c_str(), cur_log_writer->get_log_number(),
1790
+ new_log_number);
1791
+ }
1792
+ }
1793
+ if (s.ok()) {
1794
+ logfile_number_ = new_log_number;
1795
+ log_empty_ = true;
1796
+ log_dir_synced_ = false;
1797
+ logs_.emplace_back(logfile_number_, new_log);
1798
+ alive_log_files_.push_back(LogFileNumberSize(logfile_number_));
1799
+ }
1800
+ log_write_mutex_.Unlock();
1801
+ }
1802
+
1803
+ if (!s.ok()) {
1804
+ // how do we fail if we're not creating new log?
1805
+ assert(creating_new_log);
1806
+ if (new_mem) {
1807
+ delete new_mem;
1808
+ }
1809
+ if (new_log) {
1810
+ delete new_log;
1811
+ }
1812
+ SuperVersion* new_superversion =
1813
+ context->superversion_context.new_superversion.release();
1814
+ if (new_superversion != nullptr) {
1815
+ delete new_superversion;
1816
+ }
1817
+ // We may have lost data from the WritableFileBuffer in-memory buffer for
1818
+ // the current log, so treat it as a fatal error and set bg_error
1819
+ if (!io_s.ok()) {
1820
+ error_handler_.SetBGError(io_s, BackgroundErrorReason::kMemTable);
1821
+ } else {
1822
+ error_handler_.SetBGError(s, BackgroundErrorReason::kMemTable);
1823
+ }
1824
+ // Read back bg_error in order to get the right severity
1825
+ s = error_handler_.GetBGError();
1826
+ return s;
1827
+ }
1828
+
1829
+ bool empty_cf_updated = false;
1830
+ if (immutable_db_options_.track_and_verify_wals_in_manifest &&
1831
+ !immutable_db_options_.allow_2pc && creating_new_log) {
1832
+ // In non-2pc mode, WALs become obsolete if they do not contain unflushed
1833
+ // data. Updating the empty CF's log number might cause some WALs to become
1834
+ // obsolete. So we should track the WAL obsoletion event before actually
1835
+ // updating the empty CF's log number.
1836
+ uint64_t min_wal_number_to_keep =
1837
+ versions_->PreComputeMinLogNumberWithUnflushedData(logfile_number_);
1838
+ if (min_wal_number_to_keep >
1839
+ versions_->GetWalSet().GetMinWalNumberToKeep()) {
1840
+ // Get a snapshot of the empty column families.
1841
+ // LogAndApply may release and reacquire db
1842
+ // mutex, during that period, column family may become empty (e.g. its
1843
+ // flush succeeds), then it affects the computed min_log_number_to_keep,
1844
+ // so we take a snapshot for consistency of column family data
1845
+ // status. If a column family becomes non-empty afterwards, its active log
1846
+ // should still be the created new log, so the min_log_number_to_keep is
1847
+ // not affected.
1848
+ autovector<ColumnFamilyData*> empty_cfs;
1849
+ for (auto cf : *versions_->GetColumnFamilySet()) {
1850
+ if (cf->IsEmpty()) {
1851
+ empty_cfs.push_back(cf);
1852
+ }
1853
+ }
1854
+
1855
+ VersionEdit wal_deletion;
1856
+ wal_deletion.DeleteWalsBefore(min_wal_number_to_keep);
1857
+ s = versions_->LogAndApplyToDefaultColumnFamily(&wal_deletion, &mutex_);
1858
+ if (!s.ok() && versions_->io_status().IsIOError()) {
1859
+ s = error_handler_.SetBGError(versions_->io_status(),
1860
+ BackgroundErrorReason::kManifestWrite);
1861
+ }
1862
+ if (!s.ok()) {
1863
+ return s;
1864
+ }
1865
+
1866
+ for (auto cf : empty_cfs) {
1867
+ if (cf->IsEmpty()) {
1868
+ cf->SetLogNumber(logfile_number_);
1869
+ cf->mem()->SetCreationSeq(versions_->LastSequence());
1870
+ } // cf may become non-empty.
1871
+ }
1872
+ empty_cf_updated = true;
1873
+ }
1874
+ }
1875
+ if (!empty_cf_updated) {
1876
+ for (auto cf : *versions_->GetColumnFamilySet()) {
1877
+ // all this is just optimization to delete logs that
1878
+ // are no longer needed -- if CF is empty, that means it
1879
+ // doesn't need that particular log to stay alive, so we just
1880
+ // advance the log number. no need to persist this in the manifest
1881
+ if (cf->IsEmpty()) {
1882
+ if (creating_new_log) {
1883
+ cf->SetLogNumber(logfile_number_);
1884
+ }
1885
+ cf->mem()->SetCreationSeq(versions_->LastSequence());
1886
+ }
1887
+ }
1888
+ }
1889
+
1890
+ cfd->mem()->SetNextLogNumber(logfile_number_);
1891
+ cfd->imm()->Add(cfd->mem(), &context->memtables_to_free_);
1892
+ new_mem->Ref();
1893
+ cfd->SetMemtable(new_mem);
1894
+ InstallSuperVersionAndScheduleWork(cfd, &context->superversion_context,
1895
+ mutable_cf_options);
1896
+ #ifndef ROCKSDB_LITE
1897
+ mutex_.Unlock();
1898
+ // Notify client that memtable is sealed, now that we have successfully
1899
+ // installed a new memtable
1900
+ NotifyOnMemTableSealed(cfd, memtable_info);
1901
+ mutex_.Lock();
1902
+ #endif // ROCKSDB_LITE
1903
+ // It is possible that we got here without checking the value of i_os, but
1904
+ // that is okay. If we did, it most likely means that s was already an error.
1905
+ // In any case, ignore any unchecked error for i_os here.
1906
+ io_s.PermitUncheckedError();
1907
+ return s;
1908
+ }
1909
+
1910
+ size_t DBImpl::GetWalPreallocateBlockSize(uint64_t write_buffer_size) const {
1911
+ mutex_.AssertHeld();
1912
+ size_t bsize =
1913
+ static_cast<size_t>(write_buffer_size / 10 + write_buffer_size);
1914
+ // Some users might set very high write_buffer_size and rely on
1915
+ // max_total_wal_size or other parameters to control the WAL size.
1916
+ if (mutable_db_options_.max_total_wal_size > 0) {
1917
+ bsize = std::min<size_t>(
1918
+ bsize, static_cast<size_t>(mutable_db_options_.max_total_wal_size));
1919
+ }
1920
+ if (immutable_db_options_.db_write_buffer_size > 0) {
1921
+ bsize = std::min<size_t>(bsize, immutable_db_options_.db_write_buffer_size);
1922
+ }
1923
+ if (immutable_db_options_.write_buffer_manager &&
1924
+ immutable_db_options_.write_buffer_manager->enabled()) {
1925
+ bsize = std::min<size_t>(
1926
+ bsize, immutable_db_options_.write_buffer_manager->buffer_size());
1927
+ }
1928
+
1929
+ return bsize;
1930
+ }
1931
+
1932
+ // Default implementations of convenience methods that subclasses of DB
1933
+ // can call if they wish
1934
+ Status DB::Put(const WriteOptions& opt, ColumnFamilyHandle* column_family,
1935
+ const Slice& key, const Slice& value) {
1936
+ if (nullptr == opt.timestamp) {
1937
+ // Pre-allocate size of write batch conservatively.
1938
+ // 8 bytes are taken by header, 4 bytes for count, 1 byte for type,
1939
+ // and we allocate 11 extra bytes for key length, as well as value length.
1940
+ WriteBatch batch(key.size() + value.size() + 24);
1941
+ Status s = batch.Put(column_family, key, value);
1942
+ if (!s.ok()) {
1943
+ return s;
1944
+ }
1945
+ return Write(opt, &batch);
1946
+ }
1947
+ const Slice* ts = opt.timestamp;
1948
+ assert(nullptr != ts);
1949
+ size_t ts_sz = ts->size();
1950
+ assert(column_family->GetComparator());
1951
+ assert(ts_sz == column_family->GetComparator()->timestamp_size());
1952
+ WriteBatch batch(key.size() + ts_sz + value.size() + 24, /*max_bytes=*/0,
1953
+ ts_sz);
1954
+ Status s = batch.Put(column_family, key, value);
1955
+ if (!s.ok()) {
1956
+ return s;
1957
+ }
1958
+ s = batch.AssignTimestamp(*ts);
1959
+ if (!s.ok()) {
1960
+ return s;
1961
+ }
1962
+ return Write(opt, &batch);
1963
+ }
1964
+
1965
+ Status DB::Delete(const WriteOptions& opt, ColumnFamilyHandle* column_family,
1966
+ const Slice& key) {
1967
+ if (nullptr == opt.timestamp) {
1968
+ WriteBatch batch;
1969
+ Status s = batch.Delete(column_family, key);
1970
+ if (!s.ok()) {
1971
+ return s;
1972
+ }
1973
+ return Write(opt, &batch);
1974
+ }
1975
+ const Slice* ts = opt.timestamp;
1976
+ assert(ts != nullptr);
1977
+ const size_t ts_sz = ts->size();
1978
+ constexpr size_t kKeyAndValueLenSize = 11;
1979
+ constexpr size_t kWriteBatchOverhead =
1980
+ WriteBatchInternal::kHeader + sizeof(ValueType) + kKeyAndValueLenSize;
1981
+ WriteBatch batch(key.size() + ts_sz + kWriteBatchOverhead, /*max_bytes=*/0,
1982
+ ts_sz);
1983
+ Status s = batch.Delete(column_family, key);
1984
+ if (!s.ok()) {
1985
+ return s;
1986
+ }
1987
+ s = batch.AssignTimestamp(*ts);
1988
+ if (!s.ok()) {
1989
+ return s;
1990
+ }
1991
+ return Write(opt, &batch);
1992
+ }
1993
+
1994
+ Status DB::SingleDelete(const WriteOptions& opt,
1995
+ ColumnFamilyHandle* column_family, const Slice& key) {
1996
+ WriteBatch batch;
1997
+ Status s = batch.SingleDelete(column_family, key);
1998
+ if (!s.ok()) {
1999
+ return s;
2000
+ }
2001
+ return Write(opt, &batch);
2002
+ }
2003
+
2004
+ Status DB::DeleteRange(const WriteOptions& opt,
2005
+ ColumnFamilyHandle* column_family,
2006
+ const Slice& begin_key, const Slice& end_key) {
2007
+ WriteBatch batch;
2008
+ Status s = batch.DeleteRange(column_family, begin_key, end_key);
2009
+ if (!s.ok()) {
2010
+ return s;
2011
+ }
2012
+ return Write(opt, &batch);
2013
+ }
2014
+
2015
+ Status DB::Merge(const WriteOptions& opt, ColumnFamilyHandle* column_family,
2016
+ const Slice& key, const Slice& value) {
2017
+ WriteBatch batch;
2018
+ Status s = batch.Merge(column_family, key, value);
2019
+ if (!s.ok()) {
2020
+ return s;
2021
+ }
2022
+ return Write(opt, &batch);
2023
+ }
2024
+ } // namespace ROCKSDB_NAMESPACE