@nxtedition/rocksdb 10.1.4 → 10.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. package/binding.cc +16 -12
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -5
  3. package/deps/rocksdb/rocksdb/Makefile +38 -15
  4. package/deps/rocksdb/rocksdb/TARGETS +10 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_test.cc +58 -0
  6. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
  7. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +4 -2
  8. package/deps/rocksdb/rocksdb/db/builder.cc +2 -2
  9. package/deps/rocksdb/rocksdb/db/builder.h +1 -1
  10. package/deps/rocksdb/rocksdb/db/c.cc +205 -6
  11. package/deps/rocksdb/rocksdb/db/c_test.c +189 -1
  12. package/deps/rocksdb/rocksdb/db/column_family.cc +28 -0
  13. package/deps/rocksdb/rocksdb/db/column_family.h +17 -0
  14. package/deps/rocksdb/rocksdb/db/column_family_test.cc +234 -60
  15. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +8 -1
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +11 -9
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +4 -4
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -0
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +1 -0
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +22 -25
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +2 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +112 -0
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +72 -21
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +2 -0
  25. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +77 -0
  26. package/deps/rocksdb/rocksdb/db/convenience.cc +3 -0
  27. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +269 -112
  28. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +107 -43
  29. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +93 -24
  30. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +5 -5
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +157 -68
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +56 -15
  33. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +78 -105
  34. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +39 -9
  35. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
  36. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +21 -14
  37. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +107 -63
  38. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +43 -2
  39. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +4 -0
  40. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +6 -0
  41. package/deps/rocksdb/rocksdb/db/db_test.cc +10 -2
  42. package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
  43. package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
  44. package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -6
  45. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +92 -2
  46. package/deps/rocksdb/rocksdb/db/error_handler.cc +34 -39
  47. package/deps/rocksdb/rocksdb/db/error_handler.h +3 -4
  48. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -4
  49. package/deps/rocksdb/rocksdb/db/event_helpers.cc +6 -3
  50. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +71 -15
  51. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
  52. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +383 -4
  53. package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +88 -72
  54. package/deps/rocksdb/rocksdb/db/flush_job.cc +30 -3
  55. package/deps/rocksdb/rocksdb/db/flush_job.h +14 -0
  56. package/deps/rocksdb/rocksdb/db/internal_stats.cc +60 -1
  57. package/deps/rocksdb/rocksdb/db/internal_stats.h +20 -1
  58. package/deps/rocksdb/rocksdb/db/log_writer.cc +24 -0
  59. package/deps/rocksdb/rocksdb/db/log_writer.h +5 -0
  60. package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
  61. package/deps/rocksdb/rocksdb/db/memtable.h +10 -10
  62. package/deps/rocksdb/rocksdb/db/memtable_list.cc +4 -4
  63. package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +10 -3
  64. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +8 -10
  65. package/deps/rocksdb/rocksdb/db/repair.cc +4 -3
  66. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +30 -0
  67. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +9 -0
  68. package/deps/rocksdb/rocksdb/db/table_cache.cc +17 -2
  69. package/deps/rocksdb/rocksdb/db/table_cache.h +9 -1
  70. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +9 -2
  71. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +3 -1
  72. package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +3 -3
  73. package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +7 -7
  74. package/deps/rocksdb/rocksdb/db/version_edit.cc +0 -1
  75. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -6
  76. package/deps/rocksdb/rocksdb/db/version_set.cc +54 -31
  77. package/deps/rocksdb/rocksdb/db/version_set.h +14 -7
  78. package/deps/rocksdb/rocksdb/db/wal_manager.cc +37 -29
  79. package/deps/rocksdb/rocksdb/db/wal_manager.h +6 -5
  80. package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +6 -0
  81. package/deps/rocksdb/rocksdb/db/write_batch.cc +54 -23
  82. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +46 -5
  83. package/deps/rocksdb/rocksdb/db/write_thread.cc +53 -5
  84. package/deps/rocksdb/rocksdb/db/write_thread.h +36 -4
  85. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
  86. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -0
  87. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +57 -17
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +11 -3
  89. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +8 -4
  90. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +10 -25
  91. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +25 -88
  92. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_filters.cc +93 -0
  93. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_filters.h +16 -0
  94. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +43 -0
  95. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +109 -21
  96. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +8 -0
  97. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +666 -205
  98. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +55 -10
  99. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +18 -16
  100. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +19 -0
  101. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +5 -0
  102. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +782 -494
  103. package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +21 -0
  104. package/deps/rocksdb/rocksdb/env/env.cc +6 -0
  105. package/deps/rocksdb/rocksdb/env/io_posix.cc +0 -1
  106. package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
  107. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +34 -19
  108. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +29 -32
  109. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +41 -15
  110. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +4 -2
  111. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +63 -0
  112. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +16 -5
  113. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +5 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +0 -16
  115. package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +16 -0
  116. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +21 -0
  117. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +76 -3
  118. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +17 -0
  119. package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +12 -6
  120. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +31 -0
  121. package/deps/rocksdb/rocksdb/include/rocksdb/user_write_callback.h +29 -0
  122. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
  123. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
  124. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +17 -8
  125. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +2 -2
  126. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
  127. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +7 -0
  128. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  129. package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -2
  130. package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
  131. package/deps/rocksdb/rocksdb/options/db_options.cc +8 -0
  132. package/deps/rocksdb/rocksdb/options/db_options.h +9 -5
  133. package/deps/rocksdb/rocksdb/options/options.cc +3 -0
  134. package/deps/rocksdb/rocksdb/options/options_helper.cc +1 -0
  135. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
  136. package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +2 -2
  137. package/deps/rocksdb/rocksdb/port/stack_trace.cc +1 -0
  138. package/deps/rocksdb/rocksdb/port/win/port_win.cc +3 -2
  139. package/deps/rocksdb/rocksdb/src.mk +4 -0
  140. package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
  141. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +4 -2
  142. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +15 -0
  143. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
  144. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +15 -7
  145. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
  146. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
  147. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +31 -0
  148. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +6 -0
  149. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +10 -5
  150. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +11 -15
  151. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
  152. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
  153. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -21
  154. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +9 -11
  155. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -16
  156. package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -2
  157. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +14 -9
  158. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +4 -1
  159. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +82 -41
  160. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +13 -14
  161. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +18 -22
  162. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
  163. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
  164. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
  165. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
  166. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +9 -10
  167. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +3 -2
  168. package/deps/rocksdb/rocksdb/table/format.cc +1 -2
  169. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +18 -13
  170. package/deps/rocksdb/rocksdb/table/merging_iterator.h +5 -3
  171. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +2 -2
  172. package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +1 -1
  173. package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +3 -1
  174. package/deps/rocksdb/rocksdb/table/table_builder.h +8 -7
  175. package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
  176. package/deps/rocksdb/rocksdb/test_util/testutil.cc +1 -0
  177. package/deps/rocksdb/rocksdb/test_util/testutil.h +6 -0
  178. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +19 -0
  179. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +434 -110
  180. package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +3 -1
  181. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +3 -0
  182. package/deps/rocksdb/rocksdb/util/aligned_storage.h +24 -0
  183. package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
  184. package/deps/rocksdb/rocksdb/util/random.cc +2 -1
  185. package/deps/rocksdb/rocksdb/util/stderr_logger.h +1 -1
  186. package/deps/rocksdb/rocksdb/util/udt_util.cc +33 -0
  187. package/deps/rocksdb/rocksdb/util/udt_util.h +7 -0
  188. package/deps/rocksdb/rocksdb/util/udt_util_test.cc +33 -0
  189. package/deps/rocksdb/rocksdb/util/write_batch_util.h +5 -0
  190. package/deps/rocksdb/rocksdb/util/xxhash.h +10 -3
  191. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +13 -13
  192. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +104 -48
  193. package/deps/rocksdb/rocksdb/utilities/debug.cc +16 -4
  194. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +647 -235
  195. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -157
  196. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
  197. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
  198. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector_test.cc +139 -0
  199. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
  200. package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +3 -0
  201. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +105 -6
  202. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +64 -8
  203. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
  204. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +43 -5
  205. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +5 -0
  206. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +154 -6
  207. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +1 -1
  208. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +158 -2
  209. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +16 -11
  210. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
  211. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +9 -8
  212. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
  213. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +43 -7
  214. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +2 -0
  215. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
  216. package/index.js +1 -2
  217. package/package.json +1 -1
  218. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  219. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  220. package/util.h +25 -2
  221. package/.tap/test-results/node_modules/abstract-level/test/chained-batch-test.js.tap +0 -0
  222. package/.tap/test-results/node_modules/abstract-level/test/get-test.js.tap +0 -0
  223. package/.tap/test-results/test/abstract-level-test.js.tap +0 -1077
  224. package/.tap/test-results/test/batch-test.js.tap +0 -12
  225. package/.tap/test-results/test/chained-batch-gc-test.js.tap +0 -11
  226. package/.tap/test-results/test/cleanup-hanging-iterators-test.js.tap +0 -135
  227. package/.tap/test-results/test/clear-gc-test.js.tap +0 -13
  228. package/.tap/test-results/test/column-test.js.tap +0 -55
  229. package/.tap/test-results/test/common.js.tap +0 -0
  230. package/.tap/test-results/test/compression-test.js.tap +0 -30
  231. package/.tap/test-results/test/db-identity.js.tap +0 -12
  232. package/.tap/test-results/test/electron.js.tap +0 -0
  233. package/.tap/test-results/test/env-cleanup-hook-test.js.tap +0 -40
  234. package/.tap/test-results/test/env-cleanup-hook.js.tap +0 -0
  235. package/.tap/test-results/test/gc.js.tap +0 -0
  236. package/.tap/test-results/test/getproperty-test.js.tap +0 -29
  237. package/.tap/test-results/test/iterator-gc-test.js.tap +0 -15
  238. package/.tap/test-results/test/iterator-hwm-test.js.tap +0 -131
  239. package/.tap/test-results/test/iterator-recursion-test.js.tap +0 -12
  240. package/.tap/test-results/test/iterator-starvation-test.js.tap +0 -73
  241. package/.tap/test-results/test/iterator-test.js.tap +0 -6
  242. package/.tap/test-results/test/leak-tester-batch.js.tap +0 -0
  243. package/.tap/test-results/test/leak-tester-iterator.js.tap +0 -0
  244. package/.tap/test-results/test/leak-tester.js.tap +0 -0
  245. package/.tap/test-results/test/lock-test.js.tap +0 -18
  246. package/.tap/test-results/test/lock.js.tap +0 -0
  247. package/.tap/test-results/test/make.js.tap +0 -0
  248. package/.tap/test-results/test/max-rev-merge.js.tap +0 -0
  249. package/.tap/test-results/test/merge-operator-test.js.tap +0 -12
  250. package/.tap/test-results/test/mkdir-test.js.tap +0 -15
  251. package/.tap/test-results/test/segfault-test.js.tap +0 -76
  252. package/.tap/test-results/test/stack-blower.js.tap +0 -0
  253. package/deps/rocksdb/rocksdb/README.md +0 -29
  254. package/deps/rocksdb/rocksdb/microbench/README.md +0 -60
  255. package/deps/rocksdb/rocksdb/plugin/README.md +0 -43
  256. package/deps/rocksdb/rocksdb/port/README +0 -10
  257. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
  258. package/tmp/000099.sst +0 -0
  259. package/tmp/000102.sst +0 -0
  260. package/tmp/000103.log +0 -0
  261. package/tmp/CURRENT +0 -1
  262. package/tmp/IDENTITY +0 -1
  263. package/tmp/LOCK +0 -0
  264. package/tmp/MANIFEST-000104 +0 -0
  265. package/tmp/OPTIONS-000098 +0 -207
  266. package/tmp/OPTIONS-000106 +0 -207
@@ -65,6 +65,7 @@
65
65
  #include "port/lang.h"
66
66
  #include "rocksdb/merge_operator.h"
67
67
  #include "rocksdb/system_clock.h"
68
+ #include "util/aligned_storage.h"
68
69
  #include "util/autovector.h"
69
70
  #include "util/cast_util.h"
70
71
  #include "util/coding.h"
@@ -1900,7 +1901,7 @@ class MemTableInserter : public WriteBatch::Handler {
1900
1901
  // Make creation optional but do not incur
1901
1902
  // std::unique_ptr additional allocation
1902
1903
  using MemPostInfoMap = std::map<MemTable*, MemTablePostProcessInfo>;
1903
- using PostMapType = std::aligned_storage<sizeof(MemPostInfoMap)>::type;
1904
+ using PostMapType = aligned_storage<MemPostInfoMap>::type;
1904
1905
  PostMapType mem_post_info_map_;
1905
1906
  // current recovered transaction we are rebuilding (recovery)
1906
1907
  WriteBatch* rebuilding_trx_;
@@ -1914,7 +1915,7 @@ class MemTableInserter : public WriteBatch::Handler {
1914
1915
  bool write_before_prepare_;
1915
1916
  // Whether this batch was unprepared or not
1916
1917
  bool unprepared_batch_;
1917
- using DupDetector = std::aligned_storage<sizeof(DuplicateDetector)>::type;
1918
+ using DupDetector = aligned_storage<DuplicateDetector>::type;
1918
1919
  DupDetector duplicate_detector_;
1919
1920
  bool dup_dectector_on_;
1920
1921
 
@@ -1922,7 +1923,7 @@ class MemTableInserter : public WriteBatch::Handler {
1922
1923
  bool hint_created_;
1923
1924
  // Hints for this batch
1924
1925
  using HintMap = std::unordered_map<MemTable*, void*>;
1925
- using HintMapType = std::aligned_storage<sizeof(HintMap)>::type;
1926
+ using HintMapType = aligned_storage<HintMap>::type;
1926
1927
  HintMapType hint_;
1927
1928
 
1928
1929
  HintMap& GetHintMap() {
@@ -2121,14 +2122,15 @@ class MemTableInserter : public WriteBatch::Handler {
2121
2122
  return true;
2122
2123
  }
2123
2124
 
2125
+ template <typename RebuildTxnOp>
2124
2126
  Status PutCFImpl(uint32_t column_family_id, const Slice& key,
2125
2127
  const Slice& value, ValueType value_type,
2128
+ RebuildTxnOp rebuild_txn_op,
2126
2129
  const ProtectionInfoKVOS64* kv_prot_info) {
2127
2130
  // optimize for non-recovery mode
2128
2131
  if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) {
2129
2132
  // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
2130
- return WriteBatchInternal::Put(rebuilding_trx_, column_family_id, key,
2131
- value);
2133
+ return rebuild_txn_op(rebuilding_trx_, column_family_id, key, value);
2132
2134
  // else insert the values to the memtable right away
2133
2135
  }
2134
2136
 
@@ -2139,8 +2141,8 @@ class MemTableInserter : public WriteBatch::Handler {
2139
2141
  // The CF is probably flushed and hence no need for insert but we still
2140
2142
  // need to keep track of the keys for upcoming rollback/commit.
2141
2143
  // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
2142
- ret_status = WriteBatchInternal::Put(rebuilding_trx_, column_family_id,
2143
- key, value);
2144
+ ret_status =
2145
+ rebuild_txn_op(rebuilding_trx_, column_family_id, key, value);
2144
2146
  if (ret_status.ok()) {
2145
2147
  MaybeAdvanceSeq(IsDuplicateKeySeq(column_family_id, key));
2146
2148
  }
@@ -2264,8 +2266,8 @@ class MemTableInserter : public WriteBatch::Handler {
2264
2266
  if (UNLIKELY(ret_status.ok() && rebuilding_trx_ != nullptr)) {
2265
2267
  assert(!write_after_commit_);
2266
2268
  // TODO(ajkr): propagate `ProtectionInfoKVOS64`.
2267
- ret_status = WriteBatchInternal::Put(rebuilding_trx_, column_family_id,
2268
- key, value);
2269
+ ret_status =
2270
+ rebuild_txn_op(rebuilding_trx_, column_family_id, key, value);
2269
2271
  }
2270
2272
  return ret_status;
2271
2273
  }
@@ -2274,15 +2276,21 @@ class MemTableInserter : public WriteBatch::Handler {
2274
2276
  const Slice& value) override {
2275
2277
  const auto* kv_prot_info = NextProtectionInfo();
2276
2278
  Status ret_status;
2279
+
2280
+ auto rebuild_txn_op = [](WriteBatch* rebuilding_trx, uint32_t cf_id,
2281
+ const Slice& k, const Slice& v) -> Status {
2282
+ return WriteBatchInternal::Put(rebuilding_trx, cf_id, k, v);
2283
+ };
2284
+
2277
2285
  if (kv_prot_info != nullptr) {
2278
2286
  // Memtable needs seqno, doesn't need CF ID
2279
2287
  auto mem_kv_prot_info =
2280
2288
  kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
2281
2289
  ret_status = PutCFImpl(column_family_id, key, value, kTypeValue,
2282
- &mem_kv_prot_info);
2290
+ rebuild_txn_op, &mem_kv_prot_info);
2283
2291
  } else {
2284
2292
  ret_status = PutCFImpl(column_family_id, key, value, kTypeValue,
2285
- nullptr /* kv_prot_info */);
2293
+ rebuild_txn_op, nullptr /* kv_prot_info */);
2286
2294
  }
2287
2295
  // TODO: this assumes that if TryAgain status is returned to the caller,
2288
2296
  // the operation is actually tried again. The proper way to do this is to
@@ -2301,15 +2309,23 @@ class MemTableInserter : public WriteBatch::Handler {
2301
2309
  std::string value_buf;
2302
2310
  Slice packed_value =
2303
2311
  PackValueAndWriteTime(value, unix_write_time, &value_buf);
2312
+
2313
+ auto rebuild_txn_op = [](WriteBatch* /* rebuilding_trx */,
2314
+ uint32_t /* cf_id */, const Slice& /* k */,
2315
+ const Slice& /* v */) -> Status {
2316
+ return Status::NotSupported();
2317
+ };
2318
+
2304
2319
  if (kv_prot_info != nullptr) {
2305
2320
  auto mem_kv_prot_info =
2306
2321
  kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
2307
2322
  ret_status = PutCFImpl(column_family_id, key, packed_value,
2308
- kTypeValuePreferredSeqno, &mem_kv_prot_info);
2323
+ kTypeValuePreferredSeqno, rebuild_txn_op,
2324
+ &mem_kv_prot_info);
2309
2325
  } else {
2310
- ret_status =
2311
- PutCFImpl(column_family_id, key, packed_value,
2312
- kTypeValuePreferredSeqno, nullptr /* kv_prot_info */);
2326
+ ret_status = PutCFImpl(column_family_id, key, packed_value,
2327
+ kTypeValuePreferredSeqno, rebuild_txn_op,
2328
+ nullptr /* kv_prot_info */);
2313
2329
  }
2314
2330
 
2315
2331
  // TODO: this assumes that if TryAgain status is returned to the caller,
@@ -2327,14 +2343,27 @@ class MemTableInserter : public WriteBatch::Handler {
2327
2343
  const auto* kv_prot_info = NextProtectionInfo();
2328
2344
 
2329
2345
  Status s;
2346
+
2347
+ auto rebuild_txn_op = [](WriteBatch* rebuilding_trx, uint32_t cf_id,
2348
+ const Slice& k, Slice entity) -> Status {
2349
+ WideColumns columns;
2350
+ const Status st = WideColumnSerialization::Deserialize(entity, columns);
2351
+ if (!st.ok()) {
2352
+ return st;
2353
+ }
2354
+
2355
+ return WriteBatchInternal::PutEntity(rebuilding_trx, cf_id, k, columns);
2356
+ };
2357
+
2330
2358
  if (kv_prot_info) {
2331
2359
  // Memtable needs seqno, doesn't need CF ID
2332
2360
  auto mem_kv_prot_info =
2333
2361
  kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
2334
2362
  s = PutCFImpl(column_family_id, key, value, kTypeWideColumnEntity,
2335
- &mem_kv_prot_info);
2363
+ rebuild_txn_op, &mem_kv_prot_info);
2336
2364
  } else {
2337
2365
  s = PutCFImpl(column_family_id, key, value, kTypeWideColumnEntity,
2366
+ rebuild_txn_op,
2338
2367
  /* kv_prot_info */ nullptr);
2339
2368
  }
2340
2369
 
@@ -2521,11 +2550,6 @@ class MemTableInserter : public WriteBatch::Handler {
2521
2550
  assert(ret_status.ok());
2522
2551
 
2523
2552
  if (db_ != nullptr) {
2524
- if (db_->immutable_db_options().row_cache) {
2525
- ret_status.PermitUncheckedError();
2526
- return Status::NotSupported(
2527
- "DeleteRange is not compatible with row cache.");
2528
- }
2529
2553
  auto cf_handle = cf_mems_->GetColumnFamilyHandle();
2530
2554
  if (cf_handle == nullptr) {
2531
2555
  cf_handle = db_->DefaultColumnFamily();
@@ -2778,16 +2802,23 @@ class MemTableInserter : public WriteBatch::Handler {
2778
2802
  const Slice& value) override {
2779
2803
  const auto* kv_prot_info = NextProtectionInfo();
2780
2804
  Status ret_status;
2805
+
2806
+ auto rebuild_txn_op = [](WriteBatch* /* rebuilding_trx */,
2807
+ uint32_t /* cf_id */, const Slice& /* k */,
2808
+ const Slice& /* v */) -> Status {
2809
+ return Status::NotSupported();
2810
+ };
2811
+
2781
2812
  if (kv_prot_info != nullptr) {
2782
2813
  // Memtable needs seqno, doesn't need CF ID
2783
2814
  auto mem_kv_prot_info =
2784
2815
  kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
2785
2816
  // Same as PutCF except for value type.
2786
2817
  ret_status = PutCFImpl(column_family_id, key, value, kTypeBlobIndex,
2787
- &mem_kv_prot_info);
2818
+ rebuild_txn_op, &mem_kv_prot_info);
2788
2819
  } else {
2789
2820
  ret_status = PutCFImpl(column_family_id, key, value, kTypeBlobIndex,
2790
- nullptr /* kv_prot_info */);
2821
+ rebuild_txn_op, nullptr /* kv_prot_info */);
2791
2822
  }
2792
2823
  if (UNLIKELY(ret_status.IsTryAgain())) {
2793
2824
  DecrementProtectionInfoIdxForTryAgain();
@@ -2,8 +2,6 @@
2
2
  // This source code is licensed under both the GPLv2 (found in the
3
3
  // COPYING file in the root directory) and Apache 2.0 License
4
4
  // (found in the LICENSE.Apache file in the root directory).
5
-
6
-
7
5
  #include "db/write_callback.h"
8
6
 
9
7
  #include <atomic>
@@ -15,6 +13,7 @@
15
13
  #include "db/db_impl/db_impl.h"
16
14
  #include "port/port.h"
17
15
  #include "rocksdb/db.h"
16
+ #include "rocksdb/user_write_callback.h"
18
17
  #include "rocksdb/write_batch.h"
19
18
  #include "test_util/sync_point.h"
20
19
  #include "test_util/testharness.h"
@@ -84,6 +83,28 @@ class MockWriteCallback : public WriteCallback {
84
83
  bool AllowWriteBatching() override { return allow_batching_; }
85
84
  };
86
85
 
86
+ class MockUserWriteCallback : public UserWriteCallback {
87
+ public:
88
+ std::atomic<bool> write_enqueued_{false};
89
+ std::atomic<bool> wal_write_done_{false};
90
+
91
+ MockUserWriteCallback() = default;
92
+
93
+ MockUserWriteCallback(const MockUserWriteCallback& other) {
94
+ write_enqueued_.store(other.write_enqueued_.load());
95
+ wal_write_done_.store(other.wal_write_done_.load());
96
+ }
97
+
98
+ void OnWriteEnqueued() override { write_enqueued_.store(true); }
99
+
100
+ void OnWalWriteFinish() override { wal_write_done_.store(true); }
101
+
102
+ void Reset() {
103
+ write_enqueued_.store(false);
104
+ wal_write_done_.store(false);
105
+ }
106
+ };
107
+
87
108
  #if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
88
109
  class WriteCallbackPTest
89
110
  : public WriteCallbackTest,
@@ -119,9 +140,11 @@ TEST_P(WriteCallbackPTest, WriteWithCallbackTest) {
119
140
  kvs_.clear();
120
141
  write_batch_.Clear();
121
142
  callback_.was_called_.store(false);
143
+ user_write_cb_.Reset();
122
144
  }
123
145
 
124
146
  MockWriteCallback callback_;
147
+ MockUserWriteCallback user_write_cb_;
125
148
  WriteBatch write_batch_;
126
149
  std::vector<std::pair<string, string>> kvs_;
127
150
  };
@@ -327,18 +350,26 @@ TEST_P(WriteCallbackPTest, WriteWithCallbackTest) {
327
350
  ASSERT_OK(WriteBatchInternal::InsertNoop(&write_op.write_batch_));
328
351
  const size_t ONE_BATCH = 1;
329
352
  s = db_impl->WriteImpl(woptions, &write_op.write_batch_,
330
- &write_op.callback_, nullptr, 0, false, nullptr,
331
- ONE_BATCH,
353
+ &write_op.callback_, &write_op.user_write_cb_,
354
+ nullptr, 0, false, nullptr, ONE_BATCH,
332
355
  two_queues_ ? &publish_seq_callback : nullptr);
333
356
  } else {
334
357
  s = db_impl->WriteWithCallback(woptions, &write_op.write_batch_,
335
- &write_op.callback_);
358
+ &write_op.callback_,
359
+ &write_op.user_write_cb_);
336
360
  }
337
361
 
362
+ ASSERT_TRUE(write_op.user_write_cb_.write_enqueued_.load());
338
363
  if (write_op.callback_.should_fail_) {
339
364
  ASSERT_TRUE(s.IsBusy());
365
+ ASSERT_FALSE(write_op.user_write_cb_.wal_write_done_.load());
340
366
  } else {
341
367
  ASSERT_OK(s);
368
+ if (enable_WAL_) {
369
+ ASSERT_TRUE(write_op.user_write_cb_.wal_write_done_.load());
370
+ } else {
371
+ ASSERT_FALSE(write_op.user_write_cb_.wal_write_done_.load());
372
+ }
342
373
  }
343
374
  };
344
375
 
@@ -440,6 +471,16 @@ TEST_F(WriteCallbackTest, WriteCallBackTest) {
440
471
  ASSERT_OK(s);
441
472
  ASSERT_EQ("value.a2", value);
442
473
 
474
+ MockUserWriteCallback user_write_cb;
475
+ WriteBatch wb4;
476
+ ASSERT_OK(wb4.Put("a", "value.a4"));
477
+
478
+ ASSERT_OK(db->WriteWithCallback(write_options, &wb4, &user_write_cb));
479
+ ASSERT_OK(db->Get(read_options, "a", &value));
480
+ ASSERT_EQ(value, "value.a4");
481
+ ASSERT_TRUE(user_write_cb.write_enqueued_.load());
482
+ ASSERT_TRUE(user_write_cb.wal_write_done_.load());
483
+
443
484
  delete db;
444
485
  ASSERT_OK(DestroyDB(dbname, options));
445
486
  }
@@ -404,6 +404,8 @@ void WriteThread::JoinBatchGroup(Writer* w) {
404
404
 
405
405
  bool linked_as_leader = LinkOne(w, &newest_writer_);
406
406
 
407
+ w->CheckWriteEnqueuedCallback();
408
+
407
409
  if (linked_as_leader) {
408
410
  SetState(w, STATE_GROUP_LEADER);
409
411
  }
@@ -428,6 +430,7 @@ void WriteThread::JoinBatchGroup(Writer* w) {
428
430
  TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:BeganWaiting", w);
429
431
  AwaitState(w,
430
432
  STATE_GROUP_LEADER | STATE_MEMTABLE_WRITER_LEADER |
433
+ STATE_PARALLEL_MEMTABLE_CALLER |
431
434
  STATE_PARALLEL_MEMTABLE_WRITER | STATE_COMPLETED,
432
435
  &jbg_ctx);
433
436
  TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:DoneWaiting", w);
@@ -656,12 +659,57 @@ void WriteThread::ExitAsMemTableWriter(Writer* /*self*/,
656
659
  SetState(leader, STATE_COMPLETED);
657
660
  }
658
661
 
662
+ void WriteThread::SetMemWritersEachStride(Writer* w) {
663
+ WriteGroup* write_group = w->write_group;
664
+ Writer* last_writer = write_group->last_writer;
665
+
666
+ // The stride is the same for each writer in write_group, so w wakes
667
+ // itself and every stride-th writer after it (same index mod stride)
668
+ size_t stride = static_cast<size_t>(std::sqrt(write_group->size));
669
+ size_t count = 0;
670
+ while (w) {
671
+ if (count++ % stride == 0) {
672
+ SetState(w, STATE_PARALLEL_MEMTABLE_WRITER);
673
+ }
674
+ w = (w == last_writer) ? nullptr : w->link_newer;
675
+ }
676
+ }
677
+
659
678
  void WriteThread::LaunchParallelMemTableWriters(WriteGroup* write_group) {
660
679
  assert(write_group != nullptr);
661
- write_group->running.store(write_group->size);
662
- for (auto w : *write_group) {
663
- SetState(w, STATE_PARALLEL_MEMTABLE_WRITER);
680
+ size_t group_size = write_group->size;
681
+ write_group->running.store(group_size);
682
+
683
+ // The minimum group size that allows the group to use parallel caller mode.
684
+ // The number must be no lower than 3.
685
+ const size_t MinParallelSize = 20;
686
+
687
+ // The group_size is too small, and there is no need to have
688
+ // the parallel partial callers.
689
+ if (group_size < MinParallelSize) {
690
+ for (auto w : *write_group) {
691
+ SetState(w, STATE_PARALLEL_MEMTABLE_WRITER);
692
+ }
693
+ return;
664
694
  }
695
+
696
+ // The stride is equal to std::sqrt(group_size) which can minimize
697
+ // the total number of SetState calls made by the leader.
698
+ // Set the leader itself STATE_PARALLEL_MEMTABLE_WRITER, and set
699
+ // (stride-1) writers to be STATE_PARALLEL_MEMTABLE_CALLER.
700
+ size_t stride = static_cast<size_t>(std::sqrt(group_size));
701
+ auto w = write_group->leader;
702
+ SetState(w, STATE_PARALLEL_MEMTABLE_WRITER);
703
+
704
+ for (size_t i = 1; i < stride; i++) {
705
+ w = w->link_newer;
706
+ SetState(w, STATE_PARALLEL_MEMTABLE_CALLER);
707
+ }
708
+
709
+ // After setting all STATE_PARALLEL_MEMTABLE_CALLER, the leader also
710
+ // does the job as STATE_PARALLEL_MEMTABLE_CALLER.
711
+ w = w->link_newer;
712
+ SetMemWritersEachStride(w);
665
713
  }
666
714
 
667
715
  static WriteThread::AdaptationContext cpmtw_ctx(
@@ -788,8 +836,8 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
788
836
  }
789
837
 
790
838
  AwaitState(leader,
791
- STATE_MEMTABLE_WRITER_LEADER | STATE_PARALLEL_MEMTABLE_WRITER |
792
- STATE_COMPLETED,
839
+ STATE_MEMTABLE_WRITER_LEADER | STATE_PARALLEL_MEMTABLE_CALLER |
840
+ STATE_PARALLEL_MEMTABLE_WRITER | STATE_COMPLETED,
793
841
  &eabgl_ctx);
794
842
  } else {
795
843
  Writer* head = newest_writer_.load(std::memory_order_acquire);
@@ -22,7 +22,9 @@
22
22
  #include "rocksdb/options.h"
23
23
  #include "rocksdb/status.h"
24
24
  #include "rocksdb/types.h"
25
+ #include "rocksdb/user_write_callback.h"
25
26
  #include "rocksdb/write_batch.h"
27
+ #include "util/aligned_storage.h"
26
28
  #include "util/autovector.h"
27
29
 
28
30
  namespace ROCKSDB_NAMESPACE {
@@ -71,6 +73,12 @@ class WriteThread {
71
73
  // A state indicating that the thread may be waiting using StateMutex()
72
74
  // and StateCondVar()
73
75
  STATE_LOCKED_WAITING = 32,
76
+
77
+ // The state used to inform a waiting writer that it has become a
78
+ // caller to call some other waiting writers to write to memtable
79
+ // by calling SetMemWritersEachStride. After doing
80
+ // this, it will also write to memtable.
81
+ STATE_PARALLEL_MEMTABLE_CALLER = 64,
74
82
  };
75
83
 
76
84
  struct Writer;
@@ -127,6 +135,7 @@ class WriteThread {
127
135
  uint64_t log_used; // log number that this batch was inserted into
128
136
  uint64_t log_ref; // log number that memtable insert should reference
129
137
  WriteCallback* callback;
138
+ UserWriteCallback* user_write_cb;
130
139
  bool made_waitable; // records lazy construction of mutex and cv
131
140
  std::atomic<uint8_t> state; // write under StateMutex() or pre-link
132
141
  WriteGroup* write_group;
@@ -134,8 +143,8 @@ class WriteThread {
134
143
  Status status;
135
144
  Status callback_status; // status returned by callback->Callback()
136
145
 
137
- std::aligned_storage<sizeof(std::mutex)>::type state_mutex_bytes;
138
- std::aligned_storage<sizeof(std::condition_variable)>::type state_cv_bytes;
146
+ aligned_storage<std::mutex>::type state_mutex_bytes;
147
+ aligned_storage<std::condition_variable>::type state_cv_bytes;
139
148
  Writer* link_older; // read/write only before linking, or as leader
140
149
  Writer* link_newer; // lazy, read/write only before linking, or as leader
141
150
 
@@ -153,6 +162,7 @@ class WriteThread {
153
162
  log_used(0),
154
163
  log_ref(0),
155
164
  callback(nullptr),
165
+ user_write_cb(nullptr),
156
166
  made_waitable(false),
157
167
  state(STATE_INIT),
158
168
  write_group(nullptr),
@@ -161,8 +171,8 @@ class WriteThread {
161
171
  link_newer(nullptr) {}
162
172
 
163
173
  Writer(const WriteOptions& write_options, WriteBatch* _batch,
164
- WriteCallback* _callback, uint64_t _log_ref, bool _disable_memtable,
165
- size_t _batch_cnt = 0,
174
+ WriteCallback* _callback, UserWriteCallback* _user_write_cb,
175
+ uint64_t _log_ref, bool _disable_memtable, size_t _batch_cnt = 0,
166
176
  PreReleaseCallback* _pre_release_callback = nullptr,
167
177
  PostMemTableCallback* _post_memtable_callback = nullptr)
168
178
  : batch(_batch),
@@ -180,6 +190,7 @@ class WriteThread {
180
190
  log_used(0),
181
191
  log_ref(_log_ref),
182
192
  callback(_callback),
193
+ user_write_cb(_user_write_cb),
183
194
  made_waitable(false),
184
195
  state(STATE_INIT),
185
196
  write_group(nullptr),
@@ -203,6 +214,18 @@ class WriteThread {
203
214
  return callback_status.ok();
204
215
  }
205
216
 
217
+ void CheckWriteEnqueuedCallback() {
218
+ if (user_write_cb != nullptr) {
219
+ user_write_cb->OnWriteEnqueued();
220
+ }
221
+ }
222
+
223
+ void CheckPostWalWriteCallback() {
224
+ if (user_write_cb != nullptr) {
225
+ user_write_cb->OnWalWriteFinish();
226
+ }
227
+ }
228
+
206
229
  void CreateMutex() {
207
230
  if (!made_waitable) {
208
231
  // Note that made_waitable is tracked separately from state
@@ -323,10 +346,19 @@ class WriteThread {
323
346
  // Causes JoinBatchGroup to return STATE_PARALLEL_MEMTABLE_WRITER for all of
324
347
  // the non-leader members of this write batch group. Sets Writer::sequence
325
348
  // before waking them up.
349
+ // If the size of write_group n is not small, the leader will call n^0.5
350
+ // members to be PARALLEL_MEMTABLE_CALLER in the write_group to help to set
351
+ // the others' states in parallel. This ensures that the cost to call SetState
352
+ // sequentially does not exceed 2(n^0.5).
326
353
  //
327
354
  // WriteGroup* write_group: Extra state used to coordinate the parallel add
328
355
  void LaunchParallelMemTableWriters(WriteGroup* write_group);
329
356
 
357
+ // One out of every stride=N writers in the WriteGroup is set to be one of the
358
+ // MemTableWriters, where N is equal to the square root of the size of this
359
+ // write_group, and all of these MemTableWriters will write to memtable.
360
+ void SetMemWritersEachStride(Writer* w);
361
+
330
362
  // Reports the completion of w's batch to the parallel group leader, and
331
363
  // waits for the rest of the parallel batch to complete. Returns true
332
364
  // if this thread is the last to complete, and hence should advance
@@ -4,6 +4,7 @@ add_executable(db_stress${ARTIFACT_SUFFIX}
4
4
  db_stress.cc
5
5
  db_stress_common.cc
6
6
  db_stress_driver.cc
7
+ db_stress_filters.cc
7
8
  db_stress_gflags.cc
8
9
  db_stress_listener.cc
9
10
  db_stress_shared_state.cc
@@ -590,6 +590,11 @@ class BatchedOpsStressTest : public StressTest {
590
590
  // For half of the time, set the upper bound to the next prefix
591
591
  ub_slices[i] = upper_bounds[i];
592
592
  ro_copies[i].iterate_upper_bound = &(ub_slices[i]);
593
+ if (FLAGS_use_sqfc_for_range_queries) {
594
+ ro_copies[i].table_filter =
595
+ sqfc_factory_->GetTableFilterForRangeQuery(prefix_slices[i],
596
+ ub_slices[i]);
597
+ }
593
598
  }
594
599
 
595
600
  iters[i].reset(db_->NewIterator(ro_copies[i], cfh));
@@ -73,13 +73,13 @@ class CfConsistencyStressTest : public StressTest {
73
73
  status = db_->Write(write_opts, &batch);
74
74
  }
75
75
 
76
- if (!status.ok()) {
76
+ if (status.ok()) {
77
+ auto num = static_cast<long>(rand_column_families.size());
78
+ thread->stats.AddBytesForWrites(num, (sz + 1) * num);
79
+ } else if (!IsErrorInjectedAndRetryable(status)) {
77
80
  fprintf(stderr, "multi put or merge error: %s\n",
78
81
  status.ToString().c_str());
79
82
  thread->stats.AddErrors(1);
80
- } else {
81
- auto num = static_cast<long>(rand_column_families.size());
82
- thread->stats.AddBytesForWrites(num, (sz + 1) * num);
83
83
  }
84
84
 
85
85
  return status;
@@ -96,11 +96,11 @@ class CfConsistencyStressTest : public StressTest {
96
96
  batch.Delete(cfh, key);
97
97
  }
98
98
  Status s = db_->Write(write_opts, &batch);
99
- if (!s.ok()) {
99
+ if (s.ok()) {
100
+ thread->stats.AddDeletes(static_cast<long>(rand_column_families.size()));
101
+ } else if (!IsErrorInjectedAndRetryable(s)) {
100
102
  fprintf(stderr, "multidel error: %s\n", s.ToString().c_str());
101
103
  thread->stats.AddErrors(1);
102
- } else {
103
- thread->stats.AddDeletes(static_cast<long>(rand_column_families.size()));
104
104
  }
105
105
  return s;
106
106
  }
@@ -125,12 +125,12 @@ class CfConsistencyStressTest : public StressTest {
125
125
  batch.DeleteRange(cfh, key, end_key);
126
126
  }
127
127
  Status s = db_->Write(write_opts, &batch);
128
- if (!s.ok()) {
129
- fprintf(stderr, "multi del range error: %s\n", s.ToString().c_str());
130
- thread->stats.AddErrors(1);
131
- } else {
128
+ if (s.ok()) {
132
129
  thread->stats.AddRangeDeletions(
133
130
  static_cast<long>(rand_column_families.size()));
131
+ } else if (!IsErrorInjectedAndRetryable(s)) {
132
+ fprintf(stderr, "multi del range error: %s\n", s.ToString().c_str());
133
+ thread->stats.AddErrors(1);
134
134
  }
135
135
  return s;
136
136
  }
@@ -170,6 +170,15 @@ class CfConsistencyStressTest : public StressTest {
170
170
  std::string value0;
171
171
  s = db_->Get(readoptionscopy, column_families_[rand_column_families[0]],
172
172
  key, &value0);
173
+
174
+ // Temporarily disable error injection for verification
175
+ if (fault_fs_guard) {
176
+ fault_fs_guard->DisableThreadLocalErrorInjection(
177
+ FaultInjectionIOType::kRead);
178
+ fault_fs_guard->DisableThreadLocalErrorInjection(
179
+ FaultInjectionIOType::kMetadataRead);
180
+ }
181
+
173
182
  if (s.ok() || s.IsNotFound()) {
174
183
  bool found = s.ok();
175
184
  for (size_t i = 1; i < rand_column_families.size(); i++) {
@@ -214,6 +223,13 @@ class CfConsistencyStressTest : public StressTest {
214
223
  }
215
224
  }
216
225
 
226
+ // Re-enable the error injection that was disabled for verification
227
+ if (fault_fs_guard) {
228
+ fault_fs_guard->EnableThreadLocalErrorInjection(
229
+ FaultInjectionIOType::kRead);
230
+ fault_fs_guard->EnableThreadLocalErrorInjection(
231
+ FaultInjectionIOType::kMetadataRead);
232
+ }
217
233
  db_->ReleaseSnapshot(snapshot);
218
234
  }
219
235
  if (!is_consistent) {
@@ -225,7 +241,7 @@ class CfConsistencyStressTest : public StressTest {
225
241
  thread->stats.AddGets(1, 1);
226
242
  } else if (s.IsNotFound()) {
227
243
  thread->stats.AddGets(1, 0);
228
- } else {
244
+ } else if (!IsErrorInjectedAndRetryable(s)) {
229
245
  fprintf(stderr, "TestGet error: %s\n", s.ToString().c_str());
230
246
  thread->stats.AddErrors(1);
231
247
  }
@@ -261,7 +277,7 @@ class CfConsistencyStressTest : public StressTest {
261
277
  } else if (s.IsNotFound()) {
262
278
  // not found case
263
279
  thread->stats.AddGets(1, 0);
264
- } else {
280
+ } else if (!IsErrorInjectedAndRetryable(s)) {
265
281
  // errors case
266
282
  fprintf(stderr, "MultiGet error: %s\n", s.ToString().c_str());
267
283
  thread->stats.AddErrors(1);
@@ -323,6 +339,14 @@ class CfConsistencyStressTest : public StressTest {
323
339
  column_families_[rand_column_families[0]], key,
324
340
  &cmp_result);
325
341
 
342
+ // Temporarily disable error injection for verification
343
+ if (fault_fs_guard) {
344
+ fault_fs_guard->DisableThreadLocalErrorInjection(
345
+ FaultInjectionIOType::kRead);
346
+ fault_fs_guard->DisableThreadLocalErrorInjection(
347
+ FaultInjectionIOType::kMetadataRead);
348
+ }
349
+
326
350
  if (s.ok() || s.IsNotFound()) {
327
351
  const bool cmp_found = s.ok();
328
352
 
@@ -458,6 +482,14 @@ class CfConsistencyStressTest : public StressTest {
458
482
  }
459
483
  }
460
484
  }
485
+
486
+ // Re-enable the error injection that was disabled for verification
487
+ if (fault_fs_guard) {
488
+ fault_fs_guard->EnableThreadLocalErrorInjection(
489
+ FaultInjectionIOType::kRead);
490
+ fault_fs_guard->EnableThreadLocalErrorInjection(
491
+ FaultInjectionIOType::kMetadataRead);
492
+ }
461
493
  }
462
494
 
463
495
  if (!is_consistent) {
@@ -469,7 +501,7 @@ class CfConsistencyStressTest : public StressTest {
469
501
  thread->stats.AddGets(1, 1);
470
502
  } else if (s.IsNotFound()) {
471
503
  thread->stats.AddGets(1, 0);
472
- } else {
504
+ } else if (!IsErrorInjectedAndRetryable(s)) {
473
505
  fprintf(stderr, "TestGetEntity error: %s\n", s.ToString().c_str());
474
506
  thread->stats.AddErrors(1);
475
507
  }
@@ -540,7 +572,9 @@ class CfConsistencyStressTest : public StressTest {
540
572
  for (size_t j = 0; j < num_cfs; ++j) {
541
573
  const Status& s = result[j].status();
542
574
  const WideColumns& columns = result[j].columns();
543
- if (!s.ok() && !s.IsNotFound()) {
575
+ if (!s.ok() && IsErrorInjectedAndRetryable(s)) {
576
+ break;
577
+ } else if (!s.ok() && !s.IsNotFound()) {
544
578
  fprintf(stderr, "TestMultiGetEntity (AttributeGroup) error: %s\n",
545
579
  s.ToString().c_str());
546
580
  thread->stats.AddErrors(1);
@@ -645,7 +679,9 @@ class CfConsistencyStressTest : public StressTest {
645
679
  const Status& s = statuses[j];
646
680
  const WideColumns& columns = results[j].columns();
647
681
 
648
- if (!s.ok() && !s.IsNotFound()) {
682
+ if (!s.ok() && IsErrorInjectedAndRetryable(s)) {
683
+ break;
684
+ } else if (!s.ok() && !s.IsNotFound()) {
649
685
  fprintf(stderr, "TestMultiGetEntity error: %s\n",
650
686
  s.ToString().c_str());
651
687
  thread->stats.AddErrors(1);
@@ -746,6 +782,10 @@ class CfConsistencyStressTest : public StressTest {
746
782
  if (GetNextPrefix(prefix, &upper_bound) && thread->rand.OneIn(2)) {
747
783
  ub_slice = Slice(upper_bound);
748
784
  ro_copy.iterate_upper_bound = &ub_slice;
785
+ if (FLAGS_use_sqfc_for_range_queries) {
786
+ ro_copy.table_filter =
787
+ sqfc_factory_->GetTableFilterForRangeQuery(prefix, ub_slice);
788
+ }
749
789
  }
750
790
 
751
791
  ColumnFamilyHandle* const cfh =
@@ -776,7 +816,7 @@ class CfConsistencyStressTest : public StressTest {
776
816
  s = iter->status();
777
817
  }
778
818
 
779
- if (!s.ok()) {
819
+ if (!s.ok() && !IsErrorInjectedAndRetryable(s)) {
780
820
  fprintf(stderr, "TestPrefixScan error: %s\n", s.ToString().c_str());
781
821
  thread->stats.AddErrors(1);
782
822