rocksdb-native 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/binding.c +92 -10
  2. package/index.js +9 -0
  3. package/lib/batch.js +11 -1
  4. package/lib/iterator.js +3 -1
  5. package/lib/snapshot.js +21 -0
  6. package/package.json +1 -1
  7. package/prebuilds/darwin-arm64/rocksdb-native.bare +0 -0
  8. package/prebuilds/darwin-arm64/rocksdb-native.node +0 -0
  9. package/prebuilds/darwin-x64/rocksdb-native.bare +0 -0
  10. package/prebuilds/darwin-x64/rocksdb-native.node +0 -0
  11. package/prebuilds/linux-arm64/rocksdb-native.bare +0 -0
  12. package/prebuilds/linux-arm64/rocksdb-native.node +0 -0
  13. package/prebuilds/linux-x64/rocksdb-native.bare +0 -0
  14. package/prebuilds/linux-x64/rocksdb-native.node +0 -0
  15. package/prebuilds/win32-x64/rocksdb-native.bare +0 -0
  16. package/prebuilds/win32-x64/rocksdb-native.node +0 -0
  17. package/vendor/librocksdb/include/rocksdb.h +38 -4
  18. package/vendor/librocksdb/src/rocksdb.cc +114 -14
  19. package/vendor/librocksdb/vendor/rocksdb/CMakeLists.txt +21 -4
  20. package/vendor/librocksdb/vendor/rocksdb/cache/secondary_cache_adapter.cc +6 -3
  21. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
  22. package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.h +4 -2
  23. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.cc +20 -0
  24. package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.h +83 -0
  25. package/vendor/librocksdb/vendor/rocksdb/db/builder.cc +9 -5
  26. package/vendor/librocksdb/vendor/rocksdb/db/builder.h +1 -1
  27. package/vendor/librocksdb/vendor/rocksdb/db/c.cc +231 -6
  28. package/vendor/librocksdb/vendor/rocksdb/db/c_test.c +202 -2
  29. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.cc +47 -0
  30. package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.h +79 -0
  31. package/vendor/librocksdb/vendor/rocksdb/db/column_family.cc +28 -0
  32. package/vendor/librocksdb/vendor/rocksdb/db/column_family.h +17 -0
  33. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.cc +8 -1
  34. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.h +11 -9
  35. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.cc +50 -23
  36. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.h +13 -0
  37. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.cc +22 -25
  38. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.h +2 -0
  39. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.cc +8 -1
  40. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.h +1 -0
  41. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.cc +40 -17
  42. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.h +20 -14
  43. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_level.cc +11 -6
  44. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_universal.cc +77 -24
  45. package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_service_job.cc +2 -0
  46. package/vendor/librocksdb/vendor/rocksdb/db/convenience.cc +3 -0
  47. package/vendor/librocksdb/vendor/rocksdb/db/db_filesnapshot.cc +125 -31
  48. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.cc +457 -231
  49. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.h +172 -73
  50. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_compaction_flush.cc +152 -133
  51. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
  52. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_files.cc +58 -52
  53. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.cc +348 -0
  54. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.h +54 -0
  55. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_open.cc +136 -117
  56. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.cc +4 -3
  57. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.h +7 -6
  58. package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_write.cc +134 -80
  59. package/vendor/librocksdb/vendor/rocksdb/db/db_iter.cc +11 -0
  60. package/vendor/librocksdb/vendor/rocksdb/db/db_test2.cc +1 -1
  61. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.cc +11 -1
  62. package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.h +11 -7
  63. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.cc +19 -4
  64. package/vendor/librocksdb/vendor/rocksdb/db/dbformat.h +3 -2
  65. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.cc +34 -39
  66. package/vendor/librocksdb/vendor/rocksdb/db/error_handler.h +3 -4
  67. package/vendor/librocksdb/vendor/rocksdb/db/event_helpers.cc +6 -3
  68. package/vendor/librocksdb/vendor/rocksdb/db/experimental.cc +3 -2
  69. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.cc +76 -18
  70. package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
  71. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.cc +37 -5
  72. package/vendor/librocksdb/vendor/rocksdb/db/flush_job.h +14 -0
  73. package/vendor/librocksdb/vendor/rocksdb/db/import_column_family_job.cc +49 -45
  74. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.cc +60 -1
  75. package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.h +20 -1
  76. package/vendor/librocksdb/vendor/rocksdb/db/log_reader.cc +15 -6
  77. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.cc +59 -10
  78. package/vendor/librocksdb/vendor/rocksdb/db/log_writer.h +8 -0
  79. package/vendor/librocksdb/vendor/rocksdb/db/memtable.cc +24 -40
  80. package/vendor/librocksdb/vendor/rocksdb/db/memtable.h +10 -10
  81. package/vendor/librocksdb/vendor/rocksdb/db/memtable_list.cc +9 -8
  82. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator_impl.h +296 -0
  83. package/vendor/librocksdb/vendor/rocksdb/db/range_tombstone_fragmenter.h +8 -10
  84. package/vendor/librocksdb/vendor/rocksdb/db/repair.cc +4 -3
  85. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.cc +30 -0
  86. package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.h +9 -0
  87. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.cc +17 -2
  88. package/vendor/librocksdb/vendor/rocksdb/db/table_cache.h +9 -1
  89. package/vendor/librocksdb/vendor/rocksdb/db/table_properties_collector.h +9 -2
  90. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.cc +3 -3
  91. package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.h +7 -7
  92. package/vendor/librocksdb/vendor/rocksdb/db/version_edit.cc +0 -1
  93. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.cc +39 -5
  94. package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.h +24 -15
  95. package/vendor/librocksdb/vendor/rocksdb/db/version_set.cc +117 -64
  96. package/vendor/librocksdb/vendor/rocksdb/db/version_set.h +27 -10
  97. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.cc +37 -29
  98. package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.h +6 -5
  99. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns.cc +2 -3
  100. package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns_helper.cc +6 -0
  101. package/vendor/librocksdb/vendor/rocksdb/db/write_batch.cc +89 -31
  102. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.cc +53 -5
  103. package/vendor/librocksdb/vendor/rocksdb/db/write_thread.h +36 -4
  104. package/vendor/librocksdb/vendor/rocksdb/env/composite_env_wrapper.h +21 -0
  105. package/vendor/librocksdb/vendor/rocksdb/env/env.cc +15 -0
  106. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.cc +331 -0
  107. package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.h +139 -0
  108. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.cc +8 -6
  109. package/vendor/librocksdb/vendor/rocksdb/env/io_posix.h +1 -1
  110. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.cc +130 -27
  111. package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.h +61 -8
  112. package/vendor/librocksdb/vendor/rocksdb/file/file_util.cc +25 -4
  113. package/vendor/librocksdb/vendor/rocksdb/file/file_util.h +15 -0
  114. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.cc +1 -0
  115. package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.h +9 -4
  116. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.cc +18 -0
  117. package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.h +31 -4
  118. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.cc +40 -38
  119. package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.h +48 -15
  120. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/advanced_options.h +12 -3
  121. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/attribute_groups.h +114 -0
  122. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/c.h +90 -0
  123. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/cache.h +5 -0
  124. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/comparator.h +27 -0
  125. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/db.h +71 -12
  126. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/env.h +9 -0
  127. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/experimental.h +5 -0
  128. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/file_system.h +14 -0
  129. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator.h +9 -71
  130. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator_base.h +90 -0
  131. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/listener.h +21 -0
  132. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/options.h +125 -12
  133. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/perf_context.h +1 -1
  134. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/sst_file_reader.h +11 -1
  135. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table.h +6 -6
  136. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table_properties.h +19 -0
  137. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/transaction_log.h +12 -6
  138. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/types.h +12 -0
  139. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/universal_compaction.h +31 -0
  140. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/user_write_callback.h +29 -0
  141. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/cache_dump_load.h +4 -0
  142. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
  143. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
  144. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/env_mirror.h +1 -1
  145. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -7
  146. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -4
  147. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/stackable_db.h +24 -5
  148. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
  149. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction.h +42 -17
  150. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction_db.h +5 -0
  151. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/types_util.h +36 -0
  152. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +71 -3
  153. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/version.h +2 -2
  154. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/wide_columns.h +87 -72
  155. package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/write_batch_base.h +1 -1
  156. package/vendor/librocksdb/vendor/rocksdb/memory/memory_allocator.cc +1 -0
  157. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.cc +13 -2
  158. package/vendor/librocksdb/vendor/rocksdb/options/cf_options.h +6 -2
  159. package/vendor/librocksdb/vendor/rocksdb/options/db_options.cc +27 -1
  160. package/vendor/librocksdb/vendor/rocksdb/options/db_options.h +10 -3
  161. package/vendor/librocksdb/vendor/rocksdb/options/options.cc +3 -0
  162. package/vendor/librocksdb/vendor/rocksdb/options/options_helper.cc +1 -0
  163. package/vendor/librocksdb/vendor/rocksdb/port/jemalloc_helper.h +2 -2
  164. package/vendor/librocksdb/vendor/rocksdb/port/stack_trace.cc +1 -0
  165. package/vendor/librocksdb/vendor/rocksdb/port/win/port_win.cc +3 -2
  166. package/vendor/librocksdb/vendor/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
  167. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_builder.cc +47 -31
  168. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_factory.cc +15 -0
  169. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.cc +37 -18
  170. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.h +10 -3
  171. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
  172. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.h +15 -7
  173. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
  174. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
  175. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_cache.h +31 -0
  176. package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_prefetcher.cc +6 -0
  177. package/vendor/librocksdb/vendor/rocksdb/table/block_based/cachable_entry.h +10 -5
  178. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block.h +34 -28
  179. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
  180. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
  181. package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_policy.cc +12 -3
  182. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.cc +37 -30
  183. package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.h +11 -13
  184. package/vendor/librocksdb/vendor/rocksdb/table/block_based/hash_index_reader.cc +1 -2
  185. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.cc +62 -53
  186. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.h +60 -38
  187. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.cc +14 -9
  188. package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.h +4 -1
  189. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.cc +135 -94
  190. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.h +52 -46
  191. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
  192. package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
  193. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
  194. package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
  195. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.cc +8 -10
  196. package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.h +2 -1
  197. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.cc +9 -10
  198. package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.h +3 -2
  199. package/vendor/librocksdb/vendor/rocksdb/table/format.cc +1 -2
  200. package/vendor/librocksdb/vendor/rocksdb/table/iterator.cc +4 -0
  201. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.cc +18 -13
  202. package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.h +5 -3
  203. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.cc +18 -4
  204. package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.h +4 -0
  205. package/vendor/librocksdb/vendor/rocksdb/table/plain/plain_table_builder.cc +2 -2
  206. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_dumper.cc +6 -6
  207. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_reader.cc +24 -2
  208. package/vendor/librocksdb/vendor/rocksdb/table/sst_file_writer_collectors.h +3 -1
  209. package/vendor/librocksdb/vendor/rocksdb/table/table_builder.h +8 -7
  210. package/vendor/librocksdb/vendor/rocksdb/table/table_iterator.h +69 -0
  211. package/vendor/librocksdb/vendor/rocksdb/table/table_reader.h +9 -0
  212. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.cc +25 -0
  213. package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.h +12 -0
  214. package/vendor/librocksdb/vendor/rocksdb/tools/db_bench_tool.cc +32 -0
  215. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd.cc +618 -124
  216. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd_impl.h +19 -1
  217. package/vendor/librocksdb/vendor/rocksdb/tools/ldb_tool.cc +9 -0
  218. package/vendor/librocksdb/vendor/rocksdb/util/aligned_storage.h +24 -0
  219. package/vendor/librocksdb/vendor/rocksdb/util/autovector.h +4 -0
  220. package/vendor/librocksdb/vendor/rocksdb/util/comparator.cc +12 -0
  221. package/vendor/librocksdb/vendor/rocksdb/util/filter_bench.cc +1 -1
  222. package/vendor/librocksdb/vendor/rocksdb/util/random.cc +2 -1
  223. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.cc +3 -4
  224. package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.h +1 -1
  225. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.cc +33 -0
  226. package/vendor/librocksdb/vendor/rocksdb/util/udt_util.h +7 -0
  227. package/vendor/librocksdb/vendor/rocksdb/util/write_batch_util.h +5 -0
  228. package/vendor/librocksdb/vendor/rocksdb/util/xxhash.h +36 -29
  229. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl.h +3 -0
  230. package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +20 -0
  231. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.cc +29 -9
  232. package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.h +14 -3
  233. package/vendor/librocksdb/vendor/rocksdb/utilities/debug.cc +16 -4
  234. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.cc +677 -248
  235. package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.h +325 -158
  236. package/vendor/librocksdb/vendor/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -8
  237. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
  238. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
  239. package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
  240. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +1 -1
  241. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +3 -3
  242. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.cc +116 -20
  243. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.h +33 -1
  244. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +78 -13
  245. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.h +33 -1
  246. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.cc +106 -7
  247. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.h +68 -10
  248. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_test.h +7 -3
  249. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.cc +8 -5
  250. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.h +7 -4
  251. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -12
  252. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
  253. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
  254. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn.cc +11 -9
  255. package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
  256. package/vendor/librocksdb/vendor/rocksdb/utilities/types_util.cc +88 -0
  257. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +313 -14
  258. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +7 -0
  259. package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
  260. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.cc +0 -102
  261. package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.h +0 -159
@@ -404,6 +404,8 @@ void WriteThread::JoinBatchGroup(Writer* w) {
404
404
 
405
405
  bool linked_as_leader = LinkOne(w, &newest_writer_);
406
406
 
407
+ w->CheckWriteEnqueuedCallback();
408
+
407
409
  if (linked_as_leader) {
408
410
  SetState(w, STATE_GROUP_LEADER);
409
411
  }
@@ -428,6 +430,7 @@ void WriteThread::JoinBatchGroup(Writer* w) {
428
430
  TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:BeganWaiting", w);
429
431
  AwaitState(w,
430
432
  STATE_GROUP_LEADER | STATE_MEMTABLE_WRITER_LEADER |
433
+ STATE_PARALLEL_MEMTABLE_CALLER |
431
434
  STATE_PARALLEL_MEMTABLE_WRITER | STATE_COMPLETED,
432
435
  &jbg_ctx);
433
436
  TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:DoneWaiting", w);
@@ -656,12 +659,57 @@ void WriteThread::ExitAsMemTableWriter(Writer* /*self*/,
656
659
  SetState(leader, STATE_COMPLETED);
657
660
  }
658
661
 
662
+ void WriteThread::SetMemWritersEachStride(Writer* w) {
663
+ WriteGroup* write_group = w->write_group;
664
+ Writer* last_writer = write_group->last_writer;
665
+
666
+ // The stride is the same for each writer in write_group, so w will
667
+ // call the writers with the same number in write_group mod stride
668
+ size_t stride = static_cast<size_t>(std::sqrt(write_group->size));
669
+ size_t count = 0;
670
+ while (w) {
671
+ if (count++ % stride == 0) {
672
+ SetState(w, STATE_PARALLEL_MEMTABLE_WRITER);
673
+ }
674
+ w = (w == last_writer) ? nullptr : w->link_newer;
675
+ }
676
+ }
677
+
659
678
  void WriteThread::LaunchParallelMemTableWriters(WriteGroup* write_group) {
660
679
  assert(write_group != nullptr);
661
- write_group->running.store(write_group->size);
662
- for (auto w : *write_group) {
663
- SetState(w, STATE_PARALLEL_MEMTABLE_WRITER);
680
+ size_t group_size = write_group->size;
681
+ write_group->running.store(group_size);
682
+
683
+ // The minimum number to allow the group use parallel caller mode.
684
+ // The number must be no lower than 3.
685
+ const size_t MinParallelSize = 20;
686
+
687
+ // The group_size is too small, and there is no need to have
688
+ // the parallel partial callers.
689
+ if (group_size < MinParallelSize) {
690
+ for (auto w : *write_group) {
691
+ SetState(w, STATE_PARALLEL_MEMTABLE_WRITER);
692
+ }
693
+ return;
664
694
  }
695
+
696
+ // The stride is equal to std::sqrt(group_size) which can minimize
697
+ // the total number of SetState calls made by the leader.
698
+ // Set the leader itself STATE_PARALLEL_MEMTABLE_WRITER, and set
699
+ // (stride-1) writers to be STATE_PARALLEL_MEMTABLE_CALLER.
700
+ size_t stride = static_cast<size_t>(std::sqrt(group_size));
701
+ auto w = write_group->leader;
702
+ SetState(w, STATE_PARALLEL_MEMTABLE_WRITER);
703
+
704
+ for (size_t i = 1; i < stride; i++) {
705
+ w = w->link_newer;
706
+ SetState(w, STATE_PARALLEL_MEMTABLE_CALLER);
707
+ }
708
+
709
+ // After setting all STATE_PARALLEL_MEMTABLE_CALLER, the leader also
710
+ // does the job as STATE_PARALLEL_MEMTABLE_CALLER.
711
+ w = w->link_newer;
712
+ SetMemWritersEachStride(w);
665
713
  }
666
714
 
667
715
  static WriteThread::AdaptationContext cpmtw_ctx(
@@ -788,8 +836,8 @@ void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group,
788
836
  }
789
837
 
790
838
  AwaitState(leader,
791
- STATE_MEMTABLE_WRITER_LEADER | STATE_PARALLEL_MEMTABLE_WRITER |
792
- STATE_COMPLETED,
839
+ STATE_MEMTABLE_WRITER_LEADER | STATE_PARALLEL_MEMTABLE_CALLER |
840
+ STATE_PARALLEL_MEMTABLE_WRITER | STATE_COMPLETED,
793
841
  &eabgl_ctx);
794
842
  } else {
795
843
  Writer* head = newest_writer_.load(std::memory_order_acquire);
@@ -22,7 +22,9 @@
22
22
  #include "rocksdb/options.h"
23
23
  #include "rocksdb/status.h"
24
24
  #include "rocksdb/types.h"
25
+ #include "rocksdb/user_write_callback.h"
25
26
  #include "rocksdb/write_batch.h"
27
+ #include "util/aligned_storage.h"
26
28
  #include "util/autovector.h"
27
29
 
28
30
  namespace ROCKSDB_NAMESPACE {
@@ -71,6 +73,12 @@ class WriteThread {
71
73
  // A state indicating that the thread may be waiting using StateMutex()
72
74
  // and StateCondVar()
73
75
  STATE_LOCKED_WAITING = 32,
76
+
77
+ // The state used to inform a waiting writer that it has become a
78
+ // caller to call some other waiting writers to write to memtable
79
+ // by calling SetMemWritersEachStride. After doing
80
+ // this, it will also write to memtable.
81
+ STATE_PARALLEL_MEMTABLE_CALLER = 64,
74
82
  };
75
83
 
76
84
  struct Writer;
@@ -127,6 +135,7 @@ class WriteThread {
127
135
  uint64_t log_used; // log number that this batch was inserted into
128
136
  uint64_t log_ref; // log number that memtable insert should reference
129
137
  WriteCallback* callback;
138
+ UserWriteCallback* user_write_cb;
130
139
  bool made_waitable; // records lazy construction of mutex and cv
131
140
  std::atomic<uint8_t> state; // write under StateMutex() or pre-link
132
141
  WriteGroup* write_group;
@@ -134,8 +143,8 @@ class WriteThread {
134
143
  Status status;
135
144
  Status callback_status; // status returned by callback->Callback()
136
145
 
137
- std::aligned_storage<sizeof(std::mutex)>::type state_mutex_bytes;
138
- std::aligned_storage<sizeof(std::condition_variable)>::type state_cv_bytes;
146
+ aligned_storage<std::mutex>::type state_mutex_bytes;
147
+ aligned_storage<std::condition_variable>::type state_cv_bytes;
139
148
  Writer* link_older; // read/write only before linking, or as leader
140
149
  Writer* link_newer; // lazy, read/write only before linking, or as leader
141
150
 
@@ -153,6 +162,7 @@ class WriteThread {
153
162
  log_used(0),
154
163
  log_ref(0),
155
164
  callback(nullptr),
165
+ user_write_cb(nullptr),
156
166
  made_waitable(false),
157
167
  state(STATE_INIT),
158
168
  write_group(nullptr),
@@ -161,8 +171,8 @@ class WriteThread {
161
171
  link_newer(nullptr) {}
162
172
 
163
173
  Writer(const WriteOptions& write_options, WriteBatch* _batch,
164
- WriteCallback* _callback, uint64_t _log_ref, bool _disable_memtable,
165
- size_t _batch_cnt = 0,
174
+ WriteCallback* _callback, UserWriteCallback* _user_write_cb,
175
+ uint64_t _log_ref, bool _disable_memtable, size_t _batch_cnt = 0,
166
176
  PreReleaseCallback* _pre_release_callback = nullptr,
167
177
  PostMemTableCallback* _post_memtable_callback = nullptr)
168
178
  : batch(_batch),
@@ -180,6 +190,7 @@ class WriteThread {
180
190
  log_used(0),
181
191
  log_ref(_log_ref),
182
192
  callback(_callback),
193
+ user_write_cb(_user_write_cb),
183
194
  made_waitable(false),
184
195
  state(STATE_INIT),
185
196
  write_group(nullptr),
@@ -203,6 +214,18 @@ class WriteThread {
203
214
  return callback_status.ok();
204
215
  }
205
216
 
217
+ void CheckWriteEnqueuedCallback() {
218
+ if (user_write_cb != nullptr) {
219
+ user_write_cb->OnWriteEnqueued();
220
+ }
221
+ }
222
+
223
+ void CheckPostWalWriteCallback() {
224
+ if (user_write_cb != nullptr) {
225
+ user_write_cb->OnWalWriteFinish();
226
+ }
227
+ }
228
+
206
229
  void CreateMutex() {
207
230
  if (!made_waitable) {
208
231
  // Note that made_waitable is tracked separately from state
@@ -323,10 +346,19 @@ class WriteThread {
323
346
  // Causes JoinBatchGroup to return STATE_PARALLEL_MEMTABLE_WRITER for all of
324
347
  // the non-leader members of this write batch group. Sets Writer::sequence
325
348
  // before waking them up.
349
+ // If the size of write_group n is not small, the leader will call n^0.5
350
+ // members to be PARALLEL_MEMTABLE_CALLER in the write_group to help to set
351
+ // others' states in parallel. This ensures that the cost of calling SetState
352
+ // sequentially does not exceed 2(n^0.5).
326
353
  //
327
354
  // WriteGroup* write_group: Extra state used to coordinate the parallel add
328
355
  void LaunchParallelMemTableWriters(WriteGroup* write_group);
329
356
 
357
+ // One out of every stride=N writers in the WriteGroup is set to be one of the
358
+ // MemTableWriters, where N is equal to the square root of the size of this
359
+ // write_group, and all of these MemTableWriters will write to memtable.
360
+ void SetMemWritersEachStride(Writer* w);
361
+
330
362
  // Reports the completion of w's batch to the parallel group leader, and
331
363
  // waits for the rest of the parallel batch to complete. Returns true
332
364
  // if this thread is the last to complete, and hence should advance
@@ -18,6 +18,13 @@
18
18
 
19
19
  namespace ROCKSDB_NAMESPACE {
20
20
 
21
+ // This class supports abstracting different types of an `Env`'s functionality
22
+ // into separate interfaces. It is constructed with a `FileSystem` and a
23
+ // `SystemClock` and delegates:
24
+ // * File system operations to member `file_system_`.
25
+ // * Time related misc operations to member `clock_`.
26
+ // A subclass needs to inherit `CompositeEnv` and provide implementations for
27
+ // the thread management related APIs.
21
28
  class CompositeEnv : public Env {
22
29
  public:
23
30
  // Initialize a CompositeEnvWrapper that delegates all thread/time related
@@ -250,6 +257,20 @@ class CompositeEnv : public Env {
250
257
  }
251
258
  };
252
259
 
260
+ // A `CompositeEnvWrapper` is constructed with a target `Env` object, an
261
+ // optional `FileSystem` object and an optional `SystemClock` object.
262
+ // `Env::GetFileSystem()` is a fallback file system if no such object is
263
+ // explicitly provided. Similarly, `Env::GetSystemClock()` is a fallback system
264
+ // clock.
265
+ // Besides delegating corresponding functionality to `file_system_` and `clock_`
266
+ // which is inherited from `CompositeEnv`, it also implements the thread
267
+ // management APIs by delegating them to the target `Env` object.
268
+ //
269
+ // Effectively, this class helps to support using customized file system
270
+ // implementations such as a remote file system instead of the default file
271
+ // system provided by the operating system.
272
+ //
273
+ // Also see public API `NewCompositeEnv` in rocksdb/include/env.h
253
274
  class CompositeEnvWrapper : public CompositeEnv {
254
275
  public:
255
276
  // Initialize a CompositeEnvWrapper that delegates all thread/time related
@@ -355,6 +355,12 @@ class LegacyDirectoryWrapper : public FSDirectory {
355
355
  std::unique_ptr<Directory> target_;
356
356
  };
357
357
 
358
+ // A helper class to make legacy `Env` implementations be backward compatible
359
+ // now that all `Env` implementations are expected to have a `FileSystem` type
360
+ // member `file_system_` and a `SystemClock` type member `clock_`.
361
+ // This class wraps a legacy `Env` object and exposes its file system related
362
+ // APIs as a `FileSystem` interface. Also check `LegacySystemClock` that does
363
+ // the same thing for the clock related APIs.
358
364
  class LegacyFileSystemWrapper : public FileSystem {
359
365
  public:
360
366
  // Initialize an EnvWrapper that delegates all calls to *t
@@ -825,6 +831,15 @@ WritableFile::~WritableFile() = default;
825
831
 
826
832
  MemoryMappedFileBuffer::~MemoryMappedFileBuffer() = default;
827
833
 
834
+ // This const variable can be used in public headers without introducing the
835
+ // possibility of ODR violations due to varying macro definitions. It is INFO_LEVEL when NDEBUG is defined and DEBUG_LEVEL otherwise.
836
+ const InfoLogLevel Logger::kDefaultLogLevel =
837
+ #ifdef NDEBUG
838
+ INFO_LEVEL;
839
+ #else
840
+ DEBUG_LEVEL;
841
+ #endif // NDEBUG
842
+
828
843
  Logger::~Logger() = default;
829
844
 
830
845
  Status Logger::Close() {
@@ -0,0 +1,331 @@
1
+ // Copyright (c) 2024-present, Facebook, Inc. All rights reserved.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #include "env/fs_on_demand.h"
7
+
8
+ #include <algorithm>
9
+ #include <set>
10
+
11
+ #include "file/filename.h"
12
+ #include "port/port.h"
13
+ #include "rocksdb/types.h"
14
+
15
+ namespace ROCKSDB_NAMESPACE {
16
+ // Check if the input path is under orig (typically the local directory), and if
17
+ // so, change it to the equivalent path under replace (typically the remote
18
+ // directory). For example, if orig is "/data/follower", replace is
19
+ // "/data/leader", and the given path is "/data/follower/000010.sst", on return
20
+ // the path would be changed to
21
+ // "/data/leader/000010.sst".
22
+ // Return value is true if the path was modified, false otherwise
23
+ bool OnDemandFileSystem::CheckPathAndAdjust(const std::string& orig,
24
+ const std::string& replace,
25
+ std::string& path) {
26
+ size_t pos = path.find(orig);
27
+ if (pos > 0) {
28
+ return false;
29
+ }
30
+ path.replace(pos, orig.length(), replace);
31
+ return true;
32
+ }
33
+
34
+ bool OnDemandFileSystem::LookupFileType(const std::string& name,
35
+ FileType* type) {
36
+ std::size_t found = name.find_last_of('/');
37
+ std::string file_name = name.substr(found);
38
+ uint64_t number = 0;
39
+ return ParseFileName(file_name, &number, type);
40
+ }
41
+
42
+ // RocksDB opens non-SST files for reading in sequential file mode. This
43
+ // includes CURRENT, OPTIONS, MANIFEST etc. For these files, we open them
44
+ // in place in the source directory. For files that are appendable or
45
+ // can be renamed, which is MANIFEST and CURRENT files, we wrap the
46
+ // underlying FSSequentialFile with another class that checks when EOF
47
+ // has been reached and re-opens the file to see the latest data. On some
48
+ // distributed file systems, this is necessary.
49
+ IOStatus OnDemandFileSystem::NewSequentialFile(
50
+ const std::string& fname, const FileOptions& file_opts,
51
+ std::unique_ptr<FSSequentialFile>* result, IODebugContext* dbg) {
52
+ FileType type;
53
+ static std::unordered_set<FileType> valid_types(
54
+ {kWalFile, kDescriptorFile, kCurrentFile, kIdentityFile, kOptionsFile});
55
+ if (!LookupFileType(fname, &type) ||
56
+ (valid_types.find(type) == valid_types.end())) {
57
+ return IOStatus::NotSupported();
58
+ }
59
+
60
+ IOStatus s;
61
+ std::string rname = fname;
62
+ if (CheckPathAndAdjust(local_path_, remote_path_, rname)) {
63
+ // First clear any local directory cache as it may be out of date
64
+ target()->DiscardCacheForDirectory(rname);
65
+
66
+ std::unique_ptr<FSSequentialFile> inner_file;
67
+ s = target()->NewSequentialFile(rname, file_opts, &inner_file, dbg);
68
+ if (s.ok() && type == kDescriptorFile) {
69
+ result->reset(new OnDemandSequentialFile(std::move(inner_file), this,
70
+ file_opts, rname));
71
+ } else {
72
+ *result = std::move(inner_file);
73
+ }
74
+ } else {
75
+ s = target()->NewSequentialFile(fname, file_opts, result, dbg);
76
+ }
77
+ return s;
78
+ }
79
+
80
+ // This is only supported for SST files. If the file is present locally,
81
+ // i.e in the destination dir, we just open it and return. If its in the
82
+ // remote, i.e source dir, we link it locally and open the link.
83
+ // TODO: Add support for blob files belonging to the new BlobDB
84
+ IOStatus OnDemandFileSystem::NewRandomAccessFile(
85
+ const std::string& fname, const FileOptions& file_opts,
86
+ std::unique_ptr<FSRandomAccessFile>* result, IODebugContext* dbg) {
87
+ FileType type;
88
+ if (!LookupFileType(fname, &type) || type != kTableFile) {
89
+ return IOStatus::NotSupported();
90
+ }
91
+
92
+ IOStatus s = target()->FileExists(fname, file_opts.io_options, nullptr);
93
+ if (s.IsNotFound() || s.IsPathNotFound()) {
94
+ std::string rname = fname;
95
+ if (CheckPathAndAdjust(local_path_, remote_path_, rname)) {
96
+ // First clear any local directory cache as it may be out of date
97
+ target()->DiscardCacheForDirectory(rname);
98
+
99
+ s = target()->LinkFile(rname, fname, IOOptions(), nullptr);
100
+ if (!s.ok()) {
101
+ return s;
102
+ }
103
+ }
104
+ }
105
+
106
+ return s.ok() ? target()->NewRandomAccessFile(fname, file_opts, result, dbg)
107
+ : s;
108
+ }
109
+
110
+ // We don't expect to create any writable file other than info LOG files.
111
+ IOStatus OnDemandFileSystem::NewWritableFile(
112
+ const std::string& fname, const FileOptions& file_opts,
113
+ std::unique_ptr<FSWritableFile>* result, IODebugContext* dbg) {
114
+ FileType type;
115
+ if (!LookupFileType(fname, &type) || type != kInfoLogFile) {
116
+ return IOStatus::NotSupported();
117
+ }
118
+
119
+ std::string rname = fname;
120
+ if (CheckPathAndAdjust(local_path_, remote_path_, rname)) {
121
+ // First clear any local directory cache as it may be out of date
122
+ target()->DiscardCacheForDirectory(rname);
123
+
124
+ IOStatus s = target()->FileExists(rname, file_opts.io_options, dbg);
125
+ if (s.ok()) {
126
+ return IOStatus::InvalidArgument(
127
+ "Writing to a file present in the remote directory not supoprted");
128
+ }
129
+ }
130
+
131
+ return target()->NewWritableFile(fname, file_opts, result, dbg);
132
+ }
133
+
134
+ // Currently not supported, as there's no need for RocksDB to create a
135
+ // directory object for a DB in read-only mode. Every argument is ignored and NotSupported is returned unconditionally.
136
+ IOStatus OnDemandFileSystem::NewDirectory(
137
+ const std::string& /*name*/, const IOOptions& /*io_opts*/,
138
+ std::unique_ptr<FSDirectory>* /*result*/, IODebugContext* /*dbg*/) {
139
+ return IOStatus::NotSupported();
140
+ }
141
+
142
+ // Check if the given file exists, either locally or remote. If the file is an
143
+ // SST file, then link it locally. We assume if the file existence is being
144
+ // checked, its for verification purposes, for example while replaying the
145
+ // MANIFEST. The file will be opened for reading some time in the future.
146
+ IOStatus OnDemandFileSystem::FileExists(const std::string& fname,
147
+ const IOOptions& options,
148
+ IODebugContext* dbg) {
149
+ IOStatus s = target()->FileExists(fname, options, dbg);
150
+ if (!s.IsNotFound() && !s.IsPathNotFound()) {
151
+ return s;
152
+ }
153
+
154
+ std::string rname = fname;
155
+ if (CheckPathAndAdjust(local_path_, remote_path_, rname)) {
156
+ // First clear any local directory cache as it may be out of date
157
+ target()->DiscardCacheForDirectory(rname);
158
+
159
+ FileType type;
160
+ if (LookupFileType(fname, &type) && type == kTableFile) {
161
+ s = target()->LinkFile(rname, fname, options, dbg);
162
+ } else {
163
+ s = target()->FileExists(rname, options, dbg);
164
+ }
165
+ }
166
+ return s;
167
+ }
168
+
169
+ // Doa listing of both the local and remote directories and merge the two.
170
+ IOStatus OnDemandFileSystem::GetChildren(const std::string& dir,
171
+ const IOOptions& options,
172
+ std::vector<std::string>* result,
173
+ IODebugContext* dbg) {
174
+ std::string rdir = dir;
175
+ IOStatus s = target()->GetChildren(dir, options, result, dbg);
176
+ if (!s.ok() || !CheckPathAndAdjust(local_path_, remote_path_, rdir)) {
177
+ return s;
178
+ }
179
+
180
+ std::vector<std::string> rchildren;
181
+ // First clear any local directory cache as it may be out of date
182
+ target()->DiscardCacheForDirectory(rdir);
183
+ s = target()->GetChildren(rdir, options, &rchildren, dbg);
184
+ if (s.ok()) {
185
+ std::for_each(rchildren.begin(), rchildren.end(), [&](std::string& name) {
186
+ // Adjust name
187
+ (void)CheckPathAndAdjust(remote_path_, local_path_, name);
188
+ });
189
+ std::sort(result->begin(), result->end());
190
+ std::sort(rchildren.begin(), rchildren.end());
191
+
192
+ std::vector<std::string> output;
193
+ output.reserve(result->size() + rchildren.size());
194
+ std::set_union(result->begin(), result->end(), rchildren.begin(),
195
+ rchildren.end(), std::back_inserter(output));
196
+ *result = std::move(output);
197
+ }
198
+ return s;
199
+ }
200
+
201
+ // Doa listing of both the local and remote directories and merge the two.
202
+ IOStatus OnDemandFileSystem::GetChildrenFileAttributes(
203
+ const std::string& dir, const IOOptions& options,
204
+ std::vector<FileAttributes>* result, IODebugContext* dbg) {
205
+ std::string rdir = dir;
206
+ IOStatus s = target()->GetChildrenFileAttributes(dir, options, result, dbg);
207
+ if (!s.ok() || !CheckPathAndAdjust(local_path_, remote_path_, rdir)) {
208
+ return s;
209
+ }
210
+
211
+ std::vector<FileAttributes> rchildren;
212
+ // First clear any local directory cache as it may be out of date
213
+ target()->DiscardCacheForDirectory(rdir);
214
+ s = target()->GetChildrenFileAttributes(rdir, options, &rchildren, dbg);
215
+ if (s.ok()) {
216
+ struct FileAttributeSorter {
217
+ bool operator()(const FileAttributes& lhs, const FileAttributes& rhs) {
218
+ return lhs.name < rhs.name;
219
+ }
220
+ } file_attr_sorter;
221
+
222
+ std::for_each(
223
+ rchildren.begin(), rchildren.end(), [&](FileAttributes& file) {
224
+ // Adjust name
225
+ (void)CheckPathAndAdjust(remote_path_, local_path_, file.name);
226
+ });
227
+ std::sort(result->begin(), result->end(), file_attr_sorter);
228
+ std::sort(rchildren.begin(), rchildren.end(), file_attr_sorter);
229
+
230
+ std::vector<FileAttributes> output;
231
+ output.reserve(result->size() + rchildren.size());
232
+ std::set_union(rchildren.begin(), rchildren.end(), result->begin(),
233
+ result->end(), std::back_inserter(output), file_attr_sorter);
234
+ *result = std::move(output);
235
+ }
236
+ return s;
237
+ }
238
+
239
+ IOStatus OnDemandFileSystem::GetFileSize(const std::string& fname,
240
+ const IOOptions& options,
241
+ uint64_t* file_size,
242
+ IODebugContext* dbg) {
243
+ uint64_t local_size = 0;
244
+ IOStatus s = target()->GetFileSize(fname, options, &local_size, dbg);
245
+ if (!s.ok() && !s.IsNotFound() && !s.IsPathNotFound()) {
246
+ return s;
247
+ }
248
+
249
+ if (s.IsNotFound() || s.IsPathNotFound()) {
250
+ std::string rname = fname;
251
+ if (CheckPathAndAdjust(local_path_, remote_path_, rname)) {
252
+ // First clear any local directory cache as it may be out of date
253
+ target()->DiscardCacheForDirectory(rname);
254
+
255
+ FileType type;
256
+ if (LookupFileType(fname, &type) && type == kTableFile) {
257
+ s = target()->LinkFile(rname, fname, options, dbg);
258
+ if (s.ok()) {
259
+ s = target()->GetFileSize(fname, options, &local_size, dbg);
260
+ }
261
+ } else {
262
+ s = target()->GetFileSize(rname, options, &local_size, dbg);
263
+ }
264
+ }
265
+ }
266
+ *file_size = local_size;
267
+ return s;
268
+ }
269
+
270
+ // An implementation of Read that tracks whether we've reached EOF. If so,
271
+ // re-open the file to try to read past the previous EOF offset. After
272
+ // re-opening, positing it back to the last read offset.
273
+ IOStatus OnDemandSequentialFile::Read(size_t n, const IOOptions& options,
274
+ Slice* result, char* scratch,
275
+ IODebugContext* dbg) {
276
+ IOStatus s;
277
+ if (eof_) {
278
+ // Reopen the file. With some distributed file systems, this is required
279
+ // in order to get the new size
280
+ file_.reset();
281
+ s = fs_->NewSequentialFile(path_, file_opts_, &file_, dbg);
282
+ if (!s.ok()) {
283
+ return IOStatus::IOError("While opening file after relinking, got error ",
284
+ s.ToString());
285
+ }
286
+ s = file_->Skip(offset_);
287
+ if (!s.ok()) {
288
+ return IOStatus::IOError(
289
+ "While seeking to offset" + std::to_string(offset_) + "got error",
290
+ s.ToString());
291
+ }
292
+ eof_ = false;
293
+ }
294
+
295
+ s = file_->Read(n, options, result, scratch, dbg);
296
+ if (s.ok()) {
297
+ offset_ += result->size();
298
+ if (result->size() < n) {
299
+ // We reached EOF. Mark it so we know to relink next time
300
+ eof_ = true;
301
+ }
302
+ }
303
+ return s;
304
+ }
305
+
306
+ IOStatus OnDemandSequentialFile::Skip(uint64_t n) {
307
+ IOStatus s = file_->Skip(n);
308
+ if (s.ok()) {
309
+ offset_ += n;
310
+ }
311
+ return s;
312
+ }
313
+
314
+ bool OnDemandSequentialFile::use_direct_io() const {
315
+ return file_->use_direct_io();  // Forwarded unchanged from the wrapped file. NOTE(review): file_ may be null after a failed re-open in Read() -- confirm callers stop using the object then.
316
+ }
317
+
318
+ size_t OnDemandSequentialFile::GetRequiredBufferAlignment() const {
319
+ return file_->GetRequiredBufferAlignment();  // Forwarded unchanged from the wrapped file.
320
+ }
321
+
322
+ Temperature OnDemandSequentialFile::GetTemperature() const {
323
+ return file_->GetTemperature();  // Forwarded unchanged from the wrapped file.
324
+ }
325
+
326
+ std::shared_ptr<FileSystem> NewOnDemandFileSystem(
327
+ const std::shared_ptr<FileSystem>& fs, std::string src_path,
328
+ std::string dest_path) {  // Factory: builds an OnDemandFileSystem from `fs` and the two paths, passed in this order. NOTE(review): presumably src_path is the remote directory and dest_path the local one -- confirm against the constructor.
329
+ return std::make_shared<OnDemandFileSystem>(fs, src_path, dest_path);
330
+ }
331
+ } // namespace ROCKSDB_NAMESPACE