couchbase 4.2.5 → 4.2.6-dev.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. package/deps/couchbase-cxx-client/CMakeLists.txt +9 -1
  2. package/deps/couchbase-cxx-client/bin/api.rb +234 -0
  3. package/deps/couchbase-cxx-client/bin/create-search-index +18 -135
  4. package/deps/couchbase-cxx-client/bin/init-cluster +17 -139
  5. package/deps/couchbase-cxx-client/bin/load-sample-buckets +54 -0
  6. package/deps/couchbase-cxx-client/core/cluster.hxx +33 -12
  7. package/deps/couchbase-cxx-client/core/cluster_options.hxx +3 -0
  8. package/deps/couchbase-cxx-client/core/crud_component.cxx +51 -22
  9. package/deps/couchbase-cxx-client/core/impl/build_deferred_query_indexes.cxx +115 -50
  10. package/deps/couchbase-cxx-client/core/impl/cluster.cxx +6 -0
  11. package/deps/couchbase-cxx-client/core/impl/create_bucket.cxx +155 -0
  12. package/deps/couchbase-cxx-client/core/impl/create_query_index.cxx +172 -59
  13. package/deps/couchbase-cxx-client/core/impl/dns_srv_tracker.cxx +2 -1
  14. package/deps/couchbase-cxx-client/core/impl/drop_bucket.cxx +66 -0
  15. package/deps/couchbase-cxx-client/core/impl/drop_query_index.cxx +138 -59
  16. package/deps/couchbase-cxx-client/core/impl/flush_bucket.cxx +66 -0
  17. package/deps/couchbase-cxx-client/core/impl/get_all_buckets.cxx +163 -0
  18. package/deps/couchbase-cxx-client/core/impl/get_all_query_indexes.cxx +67 -37
  19. package/deps/couchbase-cxx-client/core/impl/get_bucket.cxx +153 -0
  20. package/deps/couchbase-cxx-client/core/impl/internal_manager_error_context.cxx +113 -0
  21. package/deps/couchbase-cxx-client/core/impl/internal_manager_error_context.hxx +60 -0
  22. package/deps/couchbase-cxx-client/core/impl/key_value_error_category.cxx +2 -4
  23. package/deps/couchbase-cxx-client/core/impl/manager_error_context.cxx +100 -0
  24. package/deps/couchbase-cxx-client/core/impl/query.cxx +1 -0
  25. package/deps/couchbase-cxx-client/core/impl/update_bucket.cxx +130 -0
  26. package/deps/couchbase-cxx-client/core/impl/watch_query_indexes.cxx +53 -29
  27. package/deps/couchbase-cxx-client/core/io/dns_client.cxx +71 -38
  28. package/deps/couchbase-cxx-client/core/io/dns_config.cxx +5 -4
  29. package/deps/couchbase-cxx-client/core/io/mcbp_session.cxx +5 -6
  30. package/deps/couchbase-cxx-client/core/meta/features.hxx +6 -0
  31. package/deps/couchbase-cxx-client/core/operations/document_query.cxx +11 -0
  32. package/deps/couchbase-cxx-client/core/operations/document_query.hxx +1 -0
  33. package/deps/couchbase-cxx-client/core/origin.cxx +270 -0
  34. package/deps/couchbase-cxx-client/core/origin.hxx +2 -0
  35. package/deps/couchbase-cxx-client/core/protocol/status.cxx +2 -2
  36. package/deps/couchbase-cxx-client/core/range_scan_options.cxx +3 -27
  37. package/deps/couchbase-cxx-client/core/range_scan_options.hxx +13 -17
  38. package/deps/couchbase-cxx-client/core/range_scan_orchestrator.cxx +367 -170
  39. package/deps/couchbase-cxx-client/core/range_scan_orchestrator.hxx +13 -2
  40. package/deps/couchbase-cxx-client/core/range_scan_orchestrator_options.hxx +5 -3
  41. package/deps/couchbase-cxx-client/core/scan_options.hxx +0 -19
  42. package/deps/couchbase-cxx-client/core/scan_result.cxx +19 -5
  43. package/deps/couchbase-cxx-client/core/scan_result.hxx +5 -2
  44. package/deps/couchbase-cxx-client/core/timeout_defaults.hxx +2 -3
  45. package/deps/couchbase-cxx-client/core/topology/capabilities.hxx +1 -0
  46. package/deps/couchbase-cxx-client/core/topology/capabilities_fmt.hxx +2 -0
  47. package/deps/couchbase-cxx-client/core/topology/collections_manifest_fmt.hxx +1 -1
  48. package/deps/couchbase-cxx-client/core/topology/configuration.hxx +5 -0
  49. package/deps/couchbase-cxx-client/core/topology/configuration_json.hxx +2 -0
  50. package/deps/couchbase-cxx-client/core/utils/connection_string.cxx +4 -0
  51. package/deps/couchbase-cxx-client/couchbase/behavior_options.hxx +19 -2
  52. package/deps/couchbase-cxx-client/couchbase/bucket_manager.hxx +135 -0
  53. package/deps/couchbase-cxx-client/couchbase/build_query_index_options.hxx +0 -30
  54. package/deps/couchbase-cxx-client/couchbase/cluster.hxx +14 -0
  55. package/deps/couchbase-cxx-client/couchbase/collection_query_index_manager.hxx +7 -48
  56. package/deps/couchbase-cxx-client/couchbase/create_bucket_options.hxx +41 -0
  57. package/deps/couchbase-cxx-client/couchbase/create_primary_query_index_options.hxx +0 -29
  58. package/deps/couchbase-cxx-client/couchbase/create_query_index_options.hxx +0 -33
  59. package/deps/couchbase-cxx-client/couchbase/drop_bucket_options.hxx +41 -0
  60. package/deps/couchbase-cxx-client/couchbase/drop_primary_query_index_options.hxx +0 -30
  61. package/deps/couchbase-cxx-client/couchbase/drop_query_index_options.hxx +0 -31
  62. package/deps/couchbase-cxx-client/couchbase/error_codes.hxx +1 -2
  63. package/deps/couchbase-cxx-client/couchbase/flush_bucket_options.hxx +41 -0
  64. package/deps/couchbase-cxx-client/couchbase/get_all_buckets_options.hxx +44 -0
  65. package/deps/couchbase-cxx-client/couchbase/get_all_query_indexes_options.hxx +0 -30
  66. package/deps/couchbase-cxx-client/couchbase/get_bucket_options.hxx +43 -0
  67. package/deps/couchbase-cxx-client/couchbase/management/bucket_settings.hxx +116 -0
  68. package/deps/couchbase-cxx-client/couchbase/manager_error_context.hxx +29 -53
  69. package/deps/couchbase-cxx-client/couchbase/query_index_manager.hxx +16 -83
  70. package/deps/couchbase-cxx-client/couchbase/query_options.hxx +18 -0
  71. package/deps/couchbase-cxx-client/couchbase/security_options.hxx +15 -0
  72. package/deps/couchbase-cxx-client/couchbase/update_bucket_options.hxx +41 -0
  73. package/deps/couchbase-cxx-client/couchbase/watch_query_indexes_options.hxx +0 -31
  74. package/deps/couchbase-cxx-client/docs/cbc-analytics.md +1 -0
  75. package/deps/couchbase-cxx-client/docs/cbc-get.md +1 -0
  76. package/deps/couchbase-cxx-client/docs/cbc-pillowfight.md +1 -0
  77. package/deps/couchbase-cxx-client/docs/cbc-query.md +1 -0
  78. package/deps/couchbase-cxx-client/docs/cbc.md +10 -0
  79. package/deps/couchbase-cxx-client/test/CMakeLists.txt +1 -0
  80. package/deps/couchbase-cxx-client/test/test_integration_collections.cxx +6 -0
  81. package/deps/couchbase-cxx-client/test/test_integration_crud.cxx +5 -0
  82. package/deps/couchbase-cxx-client/test/test_integration_examples.cxx +137 -1
  83. package/deps/couchbase-cxx-client/test/test_integration_management.cxx +709 -266
  84. package/deps/couchbase-cxx-client/test/test_integration_query.cxx +19 -7
  85. package/deps/couchbase-cxx-client/test/test_integration_range_scan.cxx +351 -112
  86. package/deps/couchbase-cxx-client/test/test_integration_search.cxx +10 -1
  87. package/deps/couchbase-cxx-client/test/test_transaction_public_async_api.cxx +13 -12
  88. package/deps/couchbase-cxx-client/test/test_transaction_public_blocking_api.cxx +27 -21
  89. package/deps/couchbase-cxx-client/test/test_unit_query.cxx +75 -0
  90. package/deps/couchbase-cxx-client/test/utils/server_version.hxx +5 -0
  91. package/deps/couchbase-cxx-client/test/utils/wait_until.cxx +29 -10
  92. package/deps/couchbase-cxx-client/test/utils/wait_until.hxx +3 -1
  93. package/deps/couchbase-cxx-client/tools/utils.cxx +4 -1
  94. package/dist/binding.d.ts +21 -16
  95. package/dist/binding.js +1 -4
  96. package/dist/bindingutilities.d.ts +6 -1
  97. package/dist/bindingutilities.js +36 -1
  98. package/dist/collection.d.ts +65 -3
  99. package/dist/collection.js +107 -0
  100. package/dist/crudoptypes.d.ts +34 -0
  101. package/dist/crudoptypes.js +18 -1
  102. package/dist/queryexecutor.js +1 -0
  103. package/dist/querytypes.d.ts +7 -0
  104. package/dist/rangeScan.d.ts +107 -0
  105. package/dist/rangeScan.js +91 -0
  106. package/dist/streamablepromises.d.ts +6 -0
  107. package/dist/streamablepromises.js +25 -1
  108. package/package.json +13 -13
  109. package/src/addondata.hpp +1 -0
  110. package/src/binding.cpp +5 -2
  111. package/src/connection.cpp +108 -2
  112. package/src/connection.hpp +1 -0
  113. package/src/constants.cpp +2 -12
  114. package/src/jstocbpp_autogen.hpp +49 -22
  115. package/src/jstocbpp_basic.hpp +2 -8
  116. package/src/mutationtoken.cpp +13 -0
  117. package/src/scan_iterator.cpp +90 -0
  118. package/src/scan_iterator.hpp +30 -0
  119. package/tools/gen-bindings-json.py +9 -8
  120. package/deps/couchbase-cxx-client/core/impl/collection_query_index_manager.cxx +0 -93
@@ -19,8 +19,6 @@
19
19
  #include "core/logger/logger.hxx"
20
20
  #include "couchbase/error_codes.hxx"
21
21
 
22
- #include <couchbase/retry_strategy.hxx>
23
-
24
22
  #include <asio/bind_executor.hpp>
25
23
  #include <asio/io_context.hpp>
26
24
  #include <asio/post.hpp>
@@ -30,6 +28,7 @@
30
28
  #include <gsl/narrow>
31
29
 
32
30
  #include <future>
31
+ #include <random>
33
32
 
34
33
  namespace couchbase::core
35
34
  {
@@ -55,6 +54,14 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
55
54
  {
56
55
  struct failed {
57
56
  std::error_code ec;
57
+ bool fatal{ true };
58
+ };
59
+
60
+ struct not_started {
61
+ };
62
+
63
+ struct awaiting_retry {
64
+ std::error_code ec;
58
65
  };
59
66
 
60
67
  struct running {
@@ -68,32 +75,73 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
68
75
  range_scan_stream(asio::io_context& io,
69
76
  agent kv_provider,
70
77
  std::uint16_t vbucket_id,
78
+ std::int16_t node_id,
71
79
  range_scan_create_options create_options,
72
- range_scan_continue_options continue_options)
80
+ range_scan_continue_options continue_options,
81
+ std::shared_ptr<scan_stream_manager> stream_manager)
73
82
  : items_{ io, continue_options.batch_item_limit }
74
83
  , agent_{ std::move(kv_provider) }
75
84
  , vbucket_id_{ vbucket_id }
85
+ , node_id_{ node_id }
76
86
  , create_options_{ std::move(create_options) }
77
87
  , continue_options_{ std::move(continue_options) }
88
+ , stream_manager_{ std::move(stream_manager) }
78
89
  {
79
90
  }
80
91
 
81
92
  void start()
82
93
  {
94
+ // Fail the stream if more time since the timeout has elapsed since the stream was first attempted (if this is a retry)
95
+ if (first_attempt_timestamp_.has_value()) {
96
+ if (std::chrono::steady_clock::now() - first_attempt_timestamp_.value() > create_options_.timeout) {
97
+ CB_LOG_DEBUG("stream for vbucket_id {} cannot be retried any longer because it has exceeded the timeout", vbucket_id_);
98
+ state_ = failed{ errc::common::unambiguous_timeout, !is_sampling_scan() };
99
+ stream_manager_->stream_start_failed(node_id_, error_is_fatal());
100
+ drain_waiting_queue();
101
+ return;
102
+ }
103
+ } else {
104
+ first_attempt_timestamp_ = std::chrono::steady_clock::now();
105
+ }
106
+
107
+ CB_LOG_TRACE("starting stream {} in node {}", vbucket_id_, node_id_);
108
+ state_ = std::monostate{};
83
109
  if (std::holds_alternative<range_scan>(create_options_.scan_type) && !last_seen_key_.empty()) {
84
- std::get<range_scan>(create_options_.scan_type).start_.id = last_seen_key_;
110
+ std::get<range_scan>(create_options_.scan_type).from = scan_term{ last_seen_key_ };
85
111
  }
86
112
 
87
113
  auto op = agent_.range_scan_create(vbucket_id_, create_options_, [self = shared_from_this()](auto res, auto ec) {
88
114
  if (ec) {
89
- self->state_ = failed{ ec };
90
- self->drain_waiting_queue();
91
115
  if (ec == errc::key_value::document_not_found) {
116
+ // Benign error
92
117
  CB_LOG_DEBUG("ignoring vbucket_id {} because no documents exist for it", self->vbucket_id_);
118
+ CB_LOG_TRACE("setting state for stream {} to FAILED", self->vbucket_id_);
119
+ self->state_ = failed{ ec, false };
120
+ self->stream_manager_->stream_start_failed(self->node_id_, self->error_is_fatal());
121
+ } else if (ec == errc::common::temporary_failure) {
122
+ // Retryable error
123
+ CB_LOG_DEBUG("received busy status from vbucket with ID {} - reducing concurrency & will retry", self->vbucket_id_);
124
+ CB_LOG_TRACE("setting state for stream {} to AWAITING_RETRY", self->vbucket_id_);
125
+ self->state_ = awaiting_retry{ ec };
126
+ self->stream_manager_->stream_start_failed_awaiting_retry(self->node_id_, self->vbucket_id_);
127
+ } else if (ec == errc::common::internal_server_failure || ec == errc::common::collection_not_found) {
128
+ // Fatal errors
129
+ CB_LOG_TRACE("setting state for stream {} to FAILED", self->vbucket_id_);
130
+ self->state_ = failed{ ec, true };
131
+ self->stream_manager_->stream_start_failed(self->node_id_, self->error_is_fatal());
132
+ } else {
133
+ // Unexpected errors
134
+ CB_LOG_DEBUG(
135
+ "received unexpected error {} from stream for vbucket {} ({})", ec.value(), self->vbucket_id_, ec.message());
136
+ CB_LOG_TRACE("setting state for stream {} to FAILED", self->vbucket_id_);
137
+ self->state_ = failed{ ec, true };
138
+ self->stream_manager_->stream_start_failed(self->node_id_, self->error_is_fatal());
93
139
  }
140
+ self->drain_waiting_queue();
94
141
  return;
95
142
  }
96
143
  self->state_ = running{ std::move(res.scan_uuid) };
144
+ CB_LOG_TRACE("setting state for stream {} to RUNNING", self->vbucket_id_);
97
145
  self->drain_waiting_queue();
98
146
  self->resume();
99
147
  });
@@ -105,86 +153,126 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
105
153
  if (is_running()) {
106
154
  agent_.range_scan_cancel(uuid(), vbucket_id_, {}, [](auto /* res */, auto /* ec */) {});
107
155
  }
108
- state_ = failed{ ec };
156
+
157
+ items_.cancel();
109
158
  items_.close();
159
+
160
+ bool fatal{};
161
+ if (ec == errc::key_value::document_not_found || ec == errc::common::authentication_failure ||
162
+ ec == errc::common::collection_not_found || ec == errc::common::request_canceled) {
163
+ // Errors that are fatal unless this is a sampling scan
164
+ fatal = !is_sampling_scan();
165
+ } else if (ec == errc::common::feature_not_available || ec == errc::common::invalid_argument ||
166
+ ec == errc::common::temporary_failure) {
167
+ // Errors that are always fatal
168
+ fatal = true;
169
+ } else {
170
+ // Unexpected error - always fatal
171
+ CB_LOG_DEBUG("received unexpected error {} from stream for vbucket during range scan continue {} ({})",
172
+ ec.value(),
173
+ vbucket_id_,
174
+ ec.message());
175
+ fatal = true;
176
+ }
177
+
178
+ CB_LOG_TRACE("setting state for stream {} to FAILED after range scan continue", vbucket_id_);
179
+ state_ = failed{ ec, fatal };
180
+ stream_manager_->stream_continue_failed(node_id_, fatal);
110
181
  }
111
182
  }
112
183
 
184
+ void mark_not_started()
185
+ {
186
+ state_ = not_started{};
187
+ }
188
+
113
189
  void complete()
114
190
  {
115
191
  if (!is_failed() && !is_completed()) {
192
+ CB_LOG_TRACE("setting state for stream {} to COMPLETED", vbucket_id_);
193
+
194
+ stream_manager_->stream_completed(node_id_);
116
195
  state_ = completed{};
196
+ drain_waiting_queue();
117
197
  }
118
198
  }
119
199
 
120
- auto pop() -> std::optional<range_scan_item>
200
+ void cancel()
121
201
  {
122
- if (peeked_) {
123
- std::optional<range_scan_item> item{};
124
- std::swap(peeked_, item);
125
- return item;
202
+ if (!should_cancel_) {
203
+ should_cancel_ = true;
204
+ items_.cancel();
205
+ items_.close();
126
206
  }
127
- return peeked_;
128
207
  }
129
208
 
130
209
  template<typename Handler>
131
- void peek(Handler&& handler)
210
+ void take(Handler&& handler)
132
211
  {
133
212
  do_when_ready([self = shared_from_this(), handler = std::forward<Handler>(handler)]() mutable {
134
- self->peek_when_ready(std::forward<Handler>(handler));
213
+ self->take_when_ready(std::forward<Handler>(handler));
135
214
  });
136
215
  }
137
216
 
138
- template<typename Handler>
139
- void take(Handler&& handler)
217
+ [[nodiscard]] auto node_id() const -> int16_t
140
218
  {
141
- do_when_ready([self = shared_from_this(), handler = std::forward<Handler>(handler)]() mutable {
142
- self->take_when_ready(std::forward<Handler>(handler));
143
- });
219
+ return node_id_;
144
220
  }
145
221
 
146
- private:
147
- template<typename Handler>
148
- void peek_when_ready(Handler&& handler)
222
+ [[nodiscard]] auto is_ready() const -> bool
149
223
  {
150
- if (is_failed()) {
151
- return handler(std::optional<range_scan_item>{});
152
- }
224
+ return !std::holds_alternative<std::monostate>(state_);
225
+ }
153
226
 
154
- if (peeked_) {
155
- return handler(peeked_);
156
- }
227
+ [[nodiscard]] auto is_not_started() const -> bool
228
+ {
229
+ return std::holds_alternative<not_started>(state_);
230
+ }
157
231
 
158
- if (is_completed() && !items_.ready()) {
159
- return handler(std::optional<range_scan_item>{});
160
- }
232
+ [[nodiscard]] auto is_awaiting_retry() const -> bool
233
+ {
234
+ return std::holds_alternative<awaiting_retry>(state_);
235
+ }
161
236
 
162
- items_.async_receive(
163
- [self = shared_from_this(), handler = std::forward<Handler>(handler)](std::error_code ec, range_scan_item item) mutable {
164
- if (ec) {
165
- self->peeked_ = {};
166
- } else {
167
- self->peeked_ = std::move(item);
168
- }
169
- handler(self->peeked_);
170
- });
237
+ [[nodiscard]] auto is_running() const -> bool
238
+ {
239
+ return std::holds_alternative<running>(state_);
240
+ }
241
+
242
+ [[nodiscard]] auto is_failed() const -> bool
243
+ {
244
+ return std::holds_alternative<failed>(state_);
171
245
  }
172
246
 
247
+ [[nodiscard]] auto is_completed() const -> bool
248
+ {
249
+ return std::holds_alternative<completed>(state_);
250
+ }
251
+
252
+ private:
173
253
  template<typename Handler>
174
254
  void take_when_ready(Handler&& handler)
175
255
  {
256
+
176
257
  if (is_failed()) {
177
- return handler(std::optional<range_scan_item>{}, false);
258
+ if (error_is_fatal()) {
259
+ return handler(std::optional<range_scan_item>{}, false, std::optional<std::error_code>{ error() });
260
+ } else {
261
+ return handler(std::optional<range_scan_item>{}, false, std::optional<std::error_code>{});
262
+ }
263
+ }
264
+ if (is_awaiting_retry() || is_not_started()) {
265
+ return handler(std::optional<range_scan_item>{}, true, std::optional<std::error_code>{});
178
266
  }
179
267
  if (!items_.ready()) {
180
- return handler(std::optional<range_scan_item>{}, is_running());
268
+ return handler(std::optional<range_scan_item>{}, is_running(), std::optional<std::error_code>{});
181
269
  }
182
270
  items_.async_receive(
183
271
  [self = shared_from_this(), handler = std::forward<Handler>(handler)](std::error_code ec, range_scan_item item) mutable {
184
272
  if (ec) {
185
- return handler(std::optional<range_scan_item>{}, false);
273
+ return handler(std::optional<range_scan_item>{}, false, std::optional<std::error_code>{});
186
274
  }
187
- handler(std::optional<range_scan_item>{ std::move(item) }, true);
275
+ handler(std::optional<range_scan_item>{ std::move(item) }, true, std::optional<std::error_code>{});
188
276
  });
189
277
  }
190
278
 
@@ -211,6 +299,13 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
211
299
  if (!is_running()) {
212
300
  return;
213
301
  }
302
+ if (should_cancel_) {
303
+ agent_.range_scan_cancel(uuid(), vbucket_id_, {}, [](auto /* res */, auto /* ec */) {});
304
+ items_.close();
305
+ items_.cancel();
306
+ return;
307
+ }
308
+
214
309
  agent_.range_scan_continue(
215
310
  uuid(),
216
311
  vbucket_id_,
@@ -236,26 +331,6 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
236
331
  });
237
332
  }
238
333
 
239
- [[nodiscard]] auto is_ready() const -> bool
240
- {
241
- return !std::holds_alternative<std::monostate>(state_);
242
- }
243
-
244
- [[nodiscard]] auto is_running() const -> bool
245
- {
246
- return std::holds_alternative<running>(state_);
247
- }
248
-
249
- [[nodiscard]] auto is_failed() const -> bool
250
- {
251
- return std::holds_alternative<failed>(state_);
252
- }
253
-
254
- [[nodiscard]] auto is_completed() const -> bool
255
- {
256
- return std::holds_alternative<completed>(state_);
257
- }
258
-
259
334
  [[nodiscard]] auto uuid() const -> std::vector<std::byte>
260
335
  {
261
336
  if (is_running()) {
@@ -272,150 +347,297 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
272
347
  return {};
273
348
  }
274
349
 
350
+ [[nodiscard]] auto error_is_fatal() const -> bool
351
+ {
352
+ if (is_failed()) {
353
+ return std::get<failed>(state_).fatal;
354
+ }
355
+ return {};
356
+ }
357
+
358
+ [[nodiscard]] auto is_sampling_scan() const -> bool
359
+ {
360
+ return std::holds_alternative<sampling_scan>(create_options_.scan_type);
361
+ }
362
+
275
363
  asio::experimental::concurrent_channel<void(std::error_code, range_scan_item)> items_;
276
364
  agent agent_;
277
365
  std::uint16_t vbucket_id_;
366
+ std::int16_t node_id_;
278
367
  range_scan_create_options create_options_;
279
368
  range_scan_continue_options continue_options_;
280
- std::vector<std::byte> last_seen_key_{};
281
- std::variant<std::monostate, failed, running, completed> state_{};
282
- std::optional<range_scan_item> peeked_{};
369
+ std::shared_ptr<scan_stream_manager> stream_manager_;
370
+ std::string last_seen_key_{};
371
+ std::variant<std::monostate, not_started, failed, awaiting_retry, running, completed> state_{};
372
+ bool should_cancel_{ false };
373
+ std::optional<std::chrono::time_point<std::chrono::steady_clock>> first_attempt_timestamp_{};
283
374
  std::vector<utils::movable_function<void()>> waiting_queue_{};
284
375
  };
285
376
 
286
- struct lowest_item {
287
- std::uint16_t vbucket_id;
288
- std::vector<std::byte> key;
289
- };
290
-
291
- static auto
292
- less(std::vector<std::byte>& a, std::vector<std::byte>& b) -> bool
293
- {
294
- auto common_size = std::min(a.size(), b.size());
295
- for (std::size_t i = 0; i < common_size; ++i) {
296
- if (a[i] < b[i]) {
297
- return true;
298
- }
299
- if (a[i] > b[i]) {
300
- return false;
301
- }
302
- }
303
- return a.size() < b.size();
304
- }
305
-
306
377
  class range_scan_orchestrator_impl
307
378
  : public std::enable_shared_from_this<range_scan_orchestrator_impl>
308
379
  , public range_scan_item_iterator
380
+ , public scan_stream_manager
309
381
  {
310
382
  public:
311
383
  range_scan_orchestrator_impl(asio::io_context& io,
312
384
  agent kv_provider,
313
- std::size_t num_vbuckets,
385
+ topology::configuration::vbucket_map vbucket_map,
314
386
  std::string scope_name,
315
387
  std::string collection_name,
316
- std::variant<std::monostate, range_scan, sampling_scan> scan_type,
388
+ std::variant<std::monostate, range_scan, prefix_scan, sampling_scan> scan_type,
317
389
  range_scan_orchestrator_options options)
318
390
  : io_{ io }
319
391
  , agent_{ std::move(kv_provider) }
320
- , num_vbuckets_{ num_vbuckets }
392
+ , vbucket_map_{ std::move(vbucket_map) }
321
393
  , scope_name_{ std::move(scope_name) }
322
394
  , collection_name_{ std::move(collection_name) }
323
395
  , scan_type_{ std::move(scan_type) }
324
396
  , options_{ std::move(options) }
325
397
  , vbucket_to_snapshot_requirements_{ mutation_state_to_snapshot_requirements(options_.consistent_with) }
398
+ , concurrency_{ options_.concurrency }
326
399
  {
400
+
327
401
  if (std::holds_alternative<sampling_scan>(scan_type_)) {
328
- item_limit = std::get<sampling_scan>(scan_type).limit;
402
+ item_limit_ = std::get<sampling_scan>(scan_type).limit;
329
403
  }
330
404
  }
331
405
 
332
406
  auto scan() -> tl::expected<scan_result, std::error_code>
333
407
  {
334
- if (item_limit == 0) {
408
+ if (item_limit_ == 0) {
409
+ return tl::unexpected(errc::common::invalid_argument);
410
+ }
411
+ if (concurrency_ <= 0) {
335
412
  return tl::unexpected(errc::common::invalid_argument);
336
413
  }
337
- range_scan_continue_options continue_options{
338
- options_.batch_item_limit, options_.batch_byte_limit, options_.batch_time_limit, options_.retry_strategy, options_.ids_only,
414
+
415
+ auto batch_time_limit = std::chrono::duration_cast<std::chrono::milliseconds>(0.9 * options_.timeout);
416
+ range_scan_continue_options const continue_options{
417
+ options_.batch_item_limit, options_.batch_byte_limit, batch_time_limit, options_.timeout, options_.retry_strategy,
339
418
  };
340
- continue_options.batch_time_limit = std::chrono::seconds{ 10 };
341
- for (std::uint16_t vbucket = 0; vbucket < gsl::narrow_cast<std::uint16_t>(num_vbuckets_); ++vbucket) {
419
+ for (std::uint16_t vbucket = 0; vbucket < gsl::narrow_cast<std::uint16_t>(vbucket_map_.size()); ++vbucket) {
420
+ const range_scan_create_options create_options{
421
+ scope_name_, collection_name_, scan_type_, options_.timeout, {}, vbucket_to_snapshot_requirements_[vbucket],
422
+ options_.ids_only, options_.retry_strategy,
423
+ };
424
+
425
+ // Get the active node for the vbucket (values in vbucket map are the active node id followed by the ids of the replicas)
426
+ auto node_id = vbucket_map_[vbucket][0];
427
+
342
428
  auto stream = std::make_shared<range_scan_stream>(io_,
343
429
  agent_,
344
430
  vbucket,
345
- range_scan_create_options{
346
- scope_name_,
347
- collection_name_,
348
- scan_type_,
349
- options_.timeout,
350
- {},
351
- vbucket_to_snapshot_requirements_[vbucket],
352
- options_.ids_only,
353
- options_.retry_strategy,
354
- },
355
- continue_options);
431
+ node_id,
432
+ create_options,
433
+ continue_options,
434
+ std::static_pointer_cast<scan_stream_manager>(shared_from_this()));
356
435
  streams_[vbucket] = stream;
357
- stream->start();
436
+ streams_[vbucket]->mark_not_started();
437
+ if (stream_count_per_node_.count(node_id) == 0) {
438
+ stream_count_per_node_[node_id] = 0;
439
+ }
358
440
  }
441
+ start_streams(concurrency_);
359
442
 
360
443
  return scan_result(shared_from_this());
361
444
  }
362
445
 
363
- auto next() -> std::future<std::optional<range_scan_item>> override
446
+ void cancel() override
364
447
  {
365
- auto barrier = std::make_shared<std::promise<std::optional<range_scan_item>>>();
366
- if (item_limit == 0 || item_limit-- == 0) {
367
- barrier->set_value(std::nullopt);
368
- streams_.clear();
448
+ cancelled_ = true;
449
+ for (const auto& [vbucket_id, stream] : streams_) {
450
+ stream->cancel();
451
+ }
452
+ }
453
+
454
+ bool is_cancelled() override
455
+ {
456
+ return cancelled_;
457
+ }
458
+
459
+ auto next() -> std::future<tl::expected<range_scan_item, std::error_code>> override
460
+ {
461
+ auto barrier = std::make_shared<std::promise<tl::expected<range_scan_item, std::error_code>>>();
462
+ if (item_limit_ == 0 || item_limit_-- == 0) {
463
+ barrier->set_value(tl::unexpected{ errc::key_value::range_scan_completed });
464
+ cancel();
369
465
  } else {
370
- if (options_.sort == scan_sort::none) {
371
- next_item(streams_.begin(), [barrier](std::optional<range_scan_item> item) { barrier->set_value(std::move(item)); });
372
- } else {
373
- next_item_sorted(
374
- {}, streams_.begin(), [barrier](std::optional<range_scan_item> item) { barrier->set_value(std::move(item)); });
375
- }
466
+ next_item(streams_.begin(), [barrier](std::optional<range_scan_item> item, std::optional<std::error_code> ec) {
467
+ if (item) {
468
+ barrier->set_value(std::move(item.value()));
469
+ } else if (ec) {
470
+ barrier->set_value(tl::unexpected{ ec.value() });
471
+ } else {
472
+ barrier->set_value(tl::unexpected{ errc::key_value::range_scan_completed });
473
+ }
474
+ });
376
475
  }
377
476
  return barrier->get_future();
378
477
  }
379
478
 
380
479
  void next(utils::movable_function<void(range_scan_item, std::error_code)> callback) override
381
480
  {
382
- auto handler = [callback = std::move(callback)](std::optional<range_scan_item> item) mutable {
481
+ auto handler = [callback = std::move(callback)](std::optional<range_scan_item> item, std::optional<std::error_code> ec) mutable {
383
482
  if (item) {
384
483
  callback(std::move(item.value()), {});
484
+ } else if (ec) {
485
+ callback({}, ec.value());
385
486
  } else {
386
487
  callback({}, errc::key_value::range_scan_completed);
387
488
  }
388
489
  };
389
- if (item_limit == 0 || item_limit-- == 0) {
390
- handler({});
490
+ if (item_limit_ == 0 || item_limit_-- == 0) {
491
+ handler({}, {});
492
+ cancel();
391
493
  } else {
392
- if (options_.sort == scan_sort::none) {
393
- next_item(streams_.begin(), std::move(handler));
394
- } else {
395
- next_item_sorted({}, streams_.begin(), std::move(handler));
494
+ next_item(streams_.begin(), std::move(handler));
495
+ }
496
+ }
497
+
498
+ void start_streams(std::uint16_t stream_count)
499
+ {
500
+ std::lock_guard<std::recursive_mutex> const lock(stream_start_mutex_);
501
+
502
+ if (cancelled_) {
503
+ CB_LOG_TRACE("scan has been cancelled, do not start another stream");
504
+ return;
505
+ }
506
+
507
+ if (stream_count_per_node_.empty()) {
508
+ CB_LOG_TRACE("no more vbuckets to scan");
509
+ return;
510
+ }
511
+
512
+ std::uint16_t counter = 0;
513
+ while (counter < stream_count) {
514
+ // Find the node with the least number of active streams from those recorded in stream_count_per_node_
515
+ int16_t least_busy_node{};
516
+ {
517
+ std::lock_guard<std::mutex> const stream_count_lock(stream_count_per_node_mutex_);
518
+
519
+ // Pick a random node
520
+ std::random_device rd;
521
+ std::mt19937_64 gen(rd());
522
+ std::uniform_int_distribution<std::size_t> dis(0, stream_count_per_node_.size() - 1);
523
+ auto it = stream_count_per_node_.begin();
524
+ std::advance(it, static_cast<decltype(stream_count_per_node_)::difference_type>(dis(gen)));
525
+ least_busy_node = it->first;
526
+
527
+ // If any other node has fewer streams running use that
528
+ for (const auto& [node_id, count] : stream_count_per_node_) {
529
+ if (count < stream_count_per_node_[least_busy_node]) {
530
+ least_busy_node = node_id;
531
+ }
532
+ }
396
533
  }
534
+
535
+ std::shared_ptr<range_scan_stream> stream{};
536
+ {
537
+ std::lock_guard<std::mutex> const stream_map_lock(stream_map_mutex_);
538
+
539
+ for (const auto& [v, s] : streams_) {
540
+ if ((s->is_not_started() || s->is_awaiting_retry()) && (s->node_id() == least_busy_node)) {
541
+ CB_LOG_TRACE("selected vbucket {} to scan", v);
542
+ stream = s;
543
+ break;
544
+ }
545
+ }
546
+ }
547
+
548
+ if (stream == nullptr) {
549
+ CB_LOG_TRACE("no vbuckets to scan for node {}", least_busy_node);
550
+ {
551
+ std::lock_guard<std::mutex> const stream_count_lock(stream_count_per_node_mutex_);
552
+ stream_count_per_node_.erase(least_busy_node);
553
+ }
554
+ return start_streams(static_cast<std::uint16_t>(stream_count - counter));
555
+ }
556
+
557
+ auto node_id = stream->node_id();
558
+ active_stream_count_++;
559
+ stream_count_per_node_[node_id]++;
560
+ stream->start();
561
+ counter++;
562
+ }
563
+ }
564
+
565
+ void stream_start_failed(std::int16_t node_id, bool fatal) override
566
+ {
567
+ stream_no_longer_running(node_id);
568
+ if (fatal) {
569
+ cancel();
570
+ } else {
571
+ start_streams(1);
572
+ }
573
+ }
574
+
575
+ void stream_start_failed_awaiting_retry(std::int16_t node_id, std::uint16_t /* vbucket_id */) override
576
+ {
577
+ {
578
+ std::lock_guard<std::mutex> const stream_count_lock(stream_count_per_node_mutex_);
579
+ if (stream_count_per_node_.count(node_id) == 0) {
580
+ stream_count_per_node_[node_id] = 1;
581
+ }
582
+ }
583
+ stream_no_longer_running(node_id);
584
+ if (active_stream_count_ == 0) {
585
+ start_streams(1);
586
+ }
587
+ }
588
+
589
+ void stream_continue_failed(std::int16_t node_id, bool fatal) override
590
+ {
591
+ stream_no_longer_running(node_id);
592
+ if (fatal) {
593
+ cancel();
594
+ } else {
595
+ start_streams(1);
397
596
  }
398
597
  }
399
598
 
599
+ void stream_completed(std::int16_t node_id) override
600
+ {
601
+ stream_no_longer_running(node_id);
602
+ start_streams(1);
603
+ }
604
+
400
605
  private:
606
+ void stream_no_longer_running(std::int16_t node_id)
607
+ {
608
+ {
609
+ std::lock_guard<std::mutex> const stream_count_lock(stream_count_per_node_mutex_);
610
+ if (stream_count_per_node_.count(node_id) > 0) {
611
+ stream_count_per_node_[node_id]--;
612
+ }
613
+ }
614
+ active_stream_count_--;
615
+ }
616
+
401
617
  template<typename Iterator, typename Handler>
402
618
  void next_item(Iterator it, Handler&& handler)
403
619
  {
404
- if (streams_.empty()) {
405
- return handler({});
620
+ if (streams_.empty() || cancelled_) {
621
+ return handler({}, {});
406
622
  }
407
623
  auto vbucket_id = it->first;
408
624
  auto stream = it->second;
409
625
  stream->take([it = std::next(it), vbucket_id, self = shared_from_this(), handler = std::forward<Handler>(handler)](
410
- auto item, bool has_more) mutable {
626
+ auto item, bool has_more, auto ec) mutable {
627
+ if (ec) {
628
+ // Fatal error
629
+ self->streams_.clear();
630
+ return handler({}, ec);
631
+ }
411
632
  if (!has_more) {
633
+ std::lock_guard<std::mutex> const lock(self->stream_map_mutex_);
412
634
  self->streams_.erase(vbucket_id);
413
635
  }
414
636
  if (item) {
415
- return handler(std::move(item));
637
+ return handler(std::move(item), {});
416
638
  }
417
639
  if (self->streams_.empty()) {
418
- return handler({});
640
+ return handler({}, {});
419
641
  }
420
642
  if (it == self->streams_.end()) {
421
643
  it = self->streams_.begin();
@@ -426,60 +648,35 @@ class range_scan_orchestrator_impl
426
648
  });
427
649
  }
428
650
 
429
- template<typename Iterator, typename Handler>
430
- void next_item_sorted(std::optional<lowest_item> lowest, Iterator it, Handler&& handler)
431
- {
432
- if (streams_.empty()) {
433
- return handler({});
434
- }
435
- auto vbucket_id = it->first;
436
- auto stream = it->second;
437
- stream->peek(
438
- [lowest = std::move(lowest), it = std::next(it), vbucket_id, self = shared_from_this(), handler = std::forward<Handler>(handler)](
439
- auto item) mutable {
440
- if (item) {
441
- if (!lowest || less(item->key, lowest->key)) {
442
- lowest = { vbucket_id, item->key };
443
- }
444
- } else {
445
- self->streams_.erase(vbucket_id);
446
- }
447
-
448
- if (it != self->streams_.end()) {
449
- return asio::post(asio::bind_executor(
450
- self->io_, [lowest = std::move(lowest), it, self, handler = std::forward<Handler>(handler)]() mutable {
451
- self->next_item_sorted(std::move(lowest), it, std::forward<Handler>(handler));
452
- }));
453
- } else if (lowest) {
454
- return handler(self->streams_[lowest->vbucket_id]->pop());
455
- } else {
456
- return handler({});
457
- }
458
- });
459
- }
460
-
461
651
  asio::io_context& io_;
462
652
  agent agent_;
463
- std::size_t num_vbuckets_;
653
+ topology::configuration::vbucket_map vbucket_map_;
464
654
  std::string scope_name_;
465
655
  std::string collection_name_;
466
- std::variant<std::monostate, range_scan, sampling_scan> scan_type_;
656
+ std::variant<std::monostate, range_scan, prefix_scan, sampling_scan> scan_type_;
467
657
  range_scan_orchestrator_options options_;
468
658
  std::map<std::size_t, std::optional<range_snapshot_requirements>> vbucket_to_snapshot_requirements_;
469
659
  std::map<std::uint16_t, std::shared_ptr<range_scan_stream>> streams_{};
470
- std::size_t item_limit{ std::numeric_limits<size_t>::max() };
660
+ std::map<std::int16_t, std::atomic_uint16_t> stream_count_per_node_{};
661
+ std::recursive_mutex stream_start_mutex_{};
662
+ std::mutex stream_map_mutex_{};
663
+ std::mutex stream_count_per_node_mutex_{};
664
+ std::atomic_uint16_t active_stream_count_ = 0;
665
+ std::uint16_t concurrency_ = 1;
666
+ std::size_t item_limit_{ std::numeric_limits<size_t>::max() };
667
+ bool cancelled_{ false };
471
668
  };
472
669
 
473
670
  range_scan_orchestrator::range_scan_orchestrator(asio::io_context& io,
474
671
  agent kv_provider,
475
- std::size_t num_vbuckets,
672
+ topology::configuration::vbucket_map vbucket_map,
476
673
  std::string scope_name,
477
674
  std::string collection_name,
478
- std::variant<std::monostate, range_scan, sampling_scan> scan_type,
675
+ std::variant<std::monostate, range_scan, prefix_scan, sampling_scan> scan_type,
479
676
  range_scan_orchestrator_options options)
480
677
  : impl_{ std::make_shared<range_scan_orchestrator_impl>(io,
481
678
  std::move(kv_provider),
482
- num_vbuckets,
679
+ std::move(vbucket_map),
483
680
  std::move(scope_name),
484
681
  std::move(collection_name),
485
682
  std::move(scan_type),