couchbase 4.2.5-dev.3 → 4.2.6-dev

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. package/README.md +81 -9
  2. package/deps/couchbase-cxx-client/CMakeLists.txt +9 -1
  3. package/deps/couchbase-cxx-client/bin/api.rb +234 -0
  4. package/deps/couchbase-cxx-client/bin/create-search-index +18 -135
  5. package/deps/couchbase-cxx-client/bin/init-cluster +17 -139
  6. package/deps/couchbase-cxx-client/bin/load-sample-buckets +54 -0
  7. package/deps/couchbase-cxx-client/core/cluster.hxx +33 -12
  8. package/deps/couchbase-cxx-client/core/cluster_options.hxx +3 -0
  9. package/deps/couchbase-cxx-client/core/crud_component.cxx +51 -22
  10. package/deps/couchbase-cxx-client/core/impl/build_deferred_query_indexes.cxx +115 -50
  11. package/deps/couchbase-cxx-client/core/impl/cluster.cxx +6 -0
  12. package/deps/couchbase-cxx-client/core/impl/create_bucket.cxx +155 -0
  13. package/deps/couchbase-cxx-client/core/impl/create_query_index.cxx +172 -59
  14. package/deps/couchbase-cxx-client/core/impl/dns_srv_tracker.cxx +2 -1
  15. package/deps/couchbase-cxx-client/core/impl/drop_bucket.cxx +66 -0
  16. package/deps/couchbase-cxx-client/core/impl/drop_query_index.cxx +138 -59
  17. package/deps/couchbase-cxx-client/core/impl/flush_bucket.cxx +66 -0
  18. package/deps/couchbase-cxx-client/core/impl/get_all_buckets.cxx +163 -0
  19. package/deps/couchbase-cxx-client/core/impl/get_all_query_indexes.cxx +67 -37
  20. package/deps/couchbase-cxx-client/core/impl/get_bucket.cxx +153 -0
  21. package/deps/couchbase-cxx-client/core/impl/internal_manager_error_context.cxx +113 -0
  22. package/deps/couchbase-cxx-client/core/impl/internal_manager_error_context.hxx +60 -0
  23. package/deps/couchbase-cxx-client/core/impl/key_value_error_category.cxx +2 -4
  24. package/deps/couchbase-cxx-client/core/impl/manager_error_context.cxx +100 -0
  25. package/deps/couchbase-cxx-client/core/impl/query.cxx +1 -0
  26. package/deps/couchbase-cxx-client/core/impl/update_bucket.cxx +130 -0
  27. package/deps/couchbase-cxx-client/core/impl/watch_query_indexes.cxx +53 -29
  28. package/deps/couchbase-cxx-client/core/io/dns_client.cxx +71 -38
  29. package/deps/couchbase-cxx-client/core/io/dns_config.cxx +5 -4
  30. package/deps/couchbase-cxx-client/core/io/mcbp_session.cxx +5 -6
  31. package/deps/couchbase-cxx-client/core/meta/features.hxx +6 -0
  32. package/deps/couchbase-cxx-client/core/operations/document_query.cxx +11 -0
  33. package/deps/couchbase-cxx-client/core/operations/document_query.hxx +1 -0
  34. package/deps/couchbase-cxx-client/core/origin.cxx +270 -0
  35. package/deps/couchbase-cxx-client/core/origin.hxx +2 -0
  36. package/deps/couchbase-cxx-client/core/protocol/status.cxx +2 -2
  37. package/deps/couchbase-cxx-client/core/range_scan_options.cxx +3 -27
  38. package/deps/couchbase-cxx-client/core/range_scan_options.hxx +13 -17
  39. package/deps/couchbase-cxx-client/core/range_scan_orchestrator.cxx +367 -170
  40. package/deps/couchbase-cxx-client/core/range_scan_orchestrator.hxx +13 -2
  41. package/deps/couchbase-cxx-client/core/range_scan_orchestrator_options.hxx +5 -3
  42. package/deps/couchbase-cxx-client/core/scan_options.hxx +0 -19
  43. package/deps/couchbase-cxx-client/core/scan_result.cxx +19 -5
  44. package/deps/couchbase-cxx-client/core/scan_result.hxx +5 -2
  45. package/deps/couchbase-cxx-client/core/timeout_defaults.hxx +2 -3
  46. package/deps/couchbase-cxx-client/core/topology/capabilities.hxx +1 -0
  47. package/deps/couchbase-cxx-client/core/topology/capabilities_fmt.hxx +2 -0
  48. package/deps/couchbase-cxx-client/core/topology/collections_manifest_fmt.hxx +1 -1
  49. package/deps/couchbase-cxx-client/core/topology/configuration.hxx +5 -0
  50. package/deps/couchbase-cxx-client/core/topology/configuration_json.hxx +2 -0
  51. package/deps/couchbase-cxx-client/core/utils/connection_string.cxx +4 -0
  52. package/deps/couchbase-cxx-client/couchbase/behavior_options.hxx +19 -2
  53. package/deps/couchbase-cxx-client/couchbase/bucket_manager.hxx +135 -0
  54. package/deps/couchbase-cxx-client/couchbase/build_query_index_options.hxx +0 -30
  55. package/deps/couchbase-cxx-client/couchbase/cluster.hxx +14 -0
  56. package/deps/couchbase-cxx-client/couchbase/collection_query_index_manager.hxx +7 -48
  57. package/deps/couchbase-cxx-client/couchbase/create_bucket_options.hxx +41 -0
  58. package/deps/couchbase-cxx-client/couchbase/create_primary_query_index_options.hxx +0 -29
  59. package/deps/couchbase-cxx-client/couchbase/create_query_index_options.hxx +0 -33
  60. package/deps/couchbase-cxx-client/couchbase/drop_bucket_options.hxx +41 -0
  61. package/deps/couchbase-cxx-client/couchbase/drop_primary_query_index_options.hxx +0 -30
  62. package/deps/couchbase-cxx-client/couchbase/drop_query_index_options.hxx +0 -31
  63. package/deps/couchbase-cxx-client/couchbase/error_codes.hxx +1 -2
  64. package/deps/couchbase-cxx-client/couchbase/flush_bucket_options.hxx +41 -0
  65. package/deps/couchbase-cxx-client/couchbase/get_all_buckets_options.hxx +44 -0
  66. package/deps/couchbase-cxx-client/couchbase/get_all_query_indexes_options.hxx +0 -30
  67. package/deps/couchbase-cxx-client/couchbase/get_bucket_options.hxx +43 -0
  68. package/deps/couchbase-cxx-client/couchbase/management/bucket_settings.hxx +116 -0
  69. package/deps/couchbase-cxx-client/couchbase/manager_error_context.hxx +29 -53
  70. package/deps/couchbase-cxx-client/couchbase/query_index_manager.hxx +16 -83
  71. package/deps/couchbase-cxx-client/couchbase/query_options.hxx +18 -0
  72. package/deps/couchbase-cxx-client/couchbase/security_options.hxx +15 -0
  73. package/deps/couchbase-cxx-client/couchbase/update_bucket_options.hxx +41 -0
  74. package/deps/couchbase-cxx-client/couchbase/watch_query_indexes_options.hxx +0 -31
  75. package/deps/couchbase-cxx-client/docs/cbc-analytics.md +1 -0
  76. package/deps/couchbase-cxx-client/docs/cbc-get.md +1 -0
  77. package/deps/couchbase-cxx-client/docs/cbc-pillowfight.md +1 -0
  78. package/deps/couchbase-cxx-client/docs/cbc-query.md +1 -0
  79. package/deps/couchbase-cxx-client/docs/cbc.md +10 -0
  80. package/deps/couchbase-cxx-client/test/CMakeLists.txt +1 -0
  81. package/deps/couchbase-cxx-client/test/test_integration_collections.cxx +6 -0
  82. package/deps/couchbase-cxx-client/test/test_integration_crud.cxx +5 -0
  83. package/deps/couchbase-cxx-client/test/test_integration_examples.cxx +137 -1
  84. package/deps/couchbase-cxx-client/test/test_integration_management.cxx +709 -266
  85. package/deps/couchbase-cxx-client/test/test_integration_query.cxx +19 -7
  86. package/deps/couchbase-cxx-client/test/test_integration_range_scan.cxx +351 -112
  87. package/deps/couchbase-cxx-client/test/test_integration_search.cxx +10 -1
  88. package/deps/couchbase-cxx-client/test/test_transaction_public_async_api.cxx +13 -12
  89. package/deps/couchbase-cxx-client/test/test_transaction_public_blocking_api.cxx +27 -21
  90. package/deps/couchbase-cxx-client/test/test_unit_query.cxx +75 -0
  91. package/deps/couchbase-cxx-client/test/utils/server_version.hxx +5 -0
  92. package/deps/couchbase-cxx-client/test/utils/wait_until.cxx +29 -10
  93. package/deps/couchbase-cxx-client/test/utils/wait_until.hxx +3 -1
  94. package/deps/couchbase-cxx-client/tools/utils.cxx +4 -1
  95. package/dist/binding.d.ts +21 -16
  96. package/dist/binding.js +1 -4
  97. package/dist/bindingutilities.d.ts +6 -1
  98. package/dist/bindingutilities.js +36 -1
  99. package/dist/collection.d.ts +65 -3
  100. package/dist/collection.js +107 -0
  101. package/dist/crudoptypes.d.ts +34 -0
  102. package/dist/crudoptypes.js +18 -1
  103. package/dist/queryexecutor.js +1 -0
  104. package/dist/querytypes.d.ts +7 -0
  105. package/dist/rangeScan.d.ts +107 -0
  106. package/dist/rangeScan.js +91 -0
  107. package/dist/streamablepromises.d.ts +6 -0
  108. package/dist/streamablepromises.js +25 -1
  109. package/package.json +13 -14
  110. package/scripts/createPlatformPackages.js +1 -4
  111. package/src/addondata.hpp +1 -0
  112. package/src/binding.cpp +5 -2
  113. package/src/connection.cpp +108 -2
  114. package/src/connection.hpp +1 -0
  115. package/src/constants.cpp +2 -12
  116. package/src/jstocbpp_autogen.hpp +49 -22
  117. package/src/jstocbpp_basic.hpp +2 -8
  118. package/src/mutationtoken.cpp +13 -0
  119. package/src/scan_iterator.cpp +90 -0
  120. package/src/scan_iterator.hpp +30 -0
  121. package/tools/gen-bindings-json.py +9 -8
  122. package/deps/couchbase-cxx-client/core/impl/collection_query_index_manager.cxx +0 -93
@@ -19,8 +19,6 @@
19
19
  #include "core/logger/logger.hxx"
20
20
  #include "couchbase/error_codes.hxx"
21
21
 
22
- #include <couchbase/retry_strategy.hxx>
23
-
24
22
  #include <asio/bind_executor.hpp>
25
23
  #include <asio/io_context.hpp>
26
24
  #include <asio/post.hpp>
@@ -30,6 +28,7 @@
30
28
  #include <gsl/narrow>
31
29
 
32
30
  #include <future>
31
+ #include <random>
33
32
 
34
33
  namespace couchbase::core
35
34
  {
@@ -55,6 +54,14 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
55
54
  {
56
55
  struct failed {
57
56
  std::error_code ec;
57
+ bool fatal{ true };
58
+ };
59
+
60
+ struct not_started {
61
+ };
62
+
63
+ struct awaiting_retry {
64
+ std::error_code ec;
58
65
  };
59
66
 
60
67
  struct running {
@@ -68,32 +75,73 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
68
75
  range_scan_stream(asio::io_context& io,
69
76
  agent kv_provider,
70
77
  std::uint16_t vbucket_id,
78
+ std::int16_t node_id,
71
79
  range_scan_create_options create_options,
72
- range_scan_continue_options continue_options)
80
+ range_scan_continue_options continue_options,
81
+ std::shared_ptr<scan_stream_manager> stream_manager)
73
82
  : items_{ io, continue_options.batch_item_limit }
74
83
  , agent_{ std::move(kv_provider) }
75
84
  , vbucket_id_{ vbucket_id }
85
+ , node_id_{ node_id }
76
86
  , create_options_{ std::move(create_options) }
77
87
  , continue_options_{ std::move(continue_options) }
88
+ , stream_manager_{ std::move(stream_manager) }
78
89
  {
79
90
  }
80
91
 
81
92
  void start()
82
93
  {
94
+ // Fail the stream if more time since the timeout has elapsed since the stream was first attempted (if this is a retry)
95
+ if (first_attempt_timestamp_.has_value()) {
96
+ if (std::chrono::steady_clock::now() - first_attempt_timestamp_.value() > create_options_.timeout) {
97
+ CB_LOG_DEBUG("stream for vbucket_id {} cannot be retried any longer because it has exceeded the timeout", vbucket_id_);
98
+ state_ = failed{ errc::common::unambiguous_timeout, !is_sampling_scan() };
99
+ stream_manager_->stream_start_failed(node_id_, error_is_fatal());
100
+ drain_waiting_queue();
101
+ return;
102
+ }
103
+ } else {
104
+ first_attempt_timestamp_ = std::chrono::steady_clock::now();
105
+ }
106
+
107
+ CB_LOG_TRACE("starting stream {} in node {}", vbucket_id_, node_id_);
108
+ state_ = std::monostate{};
83
109
  if (std::holds_alternative<range_scan>(create_options_.scan_type) && !last_seen_key_.empty()) {
84
- std::get<range_scan>(create_options_.scan_type).start_.id = last_seen_key_;
110
+ std::get<range_scan>(create_options_.scan_type).from = scan_term{ last_seen_key_ };
85
111
  }
86
112
 
87
113
  auto op = agent_.range_scan_create(vbucket_id_, create_options_, [self = shared_from_this()](auto res, auto ec) {
88
114
  if (ec) {
89
- self->state_ = failed{ ec };
90
- self->drain_waiting_queue();
91
115
  if (ec == errc::key_value::document_not_found) {
116
+ // Benign error
92
117
  CB_LOG_DEBUG("ignoring vbucket_id {} because no documents exist for it", self->vbucket_id_);
118
+ CB_LOG_TRACE("setting state for stream {} to FAILED", self->vbucket_id_);
119
+ self->state_ = failed{ ec, false };
120
+ self->stream_manager_->stream_start_failed(self->node_id_, self->error_is_fatal());
121
+ } else if (ec == errc::common::temporary_failure) {
122
+ // Retryable error
123
+ CB_LOG_DEBUG("received busy status from vbucket with ID {} - reducing concurrency & will retry", self->vbucket_id_);
124
+ CB_LOG_TRACE("setting state for stream {} to AWAITING_RETRY", self->vbucket_id_);
125
+ self->state_ = awaiting_retry{ ec };
126
+ self->stream_manager_->stream_start_failed_awaiting_retry(self->node_id_, self->vbucket_id_);
127
+ } else if (ec == errc::common::internal_server_failure || ec == errc::common::collection_not_found) {
128
+ // Fatal errors
129
+ CB_LOG_TRACE("setting state for stream {} to FAILED", self->vbucket_id_);
130
+ self->state_ = failed{ ec, true };
131
+ self->stream_manager_->stream_start_failed(self->node_id_, self->error_is_fatal());
132
+ } else {
133
+ // Unexpected errors
134
+ CB_LOG_DEBUG(
135
+ "received unexpected error {} from stream for vbucket {} ({})", ec.value(), self->vbucket_id_, ec.message());
136
+ CB_LOG_TRACE("setting state for stream {} to FAILED", self->vbucket_id_);
137
+ self->state_ = failed{ ec, true };
138
+ self->stream_manager_->stream_start_failed(self->node_id_, self->error_is_fatal());
93
139
  }
140
+ self->drain_waiting_queue();
94
141
  return;
95
142
  }
96
143
  self->state_ = running{ std::move(res.scan_uuid) };
144
+ CB_LOG_TRACE("setting state for stream {} to RUNNING", self->vbucket_id_);
97
145
  self->drain_waiting_queue();
98
146
  self->resume();
99
147
  });
@@ -105,86 +153,126 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
105
153
  if (is_running()) {
106
154
  agent_.range_scan_cancel(uuid(), vbucket_id_, {}, [](auto /* res */, auto /* ec */) {});
107
155
  }
108
- state_ = failed{ ec };
156
+
157
+ items_.cancel();
109
158
  items_.close();
159
+
160
+ bool fatal{};
161
+ if (ec == errc::key_value::document_not_found || ec == errc::common::authentication_failure ||
162
+ ec == errc::common::collection_not_found || ec == errc::common::request_canceled) {
163
+ // Errors that are fatal unless this is a sampling scan
164
+ fatal = !is_sampling_scan();
165
+ } else if (ec == errc::common::feature_not_available || ec == errc::common::invalid_argument ||
166
+ ec == errc::common::temporary_failure) {
167
+ // Errors that are always fatal
168
+ fatal = true;
169
+ } else {
170
+ // Unexpected error - always fatal
171
+ CB_LOG_DEBUG("received unexpected error {} from stream for vbucket during range scan continue {} ({})",
172
+ ec.value(),
173
+ vbucket_id_,
174
+ ec.message());
175
+ fatal = true;
176
+ }
177
+
178
+ CB_LOG_TRACE("setting state for stream {} to FAILED after range scan continue", vbucket_id_);
179
+ state_ = failed{ ec, fatal };
180
+ stream_manager_->stream_continue_failed(node_id_, fatal);
110
181
  }
111
182
  }
112
183
 
184
+ void mark_not_started()
185
+ {
186
+ state_ = not_started{};
187
+ }
188
+
113
189
  void complete()
114
190
  {
115
191
  if (!is_failed() && !is_completed()) {
192
+ CB_LOG_TRACE("setting state for stream {} to COMPLETED", vbucket_id_);
193
+
194
+ stream_manager_->stream_completed(node_id_);
116
195
  state_ = completed{};
196
+ drain_waiting_queue();
117
197
  }
118
198
  }
119
199
 
120
- auto pop() -> std::optional<range_scan_item>
200
+ void cancel()
121
201
  {
122
- if (peeked_) {
123
- std::optional<range_scan_item> item{};
124
- std::swap(peeked_, item);
125
- return item;
202
+ if (!should_cancel_) {
203
+ should_cancel_ = true;
204
+ items_.cancel();
205
+ items_.close();
126
206
  }
127
- return peeked_;
128
207
  }
129
208
 
130
209
  template<typename Handler>
131
- void peek(Handler&& handler)
210
+ void take(Handler&& handler)
132
211
  {
133
212
  do_when_ready([self = shared_from_this(), handler = std::forward<Handler>(handler)]() mutable {
134
- self->peek_when_ready(std::forward<Handler>(handler));
213
+ self->take_when_ready(std::forward<Handler>(handler));
135
214
  });
136
215
  }
137
216
 
138
- template<typename Handler>
139
- void take(Handler&& handler)
217
+ [[nodiscard]] auto node_id() const -> int16_t
140
218
  {
141
- do_when_ready([self = shared_from_this(), handler = std::forward<Handler>(handler)]() mutable {
142
- self->take_when_ready(std::forward<Handler>(handler));
143
- });
219
+ return node_id_;
144
220
  }
145
221
 
146
- private:
147
- template<typename Handler>
148
- void peek_when_ready(Handler&& handler)
222
+ [[nodiscard]] auto is_ready() const -> bool
149
223
  {
150
- if (is_failed()) {
151
- return handler(std::optional<range_scan_item>{});
152
- }
224
+ return !std::holds_alternative<std::monostate>(state_);
225
+ }
153
226
 
154
- if (peeked_) {
155
- return handler(peeked_);
156
- }
227
+ [[nodiscard]] auto is_not_started() const -> bool
228
+ {
229
+ return std::holds_alternative<not_started>(state_);
230
+ }
157
231
 
158
- if (is_completed() && !items_.ready()) {
159
- return handler(std::optional<range_scan_item>{});
160
- }
232
+ [[nodiscard]] auto is_awaiting_retry() const -> bool
233
+ {
234
+ return std::holds_alternative<awaiting_retry>(state_);
235
+ }
161
236
 
162
- items_.async_receive(
163
- [self = shared_from_this(), handler = std::forward<Handler>(handler)](std::error_code ec, range_scan_item item) mutable {
164
- if (ec) {
165
- self->peeked_ = {};
166
- } else {
167
- self->peeked_ = std::move(item);
168
- }
169
- handler(self->peeked_);
170
- });
237
+ [[nodiscard]] auto is_running() const -> bool
238
+ {
239
+ return std::holds_alternative<running>(state_);
240
+ }
241
+
242
+ [[nodiscard]] auto is_failed() const -> bool
243
+ {
244
+ return std::holds_alternative<failed>(state_);
171
245
  }
172
246
 
247
+ [[nodiscard]] auto is_completed() const -> bool
248
+ {
249
+ return std::holds_alternative<completed>(state_);
250
+ }
251
+
252
+ private:
173
253
  template<typename Handler>
174
254
  void take_when_ready(Handler&& handler)
175
255
  {
256
+
176
257
  if (is_failed()) {
177
- return handler(std::optional<range_scan_item>{}, false);
258
+ if (error_is_fatal()) {
259
+ return handler(std::optional<range_scan_item>{}, false, std::optional<std::error_code>{ error() });
260
+ } else {
261
+ return handler(std::optional<range_scan_item>{}, false, std::optional<std::error_code>{});
262
+ }
263
+ }
264
+ if (is_awaiting_retry() || is_not_started()) {
265
+ return handler(std::optional<range_scan_item>{}, true, std::optional<std::error_code>{});
178
266
  }
179
267
  if (!items_.ready()) {
180
- return handler(std::optional<range_scan_item>{}, is_running());
268
+ return handler(std::optional<range_scan_item>{}, is_running(), std::optional<std::error_code>{});
181
269
  }
182
270
  items_.async_receive(
183
271
  [self = shared_from_this(), handler = std::forward<Handler>(handler)](std::error_code ec, range_scan_item item) mutable {
184
272
  if (ec) {
185
- return handler(std::optional<range_scan_item>{}, false);
273
+ return handler(std::optional<range_scan_item>{}, false, std::optional<std::error_code>{});
186
274
  }
187
- handler(std::optional<range_scan_item>{ std::move(item) }, true);
275
+ handler(std::optional<range_scan_item>{ std::move(item) }, true, std::optional<std::error_code>{});
188
276
  });
189
277
  }
190
278
 
@@ -211,6 +299,13 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
211
299
  if (!is_running()) {
212
300
  return;
213
301
  }
302
+ if (should_cancel_) {
303
+ agent_.range_scan_cancel(uuid(), vbucket_id_, {}, [](auto /* res */, auto /* ec */) {});
304
+ items_.close();
305
+ items_.cancel();
306
+ return;
307
+ }
308
+
214
309
  agent_.range_scan_continue(
215
310
  uuid(),
216
311
  vbucket_id_,
@@ -236,26 +331,6 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
236
331
  });
237
332
  }
238
333
 
239
- [[nodiscard]] auto is_ready() const -> bool
240
- {
241
- return !std::holds_alternative<std::monostate>(state_);
242
- }
243
-
244
- [[nodiscard]] auto is_running() const -> bool
245
- {
246
- return std::holds_alternative<running>(state_);
247
- }
248
-
249
- [[nodiscard]] auto is_failed() const -> bool
250
- {
251
- return std::holds_alternative<failed>(state_);
252
- }
253
-
254
- [[nodiscard]] auto is_completed() const -> bool
255
- {
256
- return std::holds_alternative<completed>(state_);
257
- }
258
-
259
334
  [[nodiscard]] auto uuid() const -> std::vector<std::byte>
260
335
  {
261
336
  if (is_running()) {
@@ -272,150 +347,297 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
272
347
  return {};
273
348
  }
274
349
 
350
+ [[nodiscard]] auto error_is_fatal() const -> bool
351
+ {
352
+ if (is_failed()) {
353
+ return std::get<failed>(state_).fatal;
354
+ }
355
+ return {};
356
+ }
357
+
358
+ [[nodiscard]] auto is_sampling_scan() const -> bool
359
+ {
360
+ return std::holds_alternative<sampling_scan>(create_options_.scan_type);
361
+ }
362
+
275
363
  asio::experimental::concurrent_channel<void(std::error_code, range_scan_item)> items_;
276
364
  agent agent_;
277
365
  std::uint16_t vbucket_id_;
366
+ std::int16_t node_id_;
278
367
  range_scan_create_options create_options_;
279
368
  range_scan_continue_options continue_options_;
280
- std::vector<std::byte> last_seen_key_{};
281
- std::variant<std::monostate, failed, running, completed> state_{};
282
- std::optional<range_scan_item> peeked_{};
369
+ std::shared_ptr<scan_stream_manager> stream_manager_;
370
+ std::string last_seen_key_{};
371
+ std::variant<std::monostate, not_started, failed, awaiting_retry, running, completed> state_{};
372
+ bool should_cancel_{ false };
373
+ std::optional<std::chrono::time_point<std::chrono::steady_clock>> first_attempt_timestamp_{};
283
374
  std::vector<utils::movable_function<void()>> waiting_queue_{};
284
375
  };
285
376
 
286
- struct lowest_item {
287
- std::uint16_t vbucket_id;
288
- std::vector<std::byte> key;
289
- };
290
-
291
- static auto
292
- less(std::vector<std::byte>& a, std::vector<std::byte>& b) -> bool
293
- {
294
- auto common_size = std::min(a.size(), b.size());
295
- for (std::size_t i = 0; i < common_size; ++i) {
296
- if (a[i] < b[i]) {
297
- return true;
298
- }
299
- if (a[i] > b[i]) {
300
- return false;
301
- }
302
- }
303
- return a.size() < b.size();
304
- }
305
-
306
377
  class range_scan_orchestrator_impl
307
378
  : public std::enable_shared_from_this<range_scan_orchestrator_impl>
308
379
  , public range_scan_item_iterator
380
+ , public scan_stream_manager
309
381
  {
310
382
  public:
311
383
  range_scan_orchestrator_impl(asio::io_context& io,
312
384
  agent kv_provider,
313
- std::size_t num_vbuckets,
385
+ topology::configuration::vbucket_map vbucket_map,
314
386
  std::string scope_name,
315
387
  std::string collection_name,
316
- std::variant<std::monostate, range_scan, sampling_scan> scan_type,
388
+ std::variant<std::monostate, range_scan, prefix_scan, sampling_scan> scan_type,
317
389
  range_scan_orchestrator_options options)
318
390
  : io_{ io }
319
391
  , agent_{ std::move(kv_provider) }
320
- , num_vbuckets_{ num_vbuckets }
392
+ , vbucket_map_{ std::move(vbucket_map) }
321
393
  , scope_name_{ std::move(scope_name) }
322
394
  , collection_name_{ std::move(collection_name) }
323
395
  , scan_type_{ std::move(scan_type) }
324
396
  , options_{ std::move(options) }
325
397
  , vbucket_to_snapshot_requirements_{ mutation_state_to_snapshot_requirements(options_.consistent_with) }
398
+ , concurrency_{ options_.concurrency }
326
399
  {
400
+
327
401
  if (std::holds_alternative<sampling_scan>(scan_type_)) {
328
- item_limit = std::get<sampling_scan>(scan_type).limit;
402
+ item_limit_ = std::get<sampling_scan>(scan_type).limit;
329
403
  }
330
404
  }
331
405
 
332
406
  auto scan() -> tl::expected<scan_result, std::error_code>
333
407
  {
334
- if (item_limit == 0) {
408
+ if (item_limit_ == 0) {
409
+ return tl::unexpected(errc::common::invalid_argument);
410
+ }
411
+ if (concurrency_ <= 0) {
335
412
  return tl::unexpected(errc::common::invalid_argument);
336
413
  }
337
- range_scan_continue_options continue_options{
338
- options_.batch_item_limit, options_.batch_byte_limit, options_.batch_time_limit, options_.retry_strategy, options_.ids_only,
414
+
415
+ auto batch_time_limit = std::chrono::duration_cast<std::chrono::milliseconds>(0.9 * options_.timeout);
416
+ range_scan_continue_options const continue_options{
417
+ options_.batch_item_limit, options_.batch_byte_limit, batch_time_limit, options_.timeout, options_.retry_strategy,
339
418
  };
340
- continue_options.batch_time_limit = std::chrono::seconds{ 10 };
341
- for (std::uint16_t vbucket = 0; vbucket < gsl::narrow_cast<std::uint16_t>(num_vbuckets_); ++vbucket) {
419
+ for (std::uint16_t vbucket = 0; vbucket < gsl::narrow_cast<std::uint16_t>(vbucket_map_.size()); ++vbucket) {
420
+ const range_scan_create_options create_options{
421
+ scope_name_, collection_name_, scan_type_, options_.timeout, {}, vbucket_to_snapshot_requirements_[vbucket],
422
+ options_.ids_only, options_.retry_strategy,
423
+ };
424
+
425
+ // Get the active node for the vbucket (values in vbucket map are the active node id followed by the ids of the replicas)
426
+ auto node_id = vbucket_map_[vbucket][0];
427
+
342
428
  auto stream = std::make_shared<range_scan_stream>(io_,
343
429
  agent_,
344
430
  vbucket,
345
- range_scan_create_options{
346
- scope_name_,
347
- collection_name_,
348
- scan_type_,
349
- options_.timeout,
350
- {},
351
- vbucket_to_snapshot_requirements_[vbucket],
352
- options_.ids_only,
353
- options_.retry_strategy,
354
- },
355
- continue_options);
431
+ node_id,
432
+ create_options,
433
+ continue_options,
434
+ std::static_pointer_cast<scan_stream_manager>(shared_from_this()));
356
435
  streams_[vbucket] = stream;
357
- stream->start();
436
+ streams_[vbucket]->mark_not_started();
437
+ if (stream_count_per_node_.count(node_id) == 0) {
438
+ stream_count_per_node_[node_id] = 0;
439
+ }
358
440
  }
441
+ start_streams(concurrency_);
359
442
 
360
443
  return scan_result(shared_from_this());
361
444
  }
362
445
 
363
- auto next() -> std::future<std::optional<range_scan_item>> override
446
+ void cancel() override
364
447
  {
365
- auto barrier = std::make_shared<std::promise<std::optional<range_scan_item>>>();
366
- if (item_limit == 0 || item_limit-- == 0) {
367
- barrier->set_value(std::nullopt);
368
- streams_.clear();
448
+ cancelled_ = true;
449
+ for (const auto& [vbucket_id, stream] : streams_) {
450
+ stream->cancel();
451
+ }
452
+ }
453
+
454
+ bool is_cancelled() override
455
+ {
456
+ return cancelled_;
457
+ }
458
+
459
+ auto next() -> std::future<tl::expected<range_scan_item, std::error_code>> override
460
+ {
461
+ auto barrier = std::make_shared<std::promise<tl::expected<range_scan_item, std::error_code>>>();
462
+ if (item_limit_ == 0 || item_limit_-- == 0) {
463
+ barrier->set_value(tl::unexpected{ errc::key_value::range_scan_completed });
464
+ cancel();
369
465
  } else {
370
- if (options_.sort == scan_sort::none) {
371
- next_item(streams_.begin(), [barrier](std::optional<range_scan_item> item) { barrier->set_value(std::move(item)); });
372
- } else {
373
- next_item_sorted(
374
- {}, streams_.begin(), [barrier](std::optional<range_scan_item> item) { barrier->set_value(std::move(item)); });
375
- }
466
+ next_item(streams_.begin(), [barrier](std::optional<range_scan_item> item, std::optional<std::error_code> ec) {
467
+ if (item) {
468
+ barrier->set_value(std::move(item.value()));
469
+ } else if (ec) {
470
+ barrier->set_value(tl::unexpected{ ec.value() });
471
+ } else {
472
+ barrier->set_value(tl::unexpected{ errc::key_value::range_scan_completed });
473
+ }
474
+ });
376
475
  }
377
476
  return barrier->get_future();
378
477
  }
379
478
 
380
479
  void next(utils::movable_function<void(range_scan_item, std::error_code)> callback) override
381
480
  {
382
- auto handler = [callback = std::move(callback)](std::optional<range_scan_item> item) mutable {
481
+ auto handler = [callback = std::move(callback)](std::optional<range_scan_item> item, std::optional<std::error_code> ec) mutable {
383
482
  if (item) {
384
483
  callback(std::move(item.value()), {});
484
+ } else if (ec) {
485
+ callback({}, ec.value());
385
486
  } else {
386
487
  callback({}, errc::key_value::range_scan_completed);
387
488
  }
388
489
  };
389
- if (item_limit == 0 || item_limit-- == 0) {
390
- handler({});
490
+ if (item_limit_ == 0 || item_limit_-- == 0) {
491
+ handler({}, {});
492
+ cancel();
391
493
  } else {
392
- if (options_.sort == scan_sort::none) {
393
- next_item(streams_.begin(), std::move(handler));
394
- } else {
395
- next_item_sorted({}, streams_.begin(), std::move(handler));
494
+ next_item(streams_.begin(), std::move(handler));
495
+ }
496
+ }
497
+
498
+ void start_streams(std::uint16_t stream_count)
499
+ {
500
+ std::lock_guard<std::recursive_mutex> const lock(stream_start_mutex_);
501
+
502
+ if (cancelled_) {
503
+ CB_LOG_TRACE("scan has been cancelled, do not start another stream");
504
+ return;
505
+ }
506
+
507
+ if (stream_count_per_node_.empty()) {
508
+ CB_LOG_TRACE("no more vbuckets to scan");
509
+ return;
510
+ }
511
+
512
+ std::uint16_t counter = 0;
513
+ while (counter < stream_count) {
514
+ // Find the node with the least number of active streams from those recorded in stream_count_per_node_
515
+ int16_t least_busy_node{};
516
+ {
517
+ std::lock_guard<std::mutex> const stream_count_lock(stream_count_per_node_mutex_);
518
+
519
+ // Pick a random node
520
+ std::random_device rd;
521
+ std::mt19937_64 gen(rd());
522
+ std::uniform_int_distribution<std::size_t> dis(0, stream_count_per_node_.size() - 1);
523
+ auto it = stream_count_per_node_.begin();
524
+ std::advance(it, static_cast<decltype(stream_count_per_node_)::difference_type>(dis(gen)));
525
+ least_busy_node = it->first;
526
+
527
+ // If any other node has fewer streams running use that
528
+ for (const auto& [node_id, count] : stream_count_per_node_) {
529
+ if (count < stream_count_per_node_[least_busy_node]) {
530
+ least_busy_node = node_id;
531
+ }
532
+ }
396
533
  }
534
+
535
+ std::shared_ptr<range_scan_stream> stream{};
536
+ {
537
+ std::lock_guard<std::mutex> const stream_map_lock(stream_map_mutex_);
538
+
539
+ for (const auto& [v, s] : streams_) {
540
+ if ((s->is_not_started() || s->is_awaiting_retry()) && (s->node_id() == least_busy_node)) {
541
+ CB_LOG_TRACE("selected vbucket {} to scan", v);
542
+ stream = s;
543
+ break;
544
+ }
545
+ }
546
+ }
547
+
548
+ if (stream == nullptr) {
549
+ CB_LOG_TRACE("no vbuckets to scan for node {}", least_busy_node);
550
+ {
551
+ std::lock_guard<std::mutex> const stream_count_lock(stream_count_per_node_mutex_);
552
+ stream_count_per_node_.erase(least_busy_node);
553
+ }
554
+ return start_streams(static_cast<std::uint16_t>(stream_count - counter));
555
+ }
556
+
557
+ auto node_id = stream->node_id();
558
+ active_stream_count_++;
559
+ stream_count_per_node_[node_id]++;
560
+ stream->start();
561
+ counter++;
562
+ }
563
+ }
564
+
565
+ void stream_start_failed(std::int16_t node_id, bool fatal) override
566
+ {
567
+ stream_no_longer_running(node_id);
568
+ if (fatal) {
569
+ cancel();
570
+ } else {
571
+ start_streams(1);
572
+ }
573
+ }
574
+
575
+ void stream_start_failed_awaiting_retry(std::int16_t node_id, std::uint16_t /* vbucket_id */) override
576
+ {
577
+ {
578
+ std::lock_guard<std::mutex> const stream_count_lock(stream_count_per_node_mutex_);
579
+ if (stream_count_per_node_.count(node_id) == 0) {
580
+ stream_count_per_node_[node_id] = 1;
581
+ }
582
+ }
583
+ stream_no_longer_running(node_id);
584
+ if (active_stream_count_ == 0) {
585
+ start_streams(1);
586
+ }
587
+ }
588
+
589
+ void stream_continue_failed(std::int16_t node_id, bool fatal) override
590
+ {
591
+ stream_no_longer_running(node_id);
592
+ if (fatal) {
593
+ cancel();
594
+ } else {
595
+ start_streams(1);
397
596
  }
398
597
  }
399
598
 
599
+ void stream_completed(std::int16_t node_id) override
600
+ {
601
+ stream_no_longer_running(node_id);
602
+ start_streams(1);
603
+ }
604
+
400
605
  private:
606
+ void stream_no_longer_running(std::int16_t node_id)
607
+ {
608
+ {
609
+ std::lock_guard<std::mutex> const stream_count_lock(stream_count_per_node_mutex_);
610
+ if (stream_count_per_node_.count(node_id) > 0) {
611
+ stream_count_per_node_[node_id]--;
612
+ }
613
+ }
614
+ active_stream_count_--;
615
+ }
616
+
401
617
  template<typename Iterator, typename Handler>
402
618
  void next_item(Iterator it, Handler&& handler)
403
619
  {
404
- if (streams_.empty()) {
405
- return handler({});
620
+ if (streams_.empty() || cancelled_) {
621
+ return handler({}, {});
406
622
  }
407
623
  auto vbucket_id = it->first;
408
624
  auto stream = it->second;
409
625
  stream->take([it = std::next(it), vbucket_id, self = shared_from_this(), handler = std::forward<Handler>(handler)](
410
- auto item, bool has_more) mutable {
626
+ auto item, bool has_more, auto ec) mutable {
627
+ if (ec) {
628
+ // Fatal error
629
+ self->streams_.clear();
630
+ return handler({}, ec);
631
+ }
411
632
  if (!has_more) {
633
+ std::lock_guard<std::mutex> const lock(self->stream_map_mutex_);
412
634
  self->streams_.erase(vbucket_id);
413
635
  }
414
636
  if (item) {
415
- return handler(std::move(item));
637
+ return handler(std::move(item), {});
416
638
  }
417
639
  if (self->streams_.empty()) {
418
- return handler({});
640
+ return handler({}, {});
419
641
  }
420
642
  if (it == self->streams_.end()) {
421
643
  it = self->streams_.begin();
@@ -426,60 +648,35 @@ class range_scan_orchestrator_impl
426
648
  });
427
649
  }
428
650
 
429
- template<typename Iterator, typename Handler>
430
- void next_item_sorted(std::optional<lowest_item> lowest, Iterator it, Handler&& handler)
431
- {
432
- if (streams_.empty()) {
433
- return handler({});
434
- }
435
- auto vbucket_id = it->first;
436
- auto stream = it->second;
437
- stream->peek(
438
- [lowest = std::move(lowest), it = std::next(it), vbucket_id, self = shared_from_this(), handler = std::forward<Handler>(handler)](
439
- auto item) mutable {
440
- if (item) {
441
- if (!lowest || less(item->key, lowest->key)) {
442
- lowest = { vbucket_id, item->key };
443
- }
444
- } else {
445
- self->streams_.erase(vbucket_id);
446
- }
447
-
448
- if (it != self->streams_.end()) {
449
- return asio::post(asio::bind_executor(
450
- self->io_, [lowest = std::move(lowest), it, self, handler = std::forward<Handler>(handler)]() mutable {
451
- self->next_item_sorted(std::move(lowest), it, std::forward<Handler>(handler));
452
- }));
453
- } else if (lowest) {
454
- return handler(self->streams_[lowest->vbucket_id]->pop());
455
- } else {
456
- return handler({});
457
- }
458
- });
459
- }
460
-
461
651
  asio::io_context& io_;
462
652
  agent agent_;
463
- std::size_t num_vbuckets_;
653
+ topology::configuration::vbucket_map vbucket_map_;
464
654
  std::string scope_name_;
465
655
  std::string collection_name_;
466
- std::variant<std::monostate, range_scan, sampling_scan> scan_type_;
656
+ std::variant<std::monostate, range_scan, prefix_scan, sampling_scan> scan_type_;
467
657
  range_scan_orchestrator_options options_;
468
658
  std::map<std::size_t, std::optional<range_snapshot_requirements>> vbucket_to_snapshot_requirements_;
469
659
  std::map<std::uint16_t, std::shared_ptr<range_scan_stream>> streams_{};
470
- std::size_t item_limit{ std::numeric_limits<size_t>::max() };
660
+ std::map<std::int16_t, std::atomic_uint16_t> stream_count_per_node_{};
661
+ std::recursive_mutex stream_start_mutex_{};
662
+ std::mutex stream_map_mutex_{};
663
+ std::mutex stream_count_per_node_mutex_{};
664
+ std::atomic_uint16_t active_stream_count_ = 0;
665
+ std::uint16_t concurrency_ = 1;
666
+ std::size_t item_limit_{ std::numeric_limits<size_t>::max() };
667
+ bool cancelled_{ false };
471
668
  };
472
669
 
473
670
  range_scan_orchestrator::range_scan_orchestrator(asio::io_context& io,
474
671
  agent kv_provider,
475
- std::size_t num_vbuckets,
672
+ topology::configuration::vbucket_map vbucket_map,
476
673
  std::string scope_name,
477
674
  std::string collection_name,
478
- std::variant<std::monostate, range_scan, sampling_scan> scan_type,
675
+ std::variant<std::monostate, range_scan, prefix_scan, sampling_scan> scan_type,
479
676
  range_scan_orchestrator_options options)
480
677
  : impl_{ std::make_shared<range_scan_orchestrator_impl>(io,
481
678
  std::move(kv_provider),
482
- num_vbuckets,
679
+ std::move(vbucket_map),
483
680
  std::move(scope_name),
484
681
  std::move(collection_name),
485
682
  std::move(scan_type),