couchbase 3.4.3 → 3.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (148)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/ext/couchbase/CMakeLists.txt +15 -1
  4. data/ext/couchbase/core/bucket.cxx +183 -152
  5. data/ext/couchbase/core/bucket.hxx +17 -4
  6. data/ext/couchbase/core/cluster.hxx +34 -13
  7. data/ext/couchbase/core/cluster_options.hxx +3 -0
  8. data/ext/couchbase/core/crud_component.cxx +51 -22
  9. data/ext/couchbase/core/error_context/key_value.cxx +2 -1
  10. data/ext/couchbase/core/error_context/key_value.hxx +10 -12
  11. data/ext/couchbase/core/impl/build_deferred_query_indexes.cxx +115 -50
  12. data/ext/couchbase/core/impl/cluster.cxx +6 -0
  13. data/ext/couchbase/core/impl/create_bucket.cxx +155 -0
  14. data/ext/couchbase/core/impl/create_query_index.cxx +172 -59
  15. data/ext/couchbase/core/impl/dns_srv_tracker.cxx +2 -1
  16. data/ext/couchbase/core/impl/drop_bucket.cxx +66 -0
  17. data/ext/couchbase/core/impl/drop_query_index.cxx +138 -59
  18. data/ext/couchbase/core/impl/flush_bucket.cxx +66 -0
  19. data/ext/couchbase/core/impl/get_all_buckets.cxx +163 -0
  20. data/ext/couchbase/core/impl/get_all_query_indexes.cxx +67 -37
  21. data/ext/couchbase/core/impl/get_bucket.cxx +153 -0
  22. data/ext/couchbase/core/impl/internal_manager_error_context.cxx +113 -0
  23. data/ext/couchbase/core/impl/internal_manager_error_context.hxx +60 -0
  24. data/ext/couchbase/core/impl/key_value_error_category.cxx +2 -4
  25. data/ext/couchbase/core/impl/key_value_error_context.cxx +98 -0
  26. data/ext/couchbase/core/impl/lookup_in.cxx +1 -0
  27. data/ext/couchbase/core/impl/lookup_in_all_replicas.cxx +176 -0
  28. data/ext/couchbase/core/impl/lookup_in_all_replicas.hxx +80 -0
  29. data/ext/couchbase/core/impl/lookup_in_any_replica.cxx +167 -0
  30. data/ext/couchbase/core/impl/lookup_in_any_replica.hxx +75 -0
  31. data/ext/couchbase/core/impl/lookup_in_replica.cxx +97 -0
  32. data/ext/couchbase/core/impl/lookup_in_replica.hxx +67 -0
  33. data/ext/couchbase/core/impl/manager_error_context.cxx +100 -0
  34. data/ext/couchbase/core/impl/query.cxx +1 -0
  35. data/ext/couchbase/core/impl/query_error_context.cxx +75 -0
  36. data/ext/couchbase/core/impl/update_bucket.cxx +130 -0
  37. data/ext/couchbase/core/impl/watch_query_indexes.cxx +53 -29
  38. data/ext/couchbase/core/io/dns_client.cxx +111 -40
  39. data/ext/couchbase/core/io/dns_config.cxx +5 -4
  40. data/ext/couchbase/core/io/http_session.hxx +24 -1
  41. data/ext/couchbase/core/io/mcbp_command.hxx +9 -2
  42. data/ext/couchbase/core/io/mcbp_session.cxx +80 -43
  43. data/ext/couchbase/core/io/mcbp_session.hxx +4 -3
  44. data/ext/couchbase/core/logger/custom_rotating_file_sink.cxx +1 -1
  45. data/ext/couchbase/core/logger/logger.cxx +80 -20
  46. data/ext/couchbase/core/logger/logger.hxx +31 -0
  47. data/ext/couchbase/core/meta/features.hxx +25 -0
  48. data/ext/couchbase/core/operations/document_lookup_in_all_replicas.hxx +192 -0
  49. data/ext/couchbase/core/operations/document_lookup_in_any_replica.hxx +188 -0
  50. data/ext/couchbase/core/operations/document_query.cxx +11 -0
  51. data/ext/couchbase/core/operations/document_query.hxx +1 -0
  52. data/ext/couchbase/core/operations.hxx +2 -0
  53. data/ext/couchbase/core/origin.cxx +270 -0
  54. data/ext/couchbase/core/origin.hxx +2 -0
  55. data/ext/couchbase/core/protocol/client_response.hxx +1 -0
  56. data/ext/couchbase/core/protocol/cmd_hello.hxx +1 -0
  57. data/ext/couchbase/core/protocol/cmd_lookup_in_replica.cxx +107 -0
  58. data/ext/couchbase/core/protocol/cmd_lookup_in_replica.hxx +137 -0
  59. data/ext/couchbase/core/protocol/hello_feature.hxx +6 -0
  60. data/ext/couchbase/core/protocol/hello_feature_fmt.hxx +3 -0
  61. data/ext/couchbase/core/protocol/status.cxx +2 -2
  62. data/ext/couchbase/core/range_scan_options.cxx +3 -27
  63. data/ext/couchbase/core/range_scan_options.hxx +13 -17
  64. data/ext/couchbase/core/range_scan_orchestrator.cxx +388 -170
  65. data/ext/couchbase/core/range_scan_orchestrator.hxx +13 -2
  66. data/ext/couchbase/core/range_scan_orchestrator_options.hxx +5 -3
  67. data/ext/couchbase/core/scan_options.hxx +0 -19
  68. data/ext/couchbase/core/scan_result.cxx +19 -5
  69. data/ext/couchbase/core/scan_result.hxx +5 -2
  70. data/ext/couchbase/core/timeout_defaults.hxx +2 -3
  71. data/ext/couchbase/core/topology/capabilities.hxx +3 -0
  72. data/ext/couchbase/core/topology/capabilities_fmt.hxx +8 -0
  73. data/ext/couchbase/core/topology/collections_manifest_fmt.hxx +1 -1
  74. data/ext/couchbase/core/topology/configuration.hxx +15 -0
  75. data/ext/couchbase/core/topology/configuration_json.hxx +6 -1
  76. data/ext/couchbase/core/utils/connection_string.cxx +62 -47
  77. data/ext/couchbase/core/utils/connection_string.hxx +1 -0
  78. data/ext/couchbase/couchbase/analytics_error_context.hxx +1 -1
  79. data/ext/couchbase/couchbase/behavior_options.hxx +19 -2
  80. data/ext/couchbase/couchbase/bucket_manager.hxx +135 -0
  81. data/ext/couchbase/couchbase/build_query_index_options.hxx +0 -30
  82. data/ext/couchbase/couchbase/cluster.hxx +14 -0
  83. data/ext/couchbase/couchbase/collection.hxx +111 -0
  84. data/ext/couchbase/couchbase/collection_query_index_manager.hxx +7 -48
  85. data/ext/couchbase/couchbase/create_bucket_options.hxx +41 -0
  86. data/ext/couchbase/couchbase/create_primary_query_index_options.hxx +0 -29
  87. data/ext/couchbase/couchbase/create_query_index_options.hxx +0 -33
  88. data/ext/couchbase/couchbase/drop_bucket_options.hxx +41 -0
  89. data/ext/couchbase/couchbase/drop_primary_query_index_options.hxx +0 -30
  90. data/ext/couchbase/couchbase/drop_query_index_options.hxx +0 -31
  91. data/ext/couchbase/couchbase/error_codes.hxx +1 -2
  92. data/ext/couchbase/couchbase/error_context.hxx +10 -2
  93. data/ext/couchbase/couchbase/flush_bucket_options.hxx +41 -0
  94. data/ext/couchbase/{core/topology/error_map_fmt.hxx → couchbase/fmt/key_value_error_map_attribute.hxx} +21 -21
  95. data/ext/couchbase/couchbase/get_all_buckets_options.hxx +44 -0
  96. data/ext/couchbase/couchbase/get_all_query_indexes_options.hxx +0 -30
  97. data/ext/couchbase/couchbase/get_and_lock_options.hxx +2 -2
  98. data/ext/couchbase/couchbase/get_and_touch_options.hxx +2 -2
  99. data/ext/couchbase/couchbase/get_bucket_options.hxx +43 -0
  100. data/ext/couchbase/couchbase/get_options.hxx +2 -2
  101. data/ext/couchbase/couchbase/insert_options.hxx +3 -3
  102. data/ext/couchbase/couchbase/key_value_error_context.hxx +7 -2
  103. data/ext/couchbase/couchbase/lookup_in_all_replicas_options.hxx +109 -0
  104. data/ext/couchbase/couchbase/lookup_in_any_replica_options.hxx +101 -0
  105. data/ext/couchbase/couchbase/lookup_in_options.hxx +2 -2
  106. data/ext/couchbase/couchbase/lookup_in_replica_result.hxx +74 -0
  107. data/ext/couchbase/couchbase/lookup_in_result.hxx +26 -0
  108. data/ext/couchbase/couchbase/management/bucket_settings.hxx +116 -0
  109. data/ext/couchbase/couchbase/manager_error_context.hxx +29 -53
  110. data/ext/couchbase/couchbase/mutate_in_options.hxx +2 -2
  111. data/ext/couchbase/couchbase/query_error_context.hxx +3 -1
  112. data/ext/couchbase/couchbase/query_index_manager.hxx +16 -83
  113. data/ext/couchbase/couchbase/query_options.hxx +18 -0
  114. data/ext/couchbase/couchbase/remove_options.hxx +2 -2
  115. data/ext/couchbase/couchbase/replace_options.hxx +3 -3
  116. data/ext/couchbase/couchbase/security_options.hxx +15 -0
  117. data/ext/couchbase/couchbase/subdocument_error_context.hxx +4 -2
  118. data/ext/couchbase/couchbase/touch_options.hxx +2 -2
  119. data/ext/couchbase/couchbase/unlock_options.hxx +2 -2
  120. data/ext/couchbase/couchbase/update_bucket_options.hxx +41 -0
  121. data/ext/couchbase/couchbase/upsert_options.hxx +3 -3
  122. data/ext/couchbase/couchbase/watch_query_indexes_options.hxx +0 -31
  123. data/ext/couchbase/test/CMakeLists.txt +1 -0
  124. data/ext/couchbase/test/test_integration_collections.cxx +6 -0
  125. data/ext/couchbase/test/test_integration_crud.cxx +5 -0
  126. data/ext/couchbase/test/test_integration_examples.cxx +137 -1
  127. data/ext/couchbase/test/test_integration_management.cxx +709 -266
  128. data/ext/couchbase/test/test_integration_query.cxx +19 -7
  129. data/ext/couchbase/test/test_integration_range_scan.cxx +351 -112
  130. data/ext/couchbase/test/test_integration_search.cxx +10 -1
  131. data/ext/couchbase/test/test_integration_subdoc.cxx +655 -0
  132. data/ext/couchbase/test/test_transaction_public_async_api.cxx +13 -12
  133. data/ext/couchbase/test/test_transaction_public_blocking_api.cxx +27 -21
  134. data/ext/couchbase/test/test_unit_connection_string.cxx +29 -0
  135. data/ext/couchbase/test/test_unit_query.cxx +75 -0
  136. data/ext/couchbase.cxx +583 -29
  137. data/ext/revisions.rb +3 -3
  138. data/lib/couchbase/cluster.rb +1 -1
  139. data/lib/couchbase/collection.rb +108 -0
  140. data/lib/couchbase/collection_options.rb +100 -0
  141. data/lib/couchbase/errors.rb +5 -0
  142. data/lib/couchbase/key_value_scan.rb +125 -0
  143. data/lib/couchbase/options.rb +151 -0
  144. data/lib/couchbase/scope.rb +1 -1
  145. data/lib/couchbase/utils/time.rb +14 -1
  146. data/lib/couchbase/version.rb +1 -1
  147. metadata +41 -7
  148. data/ext/couchbase/core/impl/collection_query_index_manager.cxx +0 -93
@@ -19,8 +19,6 @@
19
19
  #include "core/logger/logger.hxx"
20
20
  #include "couchbase/error_codes.hxx"
21
21
 
22
- #include <couchbase/retry_strategy.hxx>
23
-
24
22
  #include <asio/bind_executor.hpp>
25
23
  #include <asio/io_context.hpp>
26
24
  #include <asio/post.hpp>
@@ -30,6 +28,7 @@
30
28
  #include <gsl/narrow>
31
29
 
32
30
  #include <future>
31
+ #include <random>
33
32
 
34
33
  namespace couchbase::core
35
34
  {
@@ -55,6 +54,14 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
55
54
  {
56
55
  struct failed {
57
56
  std::error_code ec;
57
+ bool fatal{ true };
58
+ };
59
+
60
+ struct not_started {
61
+ };
62
+
63
+ struct awaiting_retry {
64
+ std::error_code ec;
58
65
  };
59
66
 
60
67
  struct running {
@@ -68,32 +75,73 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
68
75
  range_scan_stream(asio::io_context& io,
69
76
  agent kv_provider,
70
77
  std::uint16_t vbucket_id,
78
+ std::int16_t node_id,
71
79
  range_scan_create_options create_options,
72
- range_scan_continue_options continue_options)
80
+ range_scan_continue_options continue_options,
81
+ std::shared_ptr<scan_stream_manager> stream_manager)
73
82
  : items_{ io, continue_options.batch_item_limit }
74
83
  , agent_{ std::move(kv_provider) }
75
84
  , vbucket_id_{ vbucket_id }
85
+ , node_id_{ node_id }
76
86
  , create_options_{ std::move(create_options) }
77
87
  , continue_options_{ std::move(continue_options) }
88
+ , stream_manager_{ std::move(stream_manager) }
78
89
  {
79
90
  }
80
91
 
81
92
  void start()
82
93
  {
94
+ // Fail the stream if more time than the timeout has elapsed since the stream was first attempted (if this is a retry)
95
+ if (first_attempt_timestamp_.has_value()) {
96
+ if (std::chrono::steady_clock::now() - first_attempt_timestamp_.value() > create_options_.timeout) {
97
+ CB_LOG_DEBUG("stream for vbucket_id {} cannot be retried any longer because it has exceeded the timeout", vbucket_id_);
98
+ state_ = failed{ errc::common::unambiguous_timeout, !is_sampling_scan() };
99
+ stream_manager_->stream_start_failed(node_id_, error_is_fatal());
100
+ drain_waiting_queue();
101
+ return;
102
+ }
103
+ } else {
104
+ first_attempt_timestamp_ = std::chrono::steady_clock::now();
105
+ }
106
+
107
+ CB_LOG_TRACE("starting stream {} in node {}", vbucket_id_, node_id_);
108
+ state_ = std::monostate{};
83
109
  if (std::holds_alternative<range_scan>(create_options_.scan_type) && !last_seen_key_.empty()) {
84
- std::get<range_scan>(create_options_.scan_type).start_.id = last_seen_key_;
110
+ std::get<range_scan>(create_options_.scan_type).from = scan_term{ last_seen_key_ };
85
111
  }
86
112
 
87
113
  auto op = agent_.range_scan_create(vbucket_id_, create_options_, [self = shared_from_this()](auto res, auto ec) {
88
114
  if (ec) {
89
- self->state_ = failed{ ec };
90
- self->drain_waiting_queue();
91
115
  if (ec == errc::key_value::document_not_found) {
116
+ // Benign error
92
117
  CB_LOG_DEBUG("ignoring vbucket_id {} because no documents exist for it", self->vbucket_id_);
118
+ CB_LOG_TRACE("setting state for stream {} to FAILED", self->vbucket_id_);
119
+ self->state_ = failed{ ec, false };
120
+ self->stream_manager_->stream_start_failed(self->node_id_, self->error_is_fatal());
121
+ } else if (ec == errc::common::temporary_failure) {
122
+ // Retryable error
123
+ CB_LOG_DEBUG("received busy status from vbucket with ID {} - reducing concurrency & will retry", self->vbucket_id_);
124
+ CB_LOG_TRACE("setting state for stream {} to AWAITING_RETRY", self->vbucket_id_);
125
+ self->state_ = awaiting_retry{ ec };
126
+ self->stream_manager_->stream_start_failed_awaiting_retry(self->node_id_, self->vbucket_id_);
127
+ } else if (ec == errc::common::internal_server_failure || ec == errc::common::collection_not_found) {
128
+ // Fatal errors
129
+ CB_LOG_TRACE("setting state for stream {} to FAILED", self->vbucket_id_);
130
+ self->state_ = failed{ ec, true };
131
+ self->stream_manager_->stream_start_failed(self->node_id_, self->error_is_fatal());
132
+ } else {
133
+ // Unexpected errors
134
+ CB_LOG_DEBUG(
135
+ "received unexpected error {} from stream for vbucket {} ({})", ec.value(), self->vbucket_id_, ec.message());
136
+ CB_LOG_TRACE("setting state for stream {} to FAILED", self->vbucket_id_);
137
+ self->state_ = failed{ ec, true };
138
+ self->stream_manager_->stream_start_failed(self->node_id_, self->error_is_fatal());
93
139
  }
140
+ self->drain_waiting_queue();
94
141
  return;
95
142
  }
96
143
  self->state_ = running{ std::move(res.scan_uuid) };
144
+ CB_LOG_TRACE("setting state for stream {} to RUNNING", self->vbucket_id_);
97
145
  self->drain_waiting_queue();
98
146
  self->resume();
99
147
  });
@@ -105,86 +153,126 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
105
153
  if (is_running()) {
106
154
  agent_.range_scan_cancel(uuid(), vbucket_id_, {}, [](auto /* res */, auto /* ec */) {});
107
155
  }
108
- state_ = failed{ ec };
156
+
157
+ items_.cancel();
109
158
  items_.close();
159
+
160
+ bool fatal{};
161
+ if (ec == errc::key_value::document_not_found || ec == errc::common::authentication_failure ||
162
+ ec == errc::common::collection_not_found || ec == errc::common::request_canceled) {
163
+ // Errors that are fatal unless this is a sampling scan
164
+ fatal = !is_sampling_scan();
165
+ } else if (ec == errc::common::feature_not_available || ec == errc::common::invalid_argument ||
166
+ ec == errc::common::temporary_failure) {
167
+ // Errors that are always fatal
168
+ fatal = true;
169
+ } else {
170
+ // Unexpected error - always fatal
171
+ CB_LOG_DEBUG("received unexpected error {} from stream for vbucket during range scan continue {} ({})",
172
+ ec.value(),
173
+ vbucket_id_,
174
+ ec.message());
175
+ fatal = true;
176
+ }
177
+
178
+ CB_LOG_TRACE("setting state for stream {} to FAILED after range scan continue", vbucket_id_);
179
+ state_ = failed{ ec, fatal };
180
+ stream_manager_->stream_continue_failed(node_id_, fatal);
110
181
  }
111
182
  }
112
183
 
184
+ void mark_not_started()
185
+ {
186
+ state_ = not_started{};
187
+ }
188
+
113
189
  void complete()
114
190
  {
115
191
  if (!is_failed() && !is_completed()) {
192
+ CB_LOG_TRACE("setting state for stream {} to COMPLETED", vbucket_id_);
193
+
194
+ stream_manager_->stream_completed(node_id_);
116
195
  state_ = completed{};
196
+ drain_waiting_queue();
117
197
  }
118
198
  }
119
199
 
120
- auto pop() -> std::optional<range_scan_item>
200
+ void cancel()
121
201
  {
122
- if (peeked_) {
123
- std::optional<range_scan_item> item{};
124
- std::swap(peeked_, item);
125
- return item;
202
+ if (!should_cancel_) {
203
+ should_cancel_ = true;
204
+ items_.cancel();
205
+ items_.close();
126
206
  }
127
- return peeked_;
128
207
  }
129
208
 
130
209
  template<typename Handler>
131
- void peek(Handler&& handler)
210
+ void take(Handler&& handler)
132
211
  {
133
212
  do_when_ready([self = shared_from_this(), handler = std::forward<Handler>(handler)]() mutable {
134
- self->peek_when_ready(std::forward<Handler>(handler));
213
+ self->take_when_ready(std::forward<Handler>(handler));
135
214
  });
136
215
  }
137
216
 
138
- template<typename Handler>
139
- void take(Handler&& handler)
217
+ [[nodiscard]] auto node_id() const -> int16_t
140
218
  {
141
- do_when_ready([self = shared_from_this(), handler = std::forward<Handler>(handler)]() mutable {
142
- self->take_when_ready(std::forward<Handler>(handler));
143
- });
219
+ return node_id_;
144
220
  }
145
221
 
146
- private:
147
- template<typename Handler>
148
- void peek_when_ready(Handler&& handler)
222
+ [[nodiscard]] auto is_ready() const -> bool
149
223
  {
150
- if (is_failed()) {
151
- return handler(std::optional<range_scan_item>{});
152
- }
224
+ return !std::holds_alternative<std::monostate>(state_);
225
+ }
153
226
 
154
- if (peeked_) {
155
- return handler(peeked_);
156
- }
227
+ [[nodiscard]] auto is_not_started() const -> bool
228
+ {
229
+ return std::holds_alternative<not_started>(state_);
230
+ }
157
231
 
158
- if (is_completed() && !items_.ready()) {
159
- return handler(std::optional<range_scan_item>{});
160
- }
232
+ [[nodiscard]] auto is_awaiting_retry() const -> bool
233
+ {
234
+ return std::holds_alternative<awaiting_retry>(state_);
235
+ }
161
236
 
162
- items_.async_receive(
163
- [self = shared_from_this(), handler = std::forward<Handler>(handler)](std::error_code ec, range_scan_item item) mutable {
164
- if (ec) {
165
- self->peeked_ = {};
166
- } else {
167
- self->peeked_ = std::move(item);
168
- }
169
- handler(self->peeked_);
170
- });
237
+ [[nodiscard]] auto is_running() const -> bool
238
+ {
239
+ return std::holds_alternative<running>(state_);
240
+ }
241
+
242
+ [[nodiscard]] auto is_failed() const -> bool
243
+ {
244
+ return std::holds_alternative<failed>(state_);
245
+ }
246
+
247
+ [[nodiscard]] auto is_completed() const -> bool
248
+ {
249
+ return std::holds_alternative<completed>(state_);
171
250
  }
172
251
 
252
+ private:
173
253
  template<typename Handler>
174
254
  void take_when_ready(Handler&& handler)
175
255
  {
256
+
176
257
  if (is_failed()) {
177
- return handler(std::optional<range_scan_item>{}, false);
258
+ if (error_is_fatal()) {
259
+ return handler(std::optional<range_scan_item>{}, false, std::optional<std::error_code>{ error() });
260
+ } else {
261
+ return handler(std::optional<range_scan_item>{}, false, std::optional<std::error_code>{});
262
+ }
263
+ }
264
+ if (is_awaiting_retry() || is_not_started()) {
265
+ return handler(std::optional<range_scan_item>{}, true, std::optional<std::error_code>{});
178
266
  }
179
267
  if (!items_.ready()) {
180
- return handler(std::optional<range_scan_item>{}, is_running());
268
+ return handler(std::optional<range_scan_item>{}, is_running(), std::optional<std::error_code>{});
181
269
  }
182
270
  items_.async_receive(
183
271
  [self = shared_from_this(), handler = std::forward<Handler>(handler)](std::error_code ec, range_scan_item item) mutable {
184
272
  if (ec) {
185
- return handler(std::optional<range_scan_item>{}, false);
273
+ return handler(std::optional<range_scan_item>{}, false, std::optional<std::error_code>{});
186
274
  }
187
- handler(std::optional<range_scan_item>{ std::move(item) }, true);
275
+ handler(std::optional<range_scan_item>{ std::move(item) }, true, std::optional<std::error_code>{});
188
276
  });
189
277
  }
190
278
 
@@ -211,6 +299,13 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
211
299
  if (!is_running()) {
212
300
  return;
213
301
  }
302
+ if (should_cancel_) {
303
+ agent_.range_scan_cancel(uuid(), vbucket_id_, {}, [](auto /* res */, auto /* ec */) {});
304
+ items_.close();
305
+ items_.cancel();
306
+ return;
307
+ }
308
+
214
309
  agent_.range_scan_continue(
215
310
  uuid(),
216
311
  vbucket_id_,
@@ -236,26 +331,6 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
236
331
  });
237
332
  }
238
333
 
239
- [[nodiscard]] auto is_ready() const -> bool
240
- {
241
- return !std::holds_alternative<std::monostate>(state_);
242
- }
243
-
244
- [[nodiscard]] auto is_running() const -> bool
245
- {
246
- return std::holds_alternative<running>(state_);
247
- }
248
-
249
- [[nodiscard]] auto is_failed() const -> bool
250
- {
251
- return std::holds_alternative<failed>(state_);
252
- }
253
-
254
- [[nodiscard]] auto is_completed() const -> bool
255
- {
256
- return std::holds_alternative<completed>(state_);
257
- }
258
-
259
334
  [[nodiscard]] auto uuid() const -> std::vector<std::byte>
260
335
  {
261
336
  if (is_running()) {
@@ -272,150 +347,317 @@ class range_scan_stream : public std::enable_shared_from_this<range_scan_stream>
272
347
  return {};
273
348
  }
274
349
 
350
+ [[nodiscard]] auto error_is_fatal() const -> bool
351
+ {
352
+ if (is_failed()) {
353
+ return std::get<failed>(state_).fatal;
354
+ }
355
+ return {};
356
+ }
357
+
358
+ [[nodiscard]] auto is_sampling_scan() const -> bool
359
+ {
360
+ return std::holds_alternative<sampling_scan>(create_options_.scan_type);
361
+ }
362
+
275
363
  asio::experimental::concurrent_channel<void(std::error_code, range_scan_item)> items_;
276
364
  agent agent_;
277
365
  std::uint16_t vbucket_id_;
366
+ std::int16_t node_id_;
278
367
  range_scan_create_options create_options_;
279
368
  range_scan_continue_options continue_options_;
280
- std::vector<std::byte> last_seen_key_{};
281
- std::variant<std::monostate, failed, running, completed> state_{};
282
- std::optional<range_scan_item> peeked_{};
369
+ std::shared_ptr<scan_stream_manager> stream_manager_;
370
+ std::string last_seen_key_{};
371
+ std::variant<std::monostate, not_started, failed, awaiting_retry, running, completed> state_{};
372
+ bool should_cancel_{ false };
373
+ std::optional<std::chrono::time_point<std::chrono::steady_clock>> first_attempt_timestamp_{};
283
374
  std::vector<utils::movable_function<void()>> waiting_queue_{};
284
375
  };
285
376
 
286
- struct lowest_item {
287
- std::uint16_t vbucket_id;
288
- std::vector<std::byte> key;
289
- };
290
-
291
- static auto
292
- less(std::vector<std::byte>& a, std::vector<std::byte>& b) -> bool
293
- {
294
- auto common_size = std::min(a.size(), b.size());
295
- for (std::size_t i = 0; i < common_size; ++i) {
296
- if (a[i] < b[i]) {
297
- return true;
298
- }
299
- if (a[i] > b[i]) {
300
- return false;
301
- }
302
- }
303
- return a.size() < b.size();
304
- }
305
-
306
377
  class range_scan_orchestrator_impl
307
378
  : public std::enable_shared_from_this<range_scan_orchestrator_impl>
308
379
  , public range_scan_item_iterator
380
+ , public scan_stream_manager
309
381
  {
310
382
  public:
311
383
  range_scan_orchestrator_impl(asio::io_context& io,
312
384
  agent kv_provider,
313
- std::size_t num_vbuckets,
385
+ topology::configuration::vbucket_map vbucket_map,
314
386
  std::string scope_name,
315
387
  std::string collection_name,
316
- std::variant<std::monostate, range_scan, sampling_scan> scan_type,
388
+ std::variant<std::monostate, range_scan, prefix_scan, sampling_scan> scan_type,
317
389
  range_scan_orchestrator_options options)
318
390
  : io_{ io }
319
391
  , agent_{ std::move(kv_provider) }
320
- , num_vbuckets_{ num_vbuckets }
392
+ , vbucket_map_{ std::move(vbucket_map) }
321
393
  , scope_name_{ std::move(scope_name) }
322
394
  , collection_name_{ std::move(collection_name) }
323
395
  , scan_type_{ std::move(scan_type) }
324
396
  , options_{ std::move(options) }
325
397
  , vbucket_to_snapshot_requirements_{ mutation_state_to_snapshot_requirements(options_.consistent_with) }
398
+ , concurrency_{ options_.concurrency }
326
399
  {
400
+
327
401
  if (std::holds_alternative<sampling_scan>(scan_type_)) {
328
- item_limit = std::get<sampling_scan>(scan_type).limit;
402
+ item_limit_ = std::get<sampling_scan>(scan_type).limit;
329
403
  }
330
404
  }
331
405
 
332
406
  auto scan() -> tl::expected<scan_result, std::error_code>
333
407
  {
334
- if (item_limit == 0) {
408
+ if (item_limit_ == 0) {
409
+ return tl::unexpected(errc::common::invalid_argument);
410
+ }
411
+ if (concurrency_ <= 0) {
335
412
  return tl::unexpected(errc::common::invalid_argument);
336
413
  }
337
- range_scan_continue_options continue_options{
338
- options_.batch_item_limit, options_.batch_byte_limit, options_.batch_time_limit, options_.retry_strategy, options_.ids_only,
414
+
415
+ // Get the collection ID before starting any of the streams
416
+ {
417
+ auto barrier = std::make_shared<std::promise<tl::expected<get_collection_id_result, std::error_code>>>();
418
+ auto f = barrier->get_future();
419
+ get_collection_id_options const get_cid_options{ options_.retry_strategy, options_.timeout, options_.parent_span };
420
+ agent_.get_collection_id(scope_name_, collection_name_, get_cid_options, [barrier](auto result, auto ec) {
421
+ if (ec) {
422
+ return barrier->set_value(tl::unexpected(ec));
423
+ }
424
+ barrier->set_value(result);
425
+ });
426
+ auto get_cid_res = f.get();
427
+ if (!get_cid_res.has_value()) {
428
+ return tl::unexpected(get_cid_res.error());
429
+ }
430
+ collection_id_ = get_cid_res->collection_id;
431
+ }
432
+
433
+ auto batch_time_limit = std::chrono::duration_cast<std::chrono::milliseconds>(0.9 * options_.timeout);
434
+ range_scan_continue_options const continue_options{
435
+ options_.batch_item_limit, options_.batch_byte_limit, batch_time_limit, options_.timeout, options_.retry_strategy,
339
436
  };
340
- continue_options.batch_time_limit = std::chrono::seconds{ 10 };
341
- for (std::uint16_t vbucket = 0; vbucket < gsl::narrow_cast<std::uint16_t>(num_vbuckets_); ++vbucket) {
437
+ for (std::uint16_t vbucket = 0; vbucket < gsl::narrow_cast<std::uint16_t>(vbucket_map_.size()); ++vbucket) {
438
+ const range_scan_create_options create_options{
439
+ scope_name_, {},
440
+ scan_type_, options_.timeout,
441
+ collection_id_, vbucket_to_snapshot_requirements_[vbucket],
442
+ options_.ids_only, options_.retry_strategy,
443
+ };
444
+
445
+ // Get the active node for the vbucket (values in vbucket map are the active node id followed by the ids of the replicas)
446
+ auto node_id = vbucket_map_[vbucket][0];
447
+
342
448
  auto stream = std::make_shared<range_scan_stream>(io_,
343
449
  agent_,
344
450
  vbucket,
345
- range_scan_create_options{
346
- scope_name_,
347
- collection_name_,
348
- scan_type_,
349
- options_.timeout,
350
- {},
351
- vbucket_to_snapshot_requirements_[vbucket],
352
- options_.ids_only,
353
- options_.retry_strategy,
354
- },
355
- continue_options);
451
+ node_id,
452
+ create_options,
453
+ continue_options,
454
+ std::static_pointer_cast<scan_stream_manager>(shared_from_this()));
356
455
  streams_[vbucket] = stream;
357
- stream->start();
456
+ streams_[vbucket]->mark_not_started();
457
+ if (stream_count_per_node_.count(node_id) == 0) {
458
+ stream_count_per_node_[node_id] = 0;
459
+ }
358
460
  }
461
+ start_streams(concurrency_);
359
462
 
360
463
  return scan_result(shared_from_this());
361
464
  }
362
465
 
363
- auto next() -> std::future<std::optional<range_scan_item>> override
466
+ void cancel() override
467
+ {
468
+ cancelled_ = true;
469
+ for (const auto& [vbucket_id, stream] : streams_) {
470
+ stream->cancel();
471
+ }
472
+ }
473
+
474
+ bool is_cancelled() override
475
+ {
476
+ return cancelled_;
477
+ }
478
+
479
+ auto next() -> std::future<tl::expected<range_scan_item, std::error_code>> override
364
480
  {
365
- auto barrier = std::make_shared<std::promise<std::optional<range_scan_item>>>();
366
- if (item_limit == 0 || item_limit-- == 0) {
367
- barrier->set_value(std::nullopt);
368
- streams_.clear();
481
+ auto barrier = std::make_shared<std::promise<tl::expected<range_scan_item, std::error_code>>>();
482
+ if (item_limit_ == 0 || item_limit_-- == 0) {
483
+ barrier->set_value(tl::unexpected{ errc::key_value::range_scan_completed });
484
+ cancel();
369
485
  } else {
370
- if (options_.sort == scan_sort::none) {
371
- next_item(streams_.begin(), [barrier](std::optional<range_scan_item> item) { barrier->set_value(std::move(item)); });
372
- } else {
373
- next_item_sorted(
374
- {}, streams_.begin(), [barrier](std::optional<range_scan_item> item) { barrier->set_value(std::move(item)); });
375
- }
486
+ next_item(streams_.begin(), [barrier](std::optional<range_scan_item> item, std::optional<std::error_code> ec) {
487
+ if (item) {
488
+ barrier->set_value(std::move(item.value()));
489
+ } else if (ec) {
490
+ barrier->set_value(tl::unexpected{ ec.value() });
491
+ } else {
492
+ barrier->set_value(tl::unexpected{ errc::key_value::range_scan_completed });
493
+ }
494
+ });
376
495
  }
377
496
  return barrier->get_future();
378
497
  }
379
498
 
380
499
  void next(utils::movable_function<void(range_scan_item, std::error_code)> callback) override
381
500
  {
382
- auto handler = [callback = std::move(callback)](std::optional<range_scan_item> item) mutable {
501
+ auto handler = [callback = std::move(callback)](std::optional<range_scan_item> item, std::optional<std::error_code> ec) mutable {
383
502
  if (item) {
384
503
  callback(std::move(item.value()), {});
504
+ } else if (ec) {
505
+ callback({}, ec.value());
385
506
  } else {
386
507
  callback({}, errc::key_value::range_scan_completed);
387
508
  }
388
509
  };
389
- if (item_limit == 0 || item_limit-- == 0) {
390
- handler({});
510
+ if (item_limit_ == 0 || item_limit_-- == 0) {
511
+ handler({}, {});
512
+ cancel();
391
513
  } else {
392
- if (options_.sort == scan_sort::none) {
393
- next_item(streams_.begin(), std::move(handler));
394
- } else {
395
- next_item_sorted({}, streams_.begin(), std::move(handler));
514
+ next_item(streams_.begin(), std::move(handler));
515
+ }
516
+ }
517
+
518
+ void start_streams(std::uint16_t stream_count)
519
+ {
520
+ std::lock_guard<std::recursive_mutex> const lock(stream_start_mutex_);
521
+
522
+ if (cancelled_) {
523
+ CB_LOG_TRACE("scan has been cancelled, do not start another stream");
524
+ return;
525
+ }
526
+
527
+ if (stream_count_per_node_.empty()) {
528
+ CB_LOG_TRACE("no more vbuckets to scan");
529
+ return;
530
+ }
531
+
532
+ std::uint16_t counter = 0;
533
+ while (counter < stream_count) {
534
+ // Find the node with the least number of active streams from those recorded in stream_count_per_node_
535
+ int16_t least_busy_node{};
536
+ {
537
+ std::lock_guard<std::mutex> const stream_count_lock(stream_count_per_node_mutex_);
538
+
539
+ // Pick a random node
540
+ std::random_device rd;
541
+ std::mt19937_64 gen(rd());
542
+ std::uniform_int_distribution<std::size_t> dis(0, stream_count_per_node_.size() - 1);
543
+ auto it = stream_count_per_node_.begin();
544
+ std::advance(it, static_cast<decltype(stream_count_per_node_)::difference_type>(dis(gen)));
545
+ least_busy_node = it->first;
546
+
547
+ // If any other node has fewer streams running use that
548
+ for (const auto& [node_id, count] : stream_count_per_node_) {
549
+ if (count < stream_count_per_node_[least_busy_node]) {
550
+ least_busy_node = node_id;
551
+ }
552
+ }
396
553
  }
554
+
555
+ std::shared_ptr<range_scan_stream> stream{};
556
+ {
557
+ std::lock_guard<std::mutex> const stream_map_lock(stream_map_mutex_);
558
+
559
+ for (const auto& [v, s] : streams_) {
560
+ if ((s->is_not_started() || s->is_awaiting_retry()) && (s->node_id() == least_busy_node)) {
561
+ CB_LOG_TRACE("selected vbucket {} to scan", v);
562
+ stream = s;
563
+ break;
564
+ }
565
+ }
566
+ }
567
+
568
+ if (stream == nullptr) {
569
+ CB_LOG_TRACE("no vbuckets to scan for node {}", least_busy_node);
570
+ {
571
+ std::lock_guard<std::mutex> const stream_count_lock(stream_count_per_node_mutex_);
572
+ stream_count_per_node_.erase(least_busy_node);
573
+ }
574
+ return start_streams(static_cast<std::uint16_t>(stream_count - counter));
575
+ }
576
+
577
+ auto node_id = stream->node_id();
578
+ active_stream_count_++;
579
+ stream_count_per_node_[node_id]++;
580
+ stream->start();
581
+ counter++;
582
+ }
583
+ }
584
+
585
+ void stream_start_failed(std::int16_t node_id, bool fatal) override
586
+ {
587
+ stream_no_longer_running(node_id);
588
+ if (fatal) {
589
+ cancel();
590
+ } else {
591
+ start_streams(1);
592
+ }
593
+ }
594
+
595
+ void stream_start_failed_awaiting_retry(std::int16_t node_id, std::uint16_t /* vbucket_id */) override
596
+ {
597
+ {
598
+ std::lock_guard<std::mutex> const stream_count_lock(stream_count_per_node_mutex_);
599
+ if (stream_count_per_node_.count(node_id) == 0) {
600
+ stream_count_per_node_[node_id] = 1;
601
+ }
602
+ }
603
+ stream_no_longer_running(node_id);
604
+ if (active_stream_count_ == 0) {
605
+ start_streams(1);
397
606
  }
398
607
  }
399
608
 
609
+ void stream_continue_failed(std::int16_t node_id, bool fatal) override
610
+ {
611
+ stream_no_longer_running(node_id);
612
+ if (fatal) {
613
+ cancel();
614
+ } else {
615
+ start_streams(1);
616
+ }
617
+ }
618
+
619
+ void stream_completed(std::int16_t node_id) override
620
+ {
621
+ stream_no_longer_running(node_id);
622
+ start_streams(1);
623
+ }
624
+
400
625
  private:
626
+ void stream_no_longer_running(std::int16_t node_id)
627
+ {
628
+ {
629
+ std::lock_guard<std::mutex> const stream_count_lock(stream_count_per_node_mutex_);
630
+ if (stream_count_per_node_.count(node_id) > 0) {
631
+ stream_count_per_node_[node_id]--;
632
+ }
633
+ }
634
+ active_stream_count_--;
635
+ }
636
+
401
637
  template<typename Iterator, typename Handler>
402
638
  void next_item(Iterator it, Handler&& handler)
403
639
  {
404
- if (streams_.empty()) {
405
- return handler({});
640
+ if (streams_.empty() || cancelled_) {
641
+ return handler({}, {});
406
642
  }
407
643
  auto vbucket_id = it->first;
408
644
  auto stream = it->second;
409
645
  stream->take([it = std::next(it), vbucket_id, self = shared_from_this(), handler = std::forward<Handler>(handler)](
410
- auto item, bool has_more) mutable {
646
+ auto item, bool has_more, auto ec) mutable {
647
+ if (ec) {
648
+ // Fatal error
649
+ self->streams_.clear();
650
+ return handler({}, ec);
651
+ }
411
652
  if (!has_more) {
653
+ std::lock_guard<std::mutex> const lock(self->stream_map_mutex_);
412
654
  self->streams_.erase(vbucket_id);
413
655
  }
414
656
  if (item) {
415
- return handler(std::move(item));
657
+ return handler(std::move(item), {});
416
658
  }
417
659
  if (self->streams_.empty()) {
418
- return handler({});
660
+ return handler({}, {});
419
661
  }
420
662
  if (it == self->streams_.end()) {
421
663
  it = self->streams_.begin();
@@ -426,60 +668,36 @@ class range_scan_orchestrator_impl
426
668
  });
427
669
  }
428
670
 
429
- template<typename Iterator, typename Handler>
430
- void next_item_sorted(std::optional<lowest_item> lowest, Iterator it, Handler&& handler)
431
- {
432
- if (streams_.empty()) {
433
- return handler({});
434
- }
435
- auto vbucket_id = it->first;
436
- auto stream = it->second;
437
- stream->peek(
438
- [lowest = std::move(lowest), it = std::next(it), vbucket_id, self = shared_from_this(), handler = std::forward<Handler>(handler)](
439
- auto item) mutable {
440
- if (item) {
441
- if (!lowest || less(item->key, lowest->key)) {
442
- lowest = { vbucket_id, item->key };
443
- }
444
- } else {
445
- self->streams_.erase(vbucket_id);
446
- }
447
-
448
- if (it != self->streams_.end()) {
449
- return asio::post(asio::bind_executor(
450
- self->io_, [lowest = std::move(lowest), it, self, handler = std::forward<Handler>(handler)]() mutable {
451
- self->next_item_sorted(std::move(lowest), it, std::forward<Handler>(handler));
452
- }));
453
- } else if (lowest) {
454
- return handler(self->streams_[lowest->vbucket_id]->pop());
455
- } else {
456
- return handler({});
457
- }
458
- });
459
- }
460
-
461
671
  asio::io_context& io_;
462
672
  agent agent_;
463
- std::size_t num_vbuckets_;
673
+ topology::configuration::vbucket_map vbucket_map_;
464
674
  std::string scope_name_;
465
675
  std::string collection_name_;
466
- std::variant<std::monostate, range_scan, sampling_scan> scan_type_;
676
+ std::uint32_t collection_id_;
677
+ std::variant<std::monostate, range_scan, prefix_scan, sampling_scan> scan_type_;
467
678
  range_scan_orchestrator_options options_;
468
679
  std::map<std::size_t, std::optional<range_snapshot_requirements>> vbucket_to_snapshot_requirements_;
469
680
  std::map<std::uint16_t, std::shared_ptr<range_scan_stream>> streams_{};
470
- std::size_t item_limit{ std::numeric_limits<size_t>::max() };
681
+ std::map<std::int16_t, std::atomic_uint16_t> stream_count_per_node_{};
682
+ std::recursive_mutex stream_start_mutex_{};
683
+ std::mutex stream_map_mutex_{};
684
+ std::mutex stream_count_per_node_mutex_{};
685
+ std::atomic_uint16_t active_stream_count_ = 0;
686
+ std::uint16_t concurrency_ = 1;
687
+ std::size_t item_limit_{ std::numeric_limits<size_t>::max() };
688
+ bool cancelled_{ false };
471
689
  };
472
690
 
473
691
  range_scan_orchestrator::range_scan_orchestrator(asio::io_context& io,
474
692
  agent kv_provider,
475
- std::size_t num_vbuckets,
693
+ topology::configuration::vbucket_map vbucket_map,
476
694
  std::string scope_name,
477
695
  std::string collection_name,
478
- std::variant<std::monostate, range_scan, sampling_scan> scan_type,
696
+ std::variant<std::monostate, range_scan, prefix_scan, sampling_scan> scan_type,
479
697
  range_scan_orchestrator_options options)
480
698
  : impl_{ std::make_shared<range_scan_orchestrator_impl>(io,
481
699
  std::move(kv_provider),
482
- num_vbuckets,
700
+ std::move(vbucket_map),
483
701
  std::move(scope_name),
484
702
  std::move(collection_name),
485
703
  std::move(scan_type),