@temporalio/core-bridge 0.20.2 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +137 -127
- package/index.d.ts +7 -2
- package/package.json +3 -3
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/docker/docker-compose.yaml +5 -4
- package/sdk-core/client/Cargo.toml +1 -0
- package/sdk-core/client/src/lib.rs +52 -9
- package/sdk-core/client/src/raw.rs +9 -1
- package/sdk-core/client/src/retry.rs +12 -1
- package/sdk-core/client/src/workflow_handle/mod.rs +183 -0
- package/sdk-core/core/src/abstractions.rs +10 -3
- package/sdk-core/core/src/core_tests/child_workflows.rs +7 -9
- package/sdk-core/core/src/core_tests/determinism.rs +8 -19
- package/sdk-core/core/src/core_tests/local_activities.rs +22 -32
- package/sdk-core/core/src/core_tests/queries.rs +272 -5
- package/sdk-core/core/src/core_tests/workers.rs +4 -34
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +197 -41
- package/sdk-core/core/src/pending_activations.rs +11 -0
- package/sdk-core/core/src/telemetry/mod.rs +1 -1
- package/sdk-core/core/src/test_help/mod.rs +57 -7
- package/sdk-core/core/src/worker/mod.rs +64 -15
- package/sdk-core/core/src/workflow/machines/mod.rs +1 -1
- package/sdk-core/core/src/workflow/machines/timer_state_machine.rs +2 -2
- package/sdk-core/core/src/workflow/machines/workflow_machines.rs +14 -3
- package/sdk-core/core/src/workflow/mod.rs +5 -2
- package/sdk-core/core/src/workflow/workflow_tasks/cache_manager.rs +47 -2
- package/sdk-core/core/src/workflow/workflow_tasks/concurrency_manager.rs +16 -2
- package/sdk-core/core/src/workflow/workflow_tasks/mod.rs +252 -125
- package/sdk-core/core-api/src/worker.rs +9 -0
- package/sdk-core/sdk/Cargo.toml +1 -0
- package/sdk-core/sdk/src/activity_context.rs +223 -0
- package/sdk-core/sdk/src/interceptors.rs +8 -2
- package/sdk-core/sdk/src/lib.rs +167 -122
- package/sdk-core/sdk-core-protos/src/history_info.rs +3 -7
- package/sdk-core/test-utils/Cargo.toml +1 -0
- package/sdk-core/test-utils/src/lib.rs +78 -37
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +11 -4
- package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +0 -1
- package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +0 -3
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +33 -17
- package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +10 -1
- package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +0 -1
- package/sdk-core/tests/integ_tests/workflow_tests.rs +71 -3
- package/sdk-core/tests/load_tests.rs +80 -6
- package/src/errors.rs +9 -2
- package/src/lib.rs +39 -16
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
|
@@ -23,6 +23,7 @@ use futures::FutureExt;
|
|
|
23
23
|
use parking_lot::Mutex;
|
|
24
24
|
use std::{
|
|
25
25
|
fmt::Debug,
|
|
26
|
+
future::Future,
|
|
26
27
|
ops::Add,
|
|
27
28
|
sync::Arc,
|
|
28
29
|
time::{Duration, Instant},
|
|
@@ -57,7 +58,7 @@ pub struct WorkflowTaskManager {
|
|
|
57
58
|
pending_activations: PendingActivations,
|
|
58
59
|
/// Holds activations which are purely query activations needed to respond to legacy queries.
|
|
59
60
|
/// Activations may only be added here for runs which do not have other pending activations.
|
|
60
|
-
|
|
61
|
+
pending_queries: SegQueue<WorkflowActivation>,
|
|
61
62
|
/// Holds poll wft responses from the server that need to be applied
|
|
62
63
|
ready_buffered_wft: SegQueue<ValidPollWFTQResponse>,
|
|
63
64
|
/// Used to wake blocked workflow task polling
|
|
@@ -74,9 +75,8 @@ pub struct WorkflowTaskManager {
|
|
|
74
75
|
#[derive(Clone, Debug)]
|
|
75
76
|
pub(crate) struct OutstandingTask {
|
|
76
77
|
pub info: WorkflowTaskInfo,
|
|
77
|
-
///
|
|
78
|
-
|
|
79
|
-
pub legacy_query: Option<QueryWorkflow>,
|
|
78
|
+
/// Set if the outstanding task has quer(ies) which must be fulfilled upon finishing replay
|
|
79
|
+
pub pending_queries: Vec<QueryWorkflow>,
|
|
80
80
|
start_time: Instant,
|
|
81
81
|
}
|
|
82
82
|
|
|
@@ -86,17 +86,29 @@ pub(crate) enum OutstandingActivation {
|
|
|
86
86
|
Normal {
|
|
87
87
|
/// True if there is an eviction in the joblist
|
|
88
88
|
contains_eviction: bool,
|
|
89
|
+
/// Number of jobs in the activation
|
|
90
|
+
num_jobs: usize,
|
|
89
91
|
},
|
|
90
92
|
/// An activation for a legacy query
|
|
91
93
|
LegacyQuery,
|
|
92
94
|
}
|
|
93
95
|
|
|
94
96
|
impl OutstandingActivation {
|
|
97
|
+
const fn has_only_eviction(self) -> bool {
|
|
98
|
+
matches!(
|
|
99
|
+
self,
|
|
100
|
+
OutstandingActivation::Normal {
|
|
101
|
+
contains_eviction: true,
|
|
102
|
+
num_jobs: nj
|
|
103
|
+
}
|
|
104
|
+
if nj == 1)
|
|
105
|
+
}
|
|
95
106
|
const fn has_eviction(self) -> bool {
|
|
96
107
|
matches!(
|
|
97
108
|
self,
|
|
98
109
|
OutstandingActivation::Normal {
|
|
99
|
-
contains_eviction: true
|
|
110
|
+
contains_eviction: true,
|
|
111
|
+
..
|
|
100
112
|
}
|
|
101
113
|
)
|
|
102
114
|
}
|
|
@@ -152,9 +164,9 @@ pub(crate) enum ActivationAction {
|
|
|
152
164
|
|
|
153
165
|
#[derive(Debug, Eq, PartialEq, Hash)]
|
|
154
166
|
pub(crate) enum EvictionRequestResult {
|
|
155
|
-
|
|
167
|
+
EvictionRequested(Option<u32>),
|
|
156
168
|
NotFound,
|
|
157
|
-
|
|
169
|
+
EvictionAlreadyRequested(Option<u32>),
|
|
158
170
|
}
|
|
159
171
|
|
|
160
172
|
macro_rules! machine_mut {
|
|
@@ -179,7 +191,7 @@ impl WorkflowTaskManager {
|
|
|
179
191
|
Self {
|
|
180
192
|
workflow_machines: WorkflowConcurrencyManager::new(),
|
|
181
193
|
pending_activations: Default::default(),
|
|
182
|
-
|
|
194
|
+
pending_queries: Default::default(),
|
|
183
195
|
ready_buffered_wft: Default::default(),
|
|
184
196
|
pending_activations_notifier,
|
|
185
197
|
cache_manager: Mutex::new(WorkflowCacheManager::new(eviction_policy, metrics.clone())),
|
|
@@ -187,9 +199,69 @@ impl WorkflowTaskManager {
|
|
|
187
199
|
}
|
|
188
200
|
}
|
|
189
201
|
|
|
202
|
+
/// Returns number of currently cached workflows
|
|
203
|
+
pub fn cached_workflows(&self) -> usize {
|
|
204
|
+
self.workflow_machines.cached_workflows()
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
/// Resolves once there is either capacity in the cache, or there are no pending evictions.
|
|
208
|
+
/// Inversely: Waits while there are pending evictions and the cache is full.
|
|
209
|
+
/// Waiting while there are no pending evictions must be avoided because it would block forever,
|
|
210
|
+
/// since there is no way for the cache size to be reduced.
|
|
211
|
+
pub fn wait_for_cache_capacity(&self) -> Option<impl Future<Output = ()> + '_> {
|
|
212
|
+
let are_no_pending_evictions = || {
|
|
213
|
+
!self.pending_activations.is_some_eviction()
|
|
214
|
+
&& !self.workflow_machines.are_outstanding_evictions()
|
|
215
|
+
};
|
|
216
|
+
if !are_no_pending_evictions() {
|
|
217
|
+
let wait_fut = {
|
|
218
|
+
self.cache_manager
|
|
219
|
+
.lock()
|
|
220
|
+
.wait_for_capacity(are_no_pending_evictions)?
|
|
221
|
+
};
|
|
222
|
+
return Some(wait_fut);
|
|
223
|
+
}
|
|
224
|
+
None
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
/// Add a new run (as just received from polling) to the cache. If doing so would overflow the
|
|
228
|
+
/// cache, an eviction is queued to make room and the passed-in task is buffered and `None` is
|
|
229
|
+
/// returned.
|
|
230
|
+
///
|
|
231
|
+
/// If the task is for a run already in the cache, the poll response is returned right away
|
|
232
|
+
/// and should be issued.
|
|
233
|
+
pub async fn add_new_run_to_cache(
|
|
234
|
+
&self,
|
|
235
|
+
poll_resp: ValidPollWFTQResponse,
|
|
236
|
+
) -> Option<ValidPollWFTQResponse> {
|
|
237
|
+
let run_id = &poll_resp.workflow_execution.run_id;
|
|
238
|
+
let maybe_evicted = self.cache_manager.lock().insert(run_id);
|
|
239
|
+
|
|
240
|
+
if let Some(evicted_run_id) = maybe_evicted {
|
|
241
|
+
self.request_eviction(
|
|
242
|
+
&evicted_run_id,
|
|
243
|
+
"Workflow cache full",
|
|
244
|
+
EvictionReason::CacheFull,
|
|
245
|
+
);
|
|
246
|
+
debug!(run_id=%poll_resp.workflow_execution.run_id,
|
|
247
|
+
"Received a WFT for a new run while at the cache limit. Buffering the task.");
|
|
248
|
+
// Buffer the task
|
|
249
|
+
if let Some(not_buffered) = self
|
|
250
|
+
.workflow_machines
|
|
251
|
+
.buffer_resp_if_outstanding_work(poll_resp)
|
|
252
|
+
{
|
|
253
|
+
self.make_buffered_poll_ready(not_buffered);
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
return None;
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
Some(poll_resp)
|
|
260
|
+
}
|
|
261
|
+
|
|
190
262
|
pub(crate) fn next_pending_activation(&self) -> Option<WorkflowActivation> {
|
|
191
|
-
// Dispatch pending
|
|
192
|
-
if let leg_q @ Some(_) = self.
|
|
263
|
+
// Dispatch pending queries first
|
|
264
|
+
if let leg_q @ Some(_) = self.pending_queries.pop() {
|
|
193
265
|
return leg_q;
|
|
194
266
|
}
|
|
195
267
|
// It is important that we do not issue pending activations for any workflows which already
|
|
@@ -205,15 +277,24 @@ impl WorkflowTaskManager {
|
|
|
205
277
|
.workflow_machines
|
|
206
278
|
.access_sync(&pending_info.run_id, |wfm| wfm.machines.get_wf_activation())
|
|
207
279
|
.and_then(|mut act| {
|
|
208
|
-
|
|
209
|
-
|
|
280
|
+
// Only evict workflows after all other pending work is complete.
|
|
281
|
+
if act.jobs.is_empty() {
|
|
282
|
+
if let Some(reason) = pending_info.needs_eviction {
|
|
283
|
+
act.append_evict_job(reason);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
if !act.jobs.is_empty() {
|
|
287
|
+
self.insert_outstanding_activation(&act)?;
|
|
288
|
+
self.cache_manager.lock().touch(&act.run_id);
|
|
289
|
+
Ok(Some(act))
|
|
290
|
+
} else {
|
|
291
|
+
// If for whatever reason we triggered a pending activation but there wasn't
|
|
292
|
+
// actually any work to be done, just ignore that.
|
|
293
|
+
Ok(None)
|
|
210
294
|
}
|
|
211
|
-
self.insert_outstanding_activation(&act)?;
|
|
212
|
-
Ok(act)
|
|
213
295
|
})
|
|
214
296
|
{
|
|
215
|
-
|
|
216
|
-
Some(act)
|
|
297
|
+
act
|
|
217
298
|
} else {
|
|
218
299
|
self.request_eviction(
|
|
219
300
|
&pending_info.run_id,
|
|
@@ -256,6 +337,10 @@ impl WorkflowTaskManager {
|
|
|
256
337
|
reason: EvictionReason,
|
|
257
338
|
) -> EvictionRequestResult {
|
|
258
339
|
if self.workflow_machines.exists(run_id) {
|
|
340
|
+
let attempts = self
|
|
341
|
+
.workflow_machines
|
|
342
|
+
.get_task(run_id)
|
|
343
|
+
.map(|wt| wt.info.attempt);
|
|
259
344
|
if !self.activation_has_eviction(run_id) {
|
|
260
345
|
let message = message.into();
|
|
261
346
|
debug!(%run_id, %message, "Eviction requested");
|
|
@@ -263,13 +348,9 @@ impl WorkflowTaskManager {
|
|
|
263
348
|
self.pending_activations
|
|
264
349
|
.notify_needs_eviction(run_id, message, reason);
|
|
265
350
|
self.pending_activations_notifier.notify_waiters();
|
|
266
|
-
EvictionRequestResult::
|
|
267
|
-
self.workflow_machines
|
|
268
|
-
.get_task(run_id)
|
|
269
|
-
.map(|wt| wt.info.attempt),
|
|
270
|
-
)
|
|
351
|
+
EvictionRequestResult::EvictionRequested(attempts)
|
|
271
352
|
} else {
|
|
272
|
-
EvictionRequestResult::
|
|
353
|
+
EvictionRequestResult::EvictionAlreadyRequested(attempts)
|
|
273
354
|
}
|
|
274
355
|
} else {
|
|
275
356
|
warn!(%run_id, "Eviction requested for unknown run");
|
|
@@ -277,11 +358,8 @@ impl WorkflowTaskManager {
|
|
|
277
358
|
}
|
|
278
359
|
}
|
|
279
360
|
|
|
280
|
-
/// Evict a workflow from the cache by its run id
|
|
281
|
-
///
|
|
282
|
-
/// activations invalidated.
|
|
283
|
-
///
|
|
284
|
-
/// Returns that workflow's task info if it was present.
|
|
361
|
+
/// Evict a workflow from the cache by its run id. Any existing pending activations will be
|
|
362
|
+
/// destroyed, and any outstanding activations invalidated.
|
|
285
363
|
fn evict_run(&self, run_id: &str) {
|
|
286
364
|
debug!(run_id=%run_id, "Evicting run");
|
|
287
365
|
|
|
@@ -333,33 +411,45 @@ impl WorkflowTaskManager {
|
|
|
333
411
|
.take()
|
|
334
412
|
.map(|q| query_to_job(LEGACY_QUERY_ID.to_string(), q));
|
|
335
413
|
|
|
336
|
-
let (info, mut next_activation) =
|
|
414
|
+
let (info, mut next_activation, mut pending_queries) =
|
|
337
415
|
match self.instantiate_or_update_workflow(work, client).await {
|
|
338
|
-
Ok(
|
|
416
|
+
Ok(res) => res,
|
|
339
417
|
Err(e) => {
|
|
340
418
|
return NewWfTaskOutcome::Evict(e);
|
|
341
419
|
}
|
|
342
420
|
};
|
|
343
421
|
|
|
422
|
+
if !pending_queries.is_empty() && legacy_query.is_some() {
|
|
423
|
+
error!(
|
|
424
|
+
"Server issued both normal and legacy queries. This should not happen. Please \
|
|
425
|
+
file a bug report."
|
|
426
|
+
);
|
|
427
|
+
return NewWfTaskOutcome::Evict(WorkflowUpdateError {
|
|
428
|
+
source: WFMachinesError::Fatal(
|
|
429
|
+
"Server issued both normal and legacy query".to_string(),
|
|
430
|
+
),
|
|
431
|
+
run_id: next_activation.run_id,
|
|
432
|
+
});
|
|
433
|
+
}
|
|
434
|
+
|
|
344
435
|
// Immediately dispatch query activation if no other jobs
|
|
345
|
-
let
|
|
346
|
-
if
|
|
436
|
+
if let Some(lq) = legacy_query {
|
|
437
|
+
if next_activation.jobs.is_empty() {
|
|
347
438
|
debug!("Dispatching legacy query {}", &lq);
|
|
348
439
|
next_activation
|
|
349
440
|
.jobs
|
|
350
441
|
.push(workflow_activation_job::Variant::QueryWorkflow(lq).into());
|
|
442
|
+
} else {
|
|
443
|
+
pending_queries.push(lq);
|
|
351
444
|
}
|
|
352
|
-
|
|
353
|
-
} else {
|
|
354
|
-
legacy_query
|
|
355
|
-
};
|
|
445
|
+
}
|
|
356
446
|
|
|
357
447
|
self.workflow_machines
|
|
358
448
|
.insert_wft(
|
|
359
449
|
&next_activation.run_id,
|
|
360
450
|
OutstandingTask {
|
|
361
451
|
info,
|
|
362
|
-
|
|
452
|
+
pending_queries,
|
|
363
453
|
start_time: task_start_time,
|
|
364
454
|
},
|
|
365
455
|
)
|
|
@@ -396,20 +486,23 @@ impl WorkflowTaskManager {
|
|
|
396
486
|
mut commands: Vec<WFCommand>,
|
|
397
487
|
local_activity_request_sink: impl FnOnce(Vec<LocalActRequest>) -> Vec<LocalActivityResolution>,
|
|
398
488
|
) -> Result<Option<ServerCommandsWithWorkflowInfo>, WorkflowUpdateError> {
|
|
399
|
-
//
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
489
|
+
// There used to be code here that would return right away if the run reply had no commands
|
|
490
|
+
// and the activation that was just completed only had an eviction in it. That was bad
|
|
491
|
+
// because we wouldn't have yet sent any previously buffered commands since there was a
|
|
492
|
+
// pending activation (the eviction) and then we would *skip* doing anything with them here,
|
|
493
|
+
// because there were no new commands. In general it seems best to avoid short-circuiting
|
|
494
|
+
// here.
|
|
495
|
+
|
|
496
|
+
let activation_was_only_eviction = self.activation_has_only_eviction(run_id);
|
|
497
|
+
let (task_token, has_pending_query, start_time) =
|
|
405
498
|
if let Some(entry) = self.workflow_machines.get_task(run_id) {
|
|
406
499
|
(
|
|
407
500
|
entry.info.task_token.clone(),
|
|
408
|
-
entry.
|
|
501
|
+
!entry.pending_queries.is_empty(),
|
|
409
502
|
entry.start_time,
|
|
410
503
|
)
|
|
411
504
|
} else {
|
|
412
|
-
if !
|
|
505
|
+
if !activation_was_only_eviction {
|
|
413
506
|
// Don't bother warning if this was an eviction, since it's normal to issue
|
|
414
507
|
// eviction activations without an associated workflow task in that case.
|
|
415
508
|
warn!(
|
|
@@ -458,6 +551,7 @@ impl WorkflowTaskManager {
|
|
|
458
551
|
}
|
|
459
552
|
}
|
|
460
553
|
|
|
554
|
+
let activation_was_eviction = self.activation_has_eviction(run_id);
|
|
461
555
|
let (are_pending, server_cmds, local_activities, wft_timeout) = machine_mut!(
|
|
462
556
|
self,
|
|
463
557
|
run_id,
|
|
@@ -466,7 +560,13 @@ impl WorkflowTaskManager {
|
|
|
466
560
|
// Send commands from lang into the machines then check if the workflow run
|
|
467
561
|
// needs another activation and mark it if so
|
|
468
562
|
wfm.push_commands(commands).await?;
|
|
469
|
-
|
|
563
|
+
// Don't bother applying the next task if we're evicting at the end of
|
|
564
|
+
// this activation
|
|
565
|
+
let are_pending = if !activation_was_eviction {
|
|
566
|
+
wfm.apply_next_task_if_ready().await?
|
|
567
|
+
} else {
|
|
568
|
+
false
|
|
569
|
+
};
|
|
470
570
|
// We want to fetch the outgoing commands only after a next WFT may have
|
|
471
571
|
// been applied, as outgoing server commands may be affected.
|
|
472
572
|
let outgoing_cmds = wfm.get_server_commands();
|
|
@@ -506,36 +606,34 @@ impl WorkflowTaskManager {
|
|
|
506
606
|
let must_heartbeat = self
|
|
507
607
|
.wait_for_local_acts_or_heartbeat(run_id, wft_heartbeat_deadline)
|
|
508
608
|
.await;
|
|
509
|
-
let
|
|
609
|
+
let has_query_responses = !query_responses.is_empty();
|
|
610
|
+
let is_query_playback = has_pending_query && !has_query_responses;
|
|
510
611
|
|
|
511
612
|
// We only actually want to send commands back to the server if there are no more
|
|
512
613
|
// pending activations and we are caught up on replay. We don't want to complete a wft
|
|
513
614
|
// if we already saw the final event in the workflow, or if we are playing back for the
|
|
514
|
-
// express purpose of fulfilling a query
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
615
|
+
// express purpose of fulfilling a query. If the activation we sent was *only* an
|
|
616
|
+
// eviction, and there were no commands produced during iteration, don't send that
|
|
617
|
+
// either.
|
|
618
|
+
let no_commands_and_evicting =
|
|
619
|
+
server_cmds.commands.is_empty() && activation_was_only_eviction;
|
|
620
|
+
let to_be_sent = ServerCommandsWithWorkflowInfo {
|
|
621
|
+
task_token,
|
|
622
|
+
action: ActivationAction::WftComplete {
|
|
623
|
+
// TODO: Don't force if also sending complete execution cmd
|
|
624
|
+
force_new_wft: must_heartbeat,
|
|
625
|
+
commands: server_cmds.commands,
|
|
626
|
+
query_responses,
|
|
627
|
+
},
|
|
628
|
+
};
|
|
629
|
+
let should_respond = !(self.pending_activations.has_pending(run_id)
|
|
630
|
+
|| server_cmds.replaying
|
|
631
|
+
|| is_query_playback
|
|
632
|
+
|| no_commands_and_evicting);
|
|
633
|
+
if should_respond || has_query_responses {
|
|
634
|
+
Some(to_be_sent)
|
|
530
635
|
} else {
|
|
531
|
-
|
|
532
|
-
task_token,
|
|
533
|
-
action: ActivationAction::WftComplete {
|
|
534
|
-
commands: vec![],
|
|
535
|
-
query_responses,
|
|
536
|
-
force_new_wft: false,
|
|
537
|
-
},
|
|
538
|
-
})
|
|
636
|
+
None
|
|
539
637
|
}
|
|
540
638
|
};
|
|
541
639
|
Ok(ret)
|
|
@@ -573,7 +671,8 @@ impl WorkflowTaskManager {
|
|
|
573
671
|
} else {
|
|
574
672
|
// Blow up any cached data associated with the workflow
|
|
575
673
|
let should_report = match self.request_eviction(run_id, failstr, reason) {
|
|
576
|
-
EvictionRequestResult::
|
|
674
|
+
EvictionRequestResult::EvictionRequested(Some(attempt))
|
|
675
|
+
| EvictionRequestResult::EvictionAlreadyRequested(Some(attempt)) => attempt <= 1,
|
|
577
676
|
_ => false,
|
|
578
677
|
};
|
|
579
678
|
if should_report {
|
|
@@ -592,7 +691,8 @@ impl WorkflowTaskManager {
|
|
|
592
691
|
&self,
|
|
593
692
|
poll_wf_resp: ValidPollWFTQResponse,
|
|
594
693
|
client: Arc<WorkerClientBag>,
|
|
595
|
-
) -> Result<(WorkflowTaskInfo, WorkflowActivation), WorkflowUpdateError>
|
|
694
|
+
) -> Result<(WorkflowTaskInfo, WorkflowActivation, Vec<QueryWorkflow>), WorkflowUpdateError>
|
|
695
|
+
{
|
|
596
696
|
let run_id = poll_wf_resp.workflow_execution.run_id.clone();
|
|
597
697
|
|
|
598
698
|
let wft_info = WorkflowTaskInfo {
|
|
@@ -606,11 +706,16 @@ impl WorkflowTaskManager {
|
|
|
606
706
|
.get(0)
|
|
607
707
|
.map(|ev| ev.event_id > 1)
|
|
608
708
|
.unwrap_or_default();
|
|
709
|
+
let poll_resp_is_incremental =
|
|
710
|
+
poll_resp_is_incremental || poll_wf_resp.history.events.is_empty();
|
|
711
|
+
|
|
712
|
+
let mut did_miss_cache = !poll_resp_is_incremental;
|
|
609
713
|
|
|
610
714
|
let page_token = if !self.workflow_machines.exists(&run_id) && poll_resp_is_incremental {
|
|
611
715
|
debug!(run_id=?run_id, "Workflow task has partial history, but workflow is not in \
|
|
612
716
|
cache. Will fetch history");
|
|
613
717
|
self.metrics.sticky_cache_miss();
|
|
718
|
+
did_miss_cache = true;
|
|
614
719
|
NextPageToken::FetchFromStart
|
|
615
720
|
} else {
|
|
616
721
|
poll_wf_resp.next_page_token.into()
|
|
@@ -639,16 +744,26 @@ impl WorkflowTaskManager {
|
|
|
639
744
|
.await
|
|
640
745
|
{
|
|
641
746
|
Ok(mut activation) => {
|
|
642
|
-
// If there are in-poll queries, insert jobs for those queries into the activation
|
|
747
|
+
// If there are in-poll queries, insert jobs for those queries into the activation,
|
|
748
|
+
// but only if we hit the cache. If we didn't, those queries will need to be dealt
|
|
749
|
+
// with once replay is over
|
|
750
|
+
let mut pending_queries = vec![];
|
|
643
751
|
if !poll_wf_resp.query_requests.is_empty() {
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
752
|
+
if !did_miss_cache {
|
|
753
|
+
let query_jobs = poll_wf_resp
|
|
754
|
+
.query_requests
|
|
755
|
+
.into_iter()
|
|
756
|
+
.map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
|
|
757
|
+
activation.jobs.extend(query_jobs);
|
|
758
|
+
} else {
|
|
759
|
+
poll_wf_resp
|
|
760
|
+
.query_requests
|
|
761
|
+
.into_iter()
|
|
762
|
+
.for_each(|q| pending_queries.push(q));
|
|
763
|
+
}
|
|
649
764
|
}
|
|
650
765
|
|
|
651
|
-
Ok((wft_info, activation))
|
|
766
|
+
Ok((wft_info, activation, pending_queries))
|
|
652
767
|
}
|
|
653
768
|
Err(source) => Err(WorkflowUpdateError { source, run_id }),
|
|
654
769
|
}
|
|
@@ -660,12 +775,14 @@ impl WorkflowTaskManager {
|
|
|
660
775
|
/// eviction, which could be avoided if this is called too early.
|
|
661
776
|
///
|
|
662
777
|
/// Returns true if WFT was marked completed internally
|
|
663
|
-
pub(crate) fn after_wft_report(&self, run_id: &str,
|
|
778
|
+
pub(crate) fn after_wft_report(&self, run_id: &str, reported_wft_to_server: bool) -> bool {
|
|
664
779
|
let mut just_evicted = false;
|
|
665
780
|
|
|
666
|
-
if
|
|
667
|
-
|
|
668
|
-
|
|
781
|
+
if self
|
|
782
|
+
.workflow_machines
|
|
783
|
+
.get_activation(run_id)
|
|
784
|
+
.map(|a| a.has_eviction())
|
|
785
|
+
.unwrap_or_default()
|
|
669
786
|
{
|
|
670
787
|
self.evict_run(run_id);
|
|
671
788
|
just_evicted = true;
|
|
@@ -673,50 +790,48 @@ impl WorkflowTaskManager {
|
|
|
673
790
|
|
|
674
791
|
// Workflows with no more pending activations (IE: They have completed a WFT) must be
|
|
675
792
|
// removed from the outstanding tasks map
|
|
676
|
-
|
|
677
|
-
if
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
{
|
|
685
|
-
|
|
793
|
+
if !self.pending_activations.has_pending(run_id) && !just_evicted {
|
|
794
|
+
if let Some(ref mut ot) = &mut *self
|
|
795
|
+
.workflow_machines
|
|
796
|
+
.get_task_mut(run_id)
|
|
797
|
+
.expect("Machine must exist")
|
|
798
|
+
{
|
|
799
|
+
// Check if there was a pending query which must be fulfilled, and if there is
|
|
800
|
+
// create a new pending activation for it.
|
|
801
|
+
if !ot.pending_queries.is_empty() {
|
|
802
|
+
for query in ot.pending_queries.drain(..) {
|
|
686
803
|
let na = create_query_activation(run_id.to_string(), [query]);
|
|
687
|
-
self.
|
|
688
|
-
self.pending_activations_notifier.notify_waiters();
|
|
689
|
-
return false;
|
|
804
|
+
self.pending_queries.push(na);
|
|
690
805
|
}
|
|
806
|
+
self.pending_activations_notifier.notify_waiters();
|
|
807
|
+
return false;
|
|
691
808
|
}
|
|
809
|
+
}
|
|
692
810
|
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
811
|
+
// Evict run id if cache is full. Non-sticky will always evict.
|
|
812
|
+
let maybe_evicted = self.cache_manager.lock().insert(run_id);
|
|
813
|
+
if let Some(evicted_run_id) = maybe_evicted {
|
|
814
|
+
self.request_eviction(
|
|
815
|
+
&evicted_run_id,
|
|
816
|
+
"Workflow cache full",
|
|
817
|
+
EvictionReason::CacheFull,
|
|
818
|
+
);
|
|
819
|
+
}
|
|
702
820
|
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
}
|
|
821
|
+
// If there was a buffered poll response from the server, it is now ready to
|
|
822
|
+
// be handled.
|
|
823
|
+
if let Some(buffd) = self.workflow_machines.take_buffered_poll(run_id) {
|
|
824
|
+
self.make_buffered_poll_ready(buffd);
|
|
708
825
|
}
|
|
826
|
+
}
|
|
709
827
|
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
} else {
|
|
716
|
-
false
|
|
717
|
-
};
|
|
828
|
+
// If we reported to server, we always want to mark it complete.
|
|
829
|
+
let wft_marked_complete = self
|
|
830
|
+
.workflow_machines
|
|
831
|
+
.complete_wft(run_id, reported_wft_to_server)
|
|
832
|
+
.is_some();
|
|
718
833
|
self.on_activation_done(run_id);
|
|
719
|
-
|
|
834
|
+
wft_marked_complete
|
|
720
835
|
}
|
|
721
836
|
|
|
722
837
|
/// Must be called after *every* activation is replied to, regardless of whether or not we
|
|
@@ -725,10 +840,11 @@ impl WorkflowTaskManager {
|
|
|
725
840
|
///
|
|
726
841
|
/// Any subsequent action that needs to be taken will be created as a new activation
|
|
727
842
|
fn on_activation_done(&self, run_id: &str) {
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
//
|
|
843
|
+
self.workflow_machines.delete_activation(run_id);
|
|
844
|
+
// It's important to use `notify_one` here to avoid possible races where we're waiting
|
|
845
|
+
// on a cache slot and fail to realize pending activations must be issued before a slot
|
|
846
|
+
// will free up.
|
|
847
|
+
self.pending_activations_notifier.notify_one();
|
|
732
848
|
}
|
|
733
849
|
|
|
734
850
|
/// Let a workflow know that something we've been waiting locally on has resolved, like a local
|
|
@@ -739,7 +855,8 @@ impl WorkflowTaskManager {
|
|
|
739
855
|
run_id: &str,
|
|
740
856
|
resolved: LocalResolution,
|
|
741
857
|
) -> Result<(), WorkflowUpdateError> {
|
|
742
|
-
self
|
|
858
|
+
let result_was_important = self
|
|
859
|
+
.workflow_machines
|
|
743
860
|
.access_sync(run_id, |wfm: &mut WorkflowManager| {
|
|
744
861
|
wfm.notify_of_local_result(resolved)
|
|
745
862
|
})?
|
|
@@ -748,7 +865,9 @@ impl WorkflowTaskManager {
|
|
|
748
865
|
run_id: run_id.to_string(),
|
|
749
866
|
})?;
|
|
750
867
|
|
|
751
|
-
|
|
868
|
+
if result_was_important {
|
|
869
|
+
self.needs_activation(run_id);
|
|
870
|
+
}
|
|
752
871
|
Ok(())
|
|
753
872
|
}
|
|
754
873
|
|
|
@@ -765,6 +884,7 @@ impl WorkflowTaskManager {
|
|
|
765
884
|
} else {
|
|
766
885
|
OutstandingActivation::Normal {
|
|
767
886
|
contains_eviction: act.eviction_index().is_some(),
|
|
887
|
+
num_jobs: act.jobs.len(),
|
|
768
888
|
}
|
|
769
889
|
};
|
|
770
890
|
match self
|
|
@@ -785,6 +905,13 @@ impl WorkflowTaskManager {
|
|
|
785
905
|
}
|
|
786
906
|
}
|
|
787
907
|
|
|
908
|
+
fn activation_has_only_eviction(&self, run_id: &str) -> bool {
|
|
909
|
+
self.workflow_machines
|
|
910
|
+
.get_activation(run_id)
|
|
911
|
+
.map(OutstandingActivation::has_only_eviction)
|
|
912
|
+
.unwrap_or_default()
|
|
913
|
+
}
|
|
914
|
+
|
|
788
915
|
fn activation_has_eviction(&self, run_id: &str) -> bool {
|
|
789
916
|
self.workflow_machines
|
|
790
917
|
.get_activation(run_id)
|
|
@@ -20,6 +20,8 @@ pub struct WorkerConfig {
|
|
|
20
20
|
/// The maximum allowed number of workflow tasks that will ever be given to this worker at one
|
|
21
21
|
/// time. Note that one workflow task may require multiple activations - so the WFT counts as
|
|
22
22
|
/// "outstanding" until all activations it requires have been completed.
|
|
23
|
+
///
|
|
24
|
+
/// Cannot be larger than `max_cached_workflows`.
|
|
23
25
|
#[builder(default = "100")]
|
|
24
26
|
pub max_outstanding_workflow_tasks: usize,
|
|
25
27
|
/// The maximum number of activity tasks that will ever be given to this worker concurrently
|
|
@@ -90,6 +92,13 @@ impl WorkerConfigBuilder {
|
|
|
90
92
|
if self.max_concurrent_wft_polls == Some(0) {
|
|
91
93
|
return Err("`max_concurrent_wft_polls` must be at least 1".to_owned());
|
|
92
94
|
}
|
|
95
|
+
if self.max_outstanding_workflow_tasks > self.max_cached_workflows {
|
|
96
|
+
return Err(
|
|
97
|
+
"Maximum concurrent workflow tasks cannot exceed the maximum number of cached \
|
|
98
|
+
workflows"
|
|
99
|
+
.to_owned(),
|
|
100
|
+
);
|
|
101
|
+
}
|
|
93
102
|
Ok(())
|
|
94
103
|
}
|
|
95
104
|
}
|
package/sdk-core/sdk/Cargo.toml
CHANGED