@temporalio/core-bridge 0.16.3 → 0.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.node +0 -0
- package/package.json +3 -3
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/sdk-core-protos/src/lib.rs +7 -0
- package/sdk-core/src/core_tests/queries.rs +9 -2
- package/sdk-core/src/core_tests/workflow_tasks.rs +85 -6
- package/sdk-core/src/errors.rs +2 -4
- package/sdk-core/src/machines/child_workflow_state_machine.rs +1 -10
- package/sdk-core/src/machines/workflow_machines.rs +17 -15
- package/sdk-core/src/test_help/history_builder.rs +10 -0
- package/sdk-core/src/test_help/history_info.rs +21 -1
- package/sdk-core/src/test_help/mod.rs +6 -10
- package/sdk-core/src/worker/mod.rs +63 -46
- package/sdk-core/src/workflow/workflow_tasks/mod.rs +33 -35
package/index.node
CHANGED
Binary file
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@temporalio/core-bridge",
-  "version": "0.16.3",
+  "version": "0.17.2",
   "description": "Temporal.io SDK Core<>Node bridge",
   "main": "index.node",
   "types": "index.d.ts",
@@ -19,7 +19,7 @@
   "license": "MIT",
   "dependencies": {
     "@opentelemetry/api": "^1.0.3",
-    "@temporalio/common": "^0.16.3",
+    "@temporalio/common": "^0.17.2",
     "arg": "^5.0.1",
     "cargo-cp-artifact": "^0.1.4",
     "which": "^2.0.2"
@@ -40,5 +40,5 @@
   "publishConfig": {
     "access": "public"
   },
-  "gitHead": "…"
+  "gitHead": "2232465a4f9b0cade28f0c21c2d7856053678728"
 }
package/releases/aarch64-apple-darwin/index.node
CHANGED
Binary file
package/releases/aarch64-unknown-linux-gnu/index.node
CHANGED
Binary file
package/releases/x86_64-apple-darwin/index.node
CHANGED
Binary file
package/releases/x86_64-pc-windows-msvc/index.node
CHANGED
Binary file
package/releases/x86_64-unknown-linux-gnu/index.node
CHANGED
Binary file
package/sdk-core/sdk-core-protos/src/lib.rs
CHANGED
@@ -264,6 +264,7 @@ pub mod coresdk {
     }
 
     pub mod workflow_completion {
+        use crate::temporal::api::failure;
         tonic::include_proto!("coresdk.workflow_completion");
 
         impl wf_activation_completion::Status {
@@ -274,6 +275,12 @@ pub mod coresdk {
                 }
             }
         }
+
+        impl From<failure::v1::Failure> for Failure {
+            fn from(f: failure::v1::Failure) -> Self {
+                Failure { failure: Some(f) }
+            }
+        }
     }
 
     pub mod child_workflow {
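Note (not part of the diff): the new From impl lets call sites wrap a proto-level failure into the completion-level wrapper with a plain `.into()`, which the worker changes below rely on when auto-failing a workflow task. A minimal self-contained sketch of the same pattern, using stand-in types rather than the tonic/prost-generated ones:

    // Stand-ins for the generated proto types, illustration only.
    #[derive(Debug)]
    struct ProtoFailure {
        message: String,
    }

    #[derive(Debug)]
    struct CompletionFailure {
        failure: Option<ProtoFailure>,
    }

    impl From<ProtoFailure> for CompletionFailure {
        fn from(f: ProtoFailure) -> Self {
            CompletionFailure { failure: Some(f) }
        }
    }

    fn main() {
        let inner = ProtoFailure { message: "workflow task failed".to_string() };
        // Call sites can now write `.into()` instead of building the wrapper by hand:
        let wrapped: CompletionFailure = inner.into();
        assert!(wrapped.failure.is_some());
    }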
package/sdk-core/src/core_tests/queries.rs
CHANGED
@@ -290,10 +290,17 @@ async fn legacy_query_failure_on_wft_failure() {
     core.shutdown().await;
 }
 
+#[rstest::rstest]
 #[tokio::test]
-async fn …
+async fn legacy_query_after_complete(#[values(false, true)] full_history: bool) {
     let wfid = "fake_wf_id";
-    let t = …
+    let t = if full_history {
+        canned_histories::single_timer_wf_completes("1")
+    } else {
+        let mut t = canned_histories::single_timer("1");
+        t.add_workflow_task_completed();
+        t
+    };
     let query_with_hist_task = {
         let mut pr = hist_to_poll_resp(
             &t,
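Aside (illustrative; assumes the rstest dev-dependency that the test above already uses): `#[values(...)]` expands a test into one generated case per listed value, so `legacy_query_after_complete` compiles into two tests, one per `full_history` value. The bare shape:

    #[cfg(test)]
    mod shape {
        // rstest generates one test case per value of `full_history`.
        #[rstest::rstest]
        fn one_case_per_value(#[values(false, true)] full_history: bool) {
            // Each generated case receives one of the listed values.
            let _ = full_history;
        }
    }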
package/sdk-core/src/core_tests/workflow_tasks.rs
CHANGED
@@ -1619,19 +1619,19 @@ async fn failing_wft_doesnt_eat_permit_forever() {
     t.add_by_type(EventType::WorkflowExecutionStarted);
     t.add_workflow_task_scheduled_and_started();
 
-    let failures = 5;
-    // One extra response for when we stop failing
-    let resps = (1..=(failures + 1)).map(|_| 1);
     let mock = MockServerGatewayApis::new();
-    let mut mock = single_hist_mock_sg("fake_wf_id", t, …
+    let mut mock = single_hist_mock_sg("fake_wf_id", t, [1, 1, 1], mock, true);
     mock.worker_cfg(TEST_Q, |cfg| {
         cfg.max_cached_workflows = 2;
         cfg.max_outstanding_workflow_tasks = 2;
     });
+    let outstanding_mock_tasks = mock.outstanding_task_map.clone();
     let core = mock_core(mock);
 
-    …
-    …
+    let mut run_id = "".to_string();
+    // Fail twice, verifying a permit is eaten. We cannot fail the same run more than twice in a row
+    // because we purposefully time out rather than spamming.
+    for _ in 1..=2 {
         let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
         // Issue a nonsense completion that will trigger a WFT failure
         core.complete_workflow_activation(WfActivationCompletion::from_cmd(
@@ -1648,12 +1648,91 @@ async fn failing_wft_doesnt_eat_permit_forever() {
                 variant: Some(wf_activation_job::Variant::RemoveFromCache(_)),
             },]
         );
+        run_id = activation.run_id.clone();
         core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, activation.run_id))
             .await
             .unwrap();
         assert_eq!(core.outstanding_wfts(TEST_Q), 0);
         assert_eq!(core.available_wft_permits(TEST_Q), 2);
     }
+    // We should be "out of work" because the mock service thinks we didn't complete the last task,
+    // which we didn't, because we don't spam failures. The real server would eventually time out
+    // the task. Mock doesn't understand that, so the WFT permit is released because eventually a
+    // new one will be generated. We manually clear the mock's outstanding task list so the next
+    // poll will work.
+    outstanding_mock_tasks
+        .unwrap()
+        .write()
+        .remove_by_left(&run_id);
+    let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
+    core.complete_workflow_activation(WfActivationCompletion::from_cmd(
+        TEST_Q,
+        activation.run_id,
+        CompleteWorkflowExecution { result: None }.into(),
+    ))
+    .await
+    .unwrap();
+
+    core.shutdown().await;
+}
+
+#[tokio::test]
+async fn cache_miss_doesnt_eat_permit_forever() {
+    let mut t = TestHistoryBuilder::default();
+    t.add_by_type(EventType::WorkflowExecutionStarted);
+    t.add_full_wf_task();
+    t.add_we_signaled("sig", vec![]);
+    t.add_full_wf_task();
+    t.add_workflow_execution_completed();
+
+    let mut mh = MockPollCfg::from_resp_batches(
+        "fake_wf_id",
+        t,
+        [
+            ResponseType::ToTaskNum(1),
+            ResponseType::OneTask(2),
+            ResponseType::ToTaskNum(1),
+            ResponseType::OneTask(2),
+            ResponseType::ToTaskNum(1),
+            ResponseType::OneTask(2),
+            // Last one to complete successfully
+            ResponseType::ToTaskNum(1),
+        ],
+        MockServerGatewayApis::new(),
+    );
+    mh.num_expected_fails = Some(3);
+    mh.expect_fail_wft_matcher =
+        Box::new(|_, cause, _| matches!(cause, WorkflowTaskFailedCause::ResetStickyTaskQueue));
+    let mut mock = build_mock_pollers(mh);
+    mock.worker_cfg(TEST_Q, |cfg| {
+        cfg.max_outstanding_workflow_tasks = 2;
+    });
+    let core = mock_core(mock);
+
+    // Spin missing the cache to verify that we don't get stuck
+    for _ in 1..=3 {
+        // Start
+        let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
+        core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, activation.run_id))
+            .await
+            .unwrap();
+        // Evict
+        let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
+        assert_matches!(
+            activation.jobs.as_slice(),
+            [WfActivationJob {
+                variant: Some(wf_activation_job::Variant::RemoveFromCache(_)),
+            },]
+        );
+        core.complete_workflow_activation(WfActivationCompletion::empty(TEST_Q, activation.run_id))
+            .await
+            .unwrap();
+        assert_eq!(core.outstanding_wfts(TEST_Q), 0);
+        assert_eq!(core.available_wft_permits(TEST_Q), 2);
+        // When we loop back up, the poll will trigger a cache miss, which we should immediately
+        // reply to WFT with failure, and then poll again, which will deliver the from-the-start
+        // history
+    }
     let activation = core.poll_workflow_activation(TEST_Q).await.unwrap();
     core.complete_workflow_activation(WfActivationCompletion::from_cmd(
         TEST_Q,
package/sdk-core/src/errors.rs
CHANGED
@@ -1,6 +1,6 @@
 //! Error types exposed by public APIs
 
-use crate::{machines::WFMachinesError, …
+use crate::{machines::WFMachinesError, WorkerLookupErr};
 use temporal_sdk_core_protos::coresdk::{
     activity_result::ActivityResult, workflow_completion::WfActivationCompletion,
 };
@@ -11,9 +11,8 @@ pub(crate) struct WorkflowUpdateError {
     /// Underlying workflow error
     pub source: WFMachinesError,
     /// The run id of the erring workflow
+    #[allow(dead_code)] // Useful in debug output
     pub run_id: String,
-    /// The task token associated with this update, if one existed yet.
-    pub task_token: Option<TaskToken>,
 }
 
 impl From<WorkflowMissingError> for WorkflowUpdateError {
@@ -21,7 +20,6 @@ impl From<WorkflowMissingError> for WorkflowUpdateError {
         Self {
             source: WFMachinesError::Fatal("Workflow machines missing".to_string()),
             run_id: wme.run_id,
-            task_token: None,
         }
     }
 }
package/sdk-core/src/machines/child_workflow_state_machine.rs
CHANGED
@@ -7,8 +7,7 @@ use std::convert::{TryFrom, TryInto};
 use temporal_sdk_core_protos::{
     coresdk::{
         child_workflow::{
-            self as wfr, child_workflow_result::Status as ChildWorkflowStatus,
-            ChildWorkflowCancellationType, ChildWorkflowResult,
+            self as wfr, child_workflow_result::Status as ChildWorkflowStatus, ChildWorkflowResult,
         },
         common::Payload,
         workflow_activation::{
@@ -115,7 +114,6 @@ impl StartCommandCreated {
             StartEventRecorded::default(),
             SharedState {
                 initiated_event_id,
-                attrs: None, // Drop the attributes to avoid holding large payloads in memory
                 ..state
             },
         )
@@ -303,9 +301,7 @@ pub(super) struct SharedState {
     workflow_id: String,
     run_id: String,
     workflow_type: String,
-    cancellation_type: ChildWorkflowCancellationType,
     cancelled_before_sent: bool,
-    attrs: Option<StartChildWorkflowExecution>,
 }
 
 /// Creates a new child workflow state machine and a command to start it on the server.
@@ -329,11 +325,6 @@ impl ChildWorkflowMachine {
             workflow_id: attribs.workflow_id.clone(),
             workflow_type: attribs.workflow_type.clone(),
             namespace: attribs.namespace.clone(),
-            cancellation_type: ChildWorkflowCancellationType::from_i32(
-                attribs.cancellation_type,
-            )
-            .unwrap(),
-            attrs: Some(attribs.clone()),
             ..Default::default()
         },
     };
package/sdk-core/src/machines/workflow_machines.rs
CHANGED
@@ -61,6 +61,10 @@ pub(crate) struct WorkflowMachines {
     /// Eventually, this number should reach the started id in the latest history update, but
     /// we must incrementally apply the history while communicating with lang.
     next_started_event_id: i64,
+    /// The event id of the most recent event processed. It's possible in some situations (ex legacy
+    /// queries) to receive a history with no new workflow tasks. If the last history we processed
+    /// also had no new tasks, we need a way to know not to apply the same events over again.
+    last_processed_event: i64,
     /// True if the workflow is replaying from history
     pub replaying: bool,
     /// Namespace this workflow exists in
@@ -120,7 +124,6 @@ struct CommandAndMachine {
 
 #[derive(Debug, Clone, Copy)]
 struct ChangeInfo {
-    deprecated: bool,
     created_command: bool,
 }
 
@@ -196,6 +199,7 @@ impl WorkflowMachines {
             // In an ideal world one could say ..Default::default() here and it'd still work.
             current_started_event_id: 0,
             next_started_event_id: 0,
+            last_processed_event: 0,
             workflow_start_time: None,
             workflow_end_time: None,
             current_wf_time: None,
@@ -529,11 +533,16 @@ impl WorkflowMachines {
         }
 
         let last_handled_wft_started_id = self.current_started_event_id;
-        let events = …
+        let events = {
+            let mut evts = self
+                .last_history_from_server
+                .take_next_wft_sequence(last_handled_wft_started_id)
+                .await
+                .map_err(WFMachinesError::HistoryFetchingError)?;
+            // Do not re-process events we have already processed
+            evts.retain(|e| e.event_id > self.last_processed_event);
+            evts
+        };
 
         // We're caught up on reply if there are no new events to process
         // TODO: Probably this is unneeded if we evict whenever history is from non-sticky queue
@@ -564,23 +573,17 @@ impl WorkflowMachines {
 
         while let Some(event) = history.next() {
             let next_event = history.peek();
-
-            if event.event_type == EventType::WorkflowTaskStarted as i32 && next_event.is_none() {
-                self.handle_event(event, false)?;
-                break;
-            }
-
             self.handle_event(event, next_event.is_some())?;
+            self.last_processed_event = event.event_id;
         }
 
         // Scan through to the next WFT, searching for any patch markers, so that we can
         // pre-resolve them.
         for e in self.last_history_from_server.peek_next_wft_sequence() {
-            if let Some((patch_id, …
+            if let Some((patch_id, _)) = e.get_changed_marker_details() {
                 self.encountered_change_markers.insert(
                     patch_id.clone(),
                     ChangeInfo {
-                        deprecated,
                         created_command: false,
                     },
                 );
@@ -743,7 +746,6 @@ impl WorkflowMachines {
         self.encountered_change_markers.insert(
             attrs.patch_id,
             ChangeInfo {
-                deprecated: attrs.deprecated,
                 created_command: true,
             },
         );
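Note (illustrative sketch, not from the diff): the `last_processed_event` guard above reduces to filtering the next event window by id before the machines apply it, so receiving the same window twice (as a legacy query can cause) becomes a no-op. A stand-in reduction:

    #[derive(Debug, PartialEq)]
    struct HistoryEvent {
        event_id: i64,
    }

    // Mirrors the retain() call added above, on a stand-in event type.
    fn drop_already_processed(events: &mut Vec<HistoryEvent>, last_processed_event: i64) {
        events.retain(|e| e.event_id > last_processed_event);
    }

    fn main() {
        let mut window: Vec<HistoryEvent> =
            (1..=5).map(|event_id| HistoryEvent { event_id }).collect();
        drop_already_processed(&mut window, 3);
        // Only events 4 and 5 survive; re-running with the same window yields nothing new.
        assert_eq!(
            window,
            vec![HistoryEvent { event_id: 4 }, HistoryEvent { event_id: 5 }]
        );
    }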
package/sdk-core/src/test_help/history_builder.rs
CHANGED
@@ -342,6 +342,16 @@ impl TestHistoryBuilder {
         HistoryInfo::new_from_history(&self.events.clone().into(), None)
     }
 
+    pub(crate) fn get_one_wft(
+        &self,
+        from_wft_number: usize,
+    ) -> Result<HistoryInfo, HistoryInfoError> {
+        let mut histinfo =
+            HistoryInfo::new_from_history(&self.events.clone().into(), Some(from_wft_number))?;
+        histinfo.make_incremental();
+        Ok(histinfo)
+    }
+
     fn build_and_push_event(&mut self, event_type: EventType, attribs: Attributes) {
         self.current_event_id += 1;
         let evt = HistoryEvent {
package/sdk-core/src/test_help/history_info.rs
CHANGED
@@ -39,6 +39,7 @@ impl HistoryInfo {
             return Err(HistoryInfoError::HistoryEndsUnexpectedly);
         }
 
+        let is_all_hist = to_wf_task_num.is_none();
         let to_wf_task_num = to_wf_task_num.unwrap_or(usize::MAX);
         let mut workflow_task_started_event_id = 0;
         let mut previous_started_event_id = 0;
@@ -83,7 +84,7 @@ impl HistoryInfo {
             }
 
             if next_event.is_none() {
-                if event.is_final_wf_execution_event() {
+                if event.is_final_wf_execution_event() || is_all_hist {
                     return Ok(Self {
                         previous_started_event_id,
                         workflow_task_started_event_id,
@@ -100,6 +101,18 @@ impl HistoryInfo {
         unreachable!()
     }
 
+    /// Remove events from the beginning of this history such that it looks like what would've been
+    /// delivered on a sticky queue where the previously started task was the one before the last
+    /// task in this history.
+    pub(crate) fn make_incremental(&mut self) {
+        let last_complete_ix = self
+            .events
+            .iter()
+            .rposition(|he| he.event_type() == EventType::WorkflowTaskCompleted)
+            .expect("Must be a WFT completed event in history");
+        self.events.drain(0..=last_complete_ix);
+    }
+
     pub(crate) fn events(&self) -> &[HistoryEvent] {
         &self.events
     }
@@ -135,4 +148,11 @@ mod tests {
         let history_info = t.get_history_info(2).unwrap();
         assert_eq!(8, history_info.events.len());
     }
+
+    #[test]
+    fn incremental_works() {
+        let t = canned_histories::single_timer("timer1");
+        let hi = t.get_one_wft(2).unwrap();
+        dbg!(hi.events);
+    }
 }
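Note (self-contained sketch with a stand-in event enum, not the real HistoryEvent type): `make_incremental` trims everything up to and including the last WorkflowTaskCompleted, leaving only the tail a sticky-queue poller would have received:

    #[derive(Debug, PartialEq, Clone, Copy)]
    enum EventType {
        WorkflowTaskCompleted,
        TimerFired,
        WorkflowTaskScheduled,
        WorkflowTaskStarted,
    }

    fn make_incremental(events: &mut Vec<EventType>) {
        // Find the last WFT-completed event and drop it plus everything before it.
        let last_complete_ix = events
            .iter()
            .rposition(|e| *e == EventType::WorkflowTaskCompleted)
            .expect("Must be a WFT completed event in history");
        events.drain(0..=last_complete_ix);
    }

    fn main() {
        use EventType::*;
        let mut events = vec![
            WorkflowTaskCompleted,
            TimerFired,
            WorkflowTaskScheduled,
            WorkflowTaskStarted,
        ];
        make_incremental(&mut events);
        assert_eq!(events, vec![TimerFired, WorkflowTaskScheduled, WorkflowTaskStarted]);
    }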
package/sdk-core/src/test_help/mod.rs
CHANGED
@@ -53,6 +53,10 @@ pub static NO_MORE_WORK_ERROR_MSG: &str = "No more work to do";
 #[derive(derive_more::From, Debug, Clone, Copy, Eq, PartialEq, Hash)]
 pub enum ResponseType {
     ToTaskNum(usize),
+    /// Returns just the history after the WFT completed of the provided task number - 1, through to
+    /// the next WFT started. Simulating the incremental history for just the provided task number
+    #[from(ignore)]
+    OneTask(usize),
     AllHistory,
 }
 
@@ -142,6 +146,7 @@ pub struct FakeWfResponses {
 pub struct MocksHolder<SG> {
     sg: SG,
     mock_pollers: HashMap<String, MockWorker>,
+    // bidirectional mapping of run id / task token
     pub outstanding_task_map: Option<Arc<RwLock<BiMap<String, TaskToken>>>>,
 }
 
@@ -377,16 +382,6 @@ pub fn build_mock_pollers(mut cfg: MockPollCfg) -> MocksHolder<MockServerGatewayApis> {
         }
     }
 
-    // TODO: Fix -- or not? Sticky invalidation could make this pointless anyway
-    // Verify response batches only ever return longer histories (IE: Are sorted ascending)
-    // assert!(
-    //     hist.response_batches
-    //         .as_slice()
-    //         .windows(2)
-    //         .all(|w| w[0] <= w[1]),
-    //     "response batches must have increasing wft numbers"
-    // );
-
     if cfg.enforce_correct_number_of_polls {
         *correct_num_polls.get_or_insert(0) += hist.response_batches.len();
     }
@@ -495,6 +490,7 @@ pub fn hist_to_poll_resp(
     };
     let hist_info = match response_type {
         ResponseType::ToTaskNum(tn) => t.get_history_info(tn).unwrap(),
+        ResponseType::OneTask(tn) => t.get_one_wft(tn).unwrap(),
         ResponseType::AllHistory => t.get_full_history_info().unwrap(),
     };
     let batch = hist_info.events().to_vec();
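Aside on `outstanding_task_map` (hypothetical sketch; it assumes the `bimap` and `parking_lot` crates, which is how the `.write().remove_by_left(...)` call in the permit test reads):

    use std::sync::Arc;

    use bimap::BiMap;
    use parking_lot::RwLock;

    fn main() {
        // run id <-> task token kept bidirectional, so either side can be the lookup key.
        let map: Arc<RwLock<BiMap<String, Vec<u8>>>> = Arc::new(RwLock::new(BiMap::new()));
        map.write().insert("run-1".to_string(), b"token-1".to_vec());
        // Clearing by run id, as failing_wft_doesnt_eat_permit_forever does to unblock polling:
        map.write().remove_by_left(&"run-1".to_string());
        assert!(map.read().get_by_left(&"run-1".to_string()).is_none());
    }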
package/sdk-core/src/worker/mod.rs
CHANGED
@@ -329,20 +329,28 @@ impl Worker {
         completion: WfActivationCompletion,
     ) -> Result<(), CompleteWfError> {
         let wfstatus = completion.status;
-        let … = match wfstatus {
+        let report_outcome = match wfstatus {
             Some(wf_activation_completion::Status::Successful(success)) => {
                 self.wf_activation_success(&completion.run_id, success)
                     .await
             }
+
             Some(wf_activation_completion::Status::Failed(failure)) => {
-                self.wf_activation_failed(…
+                self.wf_activation_failed(
+                    &completion.run_id,
+                    WorkflowTaskFailedCause::Unspecified,
+                    failure,
+                )
+                .await
+            }
+            None => {
+                return Err(CompleteWfError::MalformedWorkflowCompletion {
+                    reason: "Workflow completion had empty status field".to_owned(),
+                    completion: None,
+                })
             }
-            None => Err(CompleteWfError::MalformedWorkflowCompletion {
-                reason: "Workflow completion had empty status field".to_owned(),
-                completion: None,
-            }),
         }?;
-        self.after_workflow_activation(&completion.run_id, …
+        self.after_workflow_activation(&completion.run_id, report_outcome);
         Ok(())
     }
@@ -488,6 +496,7 @@ impl Worker {
                 }),
             )
             .await?;
+            self.return_workflow_task_permit();
             None
         }
         NewWfTaskOutcome::Evict(e) => {
@@ -508,7 +517,7 @@ impl Worker {
         &self,
         run_id: &str,
         success: workflow_completion::Success,
-    ) -> Result<…
+    ) -> Result<WFTReportOutcome, CompleteWfError> {
         // Convert to wf commands
         let cmds = success
             .commands
@@ -552,7 +561,10 @@ impl Worker {
                     .await
                 })
                 .await?;
-                Ok(…
+                Ok(WFTReportOutcome {
+                    reported_to_server: true,
+                    failed: false,
+                })
             }
             Ok(Some(ServerCommandsWithWorkflowInfo {
                 task_token,
@@ -562,9 +574,15 @@ impl Worker {
                 self.server_gateway
                     .respond_legacy_query(task_token, result)
                     .await?;
-                Ok(…
+                Ok(WFTReportOutcome {
+                    reported_to_server: true,
+                    failed: false,
+                })
             }
-            Ok(None) => Ok(…
+            Ok(None) => Ok(WFTReportOutcome {
+                reported_to_server: false,
+                failed: false,
+            }),
             Err(update_err) => {
                 // Automatically fail the workflow task in the event we couldn't update machines
                 let fail_cause = if matches!(&update_err.source, WFMachinesError::Nondeterminism(_))
@@ -573,30 +591,13 @@ impl Worker {
                 } else {
                     WorkflowTaskFailedCause::Unspecified
                 };
-                …
-                        .fail_workflow_task(
-                            tt.clone(),
-                            fail_cause,
-                            Some(Failure::application_failure(wft_fail_str.clone(), false)),
-                        )
-                        .await
-                })
-                .await?;
-                // We must evict the workflow since we've failed a WFT
-                self.request_wf_eviction(
-                    run_id,
-                    format!("Workflow task failure: {}", wft_fail_str),
-                );
-                Ok(true)
-            } else {
-                Ok(false)
-            }
+                let wft_fail_str = format!("{:?}", update_err);
+                self.wf_activation_failed(
+                    run_id,
+                    fail_cause,
+                    Failure::application_failure(wft_fail_str.clone(), false).into(),
+                )
+                .await
             }
         }
     }
@@ -607,35 +608,46 @@ impl Worker {
     async fn wf_activation_failed(
         &self,
        run_id: &str,
+        cause: WorkflowTaskFailedCause,
         failure: workflow_completion::Failure,
-    ) -> Result<…
+    ) -> Result<WFTReportOutcome, CompleteWfError> {
         Ok(match self.wft_manager.failed_activation(run_id) {
             FailedActivationOutcome::Report(tt) => {
+                warn!(run_id, failure=?failure, "Failing workflow activation");
                 self.handle_wft_reporting_errs(run_id, || async {
                     self.server_gateway
-                        .fail_workflow_task(
-                            tt,
-                            WorkflowTaskFailedCause::Unspecified,
-                            failure.failure.map(Into::into),
-                        )
+                        .fail_workflow_task(tt, cause, failure.failure.map(Into::into))
                         .await
                 })
                 .await?;
-                …
+                WFTReportOutcome {
+                    reported_to_server: true,
+                    failed: true,
+                }
             }
             FailedActivationOutcome::ReportLegacyQueryFailure(task_token) => {
+                warn!(run_id, failure=?failure, "Failing legacy query request");
                 self.server_gateway
                     .respond_legacy_query(task_token, legacy_query_failure(failure))
                     .await?;
-                …
+                WFTReportOutcome {
+                    reported_to_server: true,
+                    failed: true,
+                }
             }
-            FailedActivationOutcome::NoReport => …
+            FailedActivationOutcome::NoReport => WFTReportOutcome {
+                reported_to_server: false,
+                failed: true,
+            },
         })
     }
 
-    fn after_workflow_activation(&self, run_id: &str, …
+    fn after_workflow_activation(&self, run_id: &str, report_outcome: WFTReportOutcome) {
         self.wft_manager.after_wft_report(run_id);
-        if …
+        if report_outcome.reported_to_server || report_outcome.failed {
+            // If we failed the WFT but didn't report anything, we still want to release the WFT
+            // permit since the server will eventually time out the task and we've already evicted
+            // the run.
             self.return_workflow_task_permit();
         }
         self.wft_manager.on_activation_done(run_id);
@@ -717,6 +729,11 @@ impl WorkerConfig {
     }
 }
 
+struct WFTReportOutcome {
+    reported_to_server: bool,
+    failed: bool,
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
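Note (illustrative reduction, not from the diff): the permit-release decision in `after_workflow_activation` collapses to a two-flag rule over `WFTReportOutcome`; only the "nothing reported, nothing failed" case (a successful completion that produced no server round trip, e.g. mid-replay) keeps holding the permit:

    struct WFTReportOutcome {
        reported_to_server: bool,
        failed: bool,
    }

    // Mirrors the condition above: release whenever we either told the server
    // something, or failed (the run gets evicted and the server will time the
    // task out on its own).
    fn releases_permit(outcome: &WFTReportOutcome) -> bool {
        outcome.reported_to_server || outcome.failed
    }

    fn main() {
        assert!(releases_permit(&WFTReportOutcome { reported_to_server: true, failed: false }));
        assert!(releases_permit(&WFTReportOutcome { reported_to_server: false, failed: true }));
        // Still replaying / more activations pending: keep holding the permit.
        assert!(!releases_permit(&WFTReportOutcome { reported_to_server: false, failed: false }));
    }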
package/sdk-core/src/workflow/workflow_tasks/mod.rs
CHANGED
@@ -135,7 +135,7 @@ pub enum ActivationAction {
 }
 
 macro_rules! machine_mut {
-    ($myself:ident, $run_id:ident, $…
+    ($myself:ident, $run_id:ident, $clos:expr) => {{
         $myself
             .workflow_machines
             .access($run_id, $clos)
@@ -143,7 +143,6 @@ macro_rules! machine_mut {
             .map_err(|source| WorkflowUpdateError {
                 source,
                 run_id: $run_id.to_owned(),
-                task_token: Some($task_token.clone()),
             })
     }};
 }
@@ -256,6 +255,7 @@ impl WorkflowTaskManager {
         debug!(
             task_token = %&work.task_token,
             history_length = %work.history.events.len(),
+            attempt = %work.attempt,
             "Applying new workflow task from server"
         );
         let task_start_time = Instant::now();
@@ -325,19 +325,20 @@ impl WorkflowTaskManager {
             return Ok(None);
         }
 
-        let task_token …
-        entry.…
+        let (task_token, is_leg_query_task) =
+            if let Some(entry) = self.workflow_machines.get_task(run_id) {
+                (entry.info.task_token.clone(), entry.legacy_query.is_some())
+            } else {
+                if !self.activation_has_eviction(run_id) {
+                    // Don't bother warning if this was an eviction, since it's normal to issue
+                    // eviction activations without an associated workflow task in that case.
+                    warn!(
+                        run_id,
+                        "Attempted to complete activation for run without associated workflow task"
+                    );
+                }
+                return Ok(None);
+            };
 
         // If the only command in the activation is a legacy query response, that means we need
         // to respond differently than a typical activation.
@@ -364,7 +365,6 @@ impl WorkflowTaskManager {
             return Err(WorkflowUpdateError {
                 source: WFMachinesError::Fatal("Legacy query activation response included other commands, this is not allowed and constitutes an error in the lang SDK".to_string()),
                 run_id: run_id.to_string(),
-                task_token: Some(task_token)
             });
         }
         query_responses.push(qr);
@@ -375,30 +375,32 @@ impl WorkflowTaskManager {
         }
 
         // Send commands from lang into the machines
-        machine_mut!(self, run_id, …
+        machine_mut!(self, run_id, |wfm: &mut WorkflowManager| {
            wfm.push_commands(commands).boxed()
        })?;
        // Check if the workflow run needs another activation and queue it up if there is one
        // by pushing it into the pending activations list
-        let next_activation = machine_mut!(
-            …
-            task_token,
-            move |mgr: &mut WorkflowManager| mgr.get_next_activation().boxed()
-        )?;
+        let next_activation = machine_mut!(self, run_id, move |mgr: &mut WorkflowManager| mgr
+            .get_next_activation()
+            .boxed())?;
        if !next_activation.jobs.is_empty() {
            self.pending_activations.push(next_activation);
            let _ = self.pending_activations_notifier.send(true);
        }
        // We want to fetch the outgoing commands only after any new activation has been queued,
        // as doing so may have altered the outgoing commands.
-        let server_cmds = …
+        let server_cmds = machine_mut!(self, run_id, |wfm: &mut WorkflowManager| {
+            async move { Ok(wfm.get_server_commands()) }.boxed()
+        })?;
+        let is_query_playback = is_leg_query_task && query_responses.is_empty();
        // We only actually want to send commands back to the server if there are no more
-        // pending activations and we are caught up on replay.
-        if …
+        // pending activations and we are caught up on replay. We don't want to complete a wft
+        // if we already saw the final event in the workflow, or if we are playing back for the
+        // express purpose of fulfilling a query
+        if !self.pending_activations.has_pending(run_id)
+            && !server_cmds.replaying
+            && !is_query_playback
+        {
            Some(ServerCommandsWithWorkflowInfo {
                task_token,
                action: ActivationAction::WftComplete {
@@ -448,7 +450,7 @@ impl WorkflowTaskManager {
        } else {
            // Blow up any cached data associated with the workflow
            let should_report = self
-                .request_eviction(run_id, "Activation failed…
+                .request_eviction(run_id, "Activation failed")
                .map_or(true, |attempt| attempt <= 1);
            if should_report {
                FailedActivationOutcome::Report(tt)
@@ -507,11 +509,7 @@ impl WorkflowTaskManager {
 
            Ok((wft_info, activation))
        }
-        Err(source) => Err(WorkflowUpdateError {
-            source,
-            run_id,
-            task_token: Some(wft_info.task_token),
-        }),
+        Err(source) => Err(WorkflowUpdateError { source, run_id }),
    }
 }