@temporalio/core-bridge 1.5.2 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +304 -112
- package/lib/index.d.ts +8 -6
- package/lib/index.js.map +1 -1
- package/package.json +9 -4
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/docker/Dockerfile +2 -2
- package/sdk-core/.buildkite/docker/docker-compose.yaml +1 -1
- package/sdk-core/.buildkite/pipeline.yml +2 -4
- package/sdk-core/.cargo/config.toml +5 -2
- package/sdk-core/.github/workflows/heavy.yml +29 -0
- package/sdk-core/Cargo.toml +1 -1
- package/sdk-core/README.md +20 -10
- package/sdk-core/client/src/lib.rs +215 -39
- package/sdk-core/client/src/metrics.rs +17 -8
- package/sdk-core/client/src/raw.rs +4 -4
- package/sdk-core/client/src/retry.rs +32 -20
- package/sdk-core/core/Cargo.toml +25 -12
- package/sdk-core/core/src/abstractions/take_cell.rs +28 -0
- package/sdk-core/core/src/abstractions.rs +204 -14
- package/sdk-core/core/src/core_tests/activity_tasks.rs +143 -50
- package/sdk-core/core/src/core_tests/child_workflows.rs +6 -5
- package/sdk-core/core/src/core_tests/determinism.rs +165 -2
- package/sdk-core/core/src/core_tests/local_activities.rs +431 -43
- package/sdk-core/core/src/core_tests/queries.rs +34 -16
- package/sdk-core/core/src/core_tests/workers.rs +8 -5
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +588 -55
- package/sdk-core/core/src/ephemeral_server/mod.rs +113 -12
- package/sdk-core/core/src/internal_flags.rs +155 -0
- package/sdk-core/core/src/lib.rs +16 -9
- package/sdk-core/core/src/protosext/mod.rs +1 -1
- package/sdk-core/core/src/replay/mod.rs +16 -27
- package/sdk-core/core/src/telemetry/log_export.rs +1 -1
- package/sdk-core/core/src/telemetry/metrics.rs +69 -35
- package/sdk-core/core/src/telemetry/mod.rs +60 -21
- package/sdk-core/core/src/telemetry/prometheus_server.rs +19 -13
- package/sdk-core/core/src/test_help/mod.rs +73 -14
- package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +119 -160
- package/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
- package/sdk-core/core/src/worker/activities/local_activities.rs +379 -129
- package/sdk-core/core/src/worker/activities.rs +350 -175
- package/sdk-core/core/src/worker/client/mocks.rs +22 -2
- package/sdk-core/core/src/worker/client.rs +18 -2
- package/sdk-core/core/src/worker/mod.rs +183 -64
- package/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
- package/sdk-core/core/src/worker/workflow/driven_workflow.rs +3 -5
- package/sdk-core/core/src/worker/workflow/history_update.rs +916 -277
- package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +216 -183
- package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +9 -12
- package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +7 -9
- package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +160 -87
- package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +13 -14
- package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +7 -9
- package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +14 -17
- package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +242 -110
- package/sdk-core/core/src/worker/workflow/machines/mod.rs +27 -19
- package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +9 -11
- package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +321 -206
- package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +13 -18
- package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +20 -29
- package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
- package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +257 -51
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +310 -150
- package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +17 -20
- package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +31 -15
- package/sdk-core/core/src/worker/workflow/managed_run.rs +1052 -380
- package/sdk-core/core/src/worker/workflow/mod.rs +598 -390
- package/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
- package/sdk-core/core/src/worker/workflow/wft_extraction.rs +137 -0
- package/sdk-core/core/src/worker/workflow/wft_poller.rs +1 -4
- package/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream.rs +469 -718
- package/sdk-core/core-api/Cargo.toml +2 -1
- package/sdk-core/core-api/src/errors.rs +1 -34
- package/sdk-core/core-api/src/lib.rs +19 -9
- package/sdk-core/core-api/src/telemetry.rs +4 -6
- package/sdk-core/core-api/src/worker.rs +19 -1
- package/sdk-core/etc/deps.svg +115 -140
- package/sdk-core/etc/regen-depgraph.sh +5 -0
- package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +86 -61
- package/sdk-core/fsm/rustfsm_trait/src/lib.rs +29 -71
- package/sdk-core/histories/ends_empty_wft_complete.bin +0 -0
- package/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
- package/sdk-core/histories/old_change_marker_format.bin +0 -0
- package/sdk-core/protos/api_upstream/.github/CODEOWNERS +2 -1
- package/sdk-core/protos/api_upstream/Makefile +6 -6
- package/sdk-core/protos/api_upstream/build/go.mod +7 -0
- package/sdk-core/protos/api_upstream/build/go.sum +5 -0
- package/sdk-core/protos/api_upstream/build/tools.go +29 -0
- package/sdk-core/protos/api_upstream/go.mod +6 -0
- package/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +9 -2
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +7 -26
- package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +13 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +3 -7
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +8 -8
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +25 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +24 -19
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +49 -26
- package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +4 -2
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +5 -2
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/protocol/v1/message.proto +57 -0
- package/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
- package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +71 -6
- package/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +64 -28
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +4 -4
- package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +7 -8
- package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +10 -7
- package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +19 -30
- package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
- package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
- package/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +8 -0
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +67 -60
- package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +85 -84
- package/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +9 -3
- package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
- package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
- package/sdk-core/sdk/Cargo.toml +5 -4
- package/sdk-core/sdk/src/lib.rs +108 -26
- package/sdk-core/sdk/src/workflow_context/options.rs +7 -1
- package/sdk-core/sdk/src/workflow_context.rs +24 -17
- package/sdk-core/sdk/src/workflow_future.rs +16 -15
- package/sdk-core/sdk-core-protos/Cargo.toml +5 -2
- package/sdk-core/sdk-core-protos/build.rs +36 -2
- package/sdk-core/sdk-core-protos/src/history_builder.rs +138 -106
- package/sdk-core/sdk-core-protos/src/history_info.rs +10 -1
- package/sdk-core/sdk-core-protos/src/lib.rs +272 -87
- package/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
- package/sdk-core/test-utils/Cargo.toml +3 -1
- package/sdk-core/test-utils/src/canned_histories.rs +106 -296
- package/sdk-core/test-utils/src/histfetch.rs +1 -1
- package/sdk-core/test-utils/src/lib.rs +82 -23
- package/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
- package/sdk-core/test-utils/src/workflows.rs +29 -0
- package/sdk-core/tests/fuzzy_workflow.rs +130 -0
- package/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
- package/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
- package/sdk-core/tests/integ_tests/heartbeat_tests.rs +10 -5
- package/sdk-core/tests/integ_tests/metrics_tests.rs +218 -16
- package/sdk-core/tests/integ_tests/polling_tests.rs +4 -47
- package/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
- package/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +161 -72
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
- package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +80 -3
- package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
- package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +94 -200
- package/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +2 -4
- package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +34 -28
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +76 -7
- package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
- package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
- package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
- package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
- package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +7 -8
- package/sdk-core/tests/integ_tests/workflow_tests.rs +13 -14
- package/sdk-core/tests/main.rs +3 -13
- package/sdk-core/tests/runner.rs +75 -36
- package/sdk-core/tests/wf_input_replay.rs +32 -0
- package/src/conversions.rs +14 -8
- package/src/runtime.rs +9 -8
- package/ts/index.ts +8 -6
- package/sdk-core/bridge-ffi/Cargo.toml +0 -24
- package/sdk-core/bridge-ffi/LICENSE.txt +0 -23
- package/sdk-core/bridge-ffi/build.rs +0 -25
- package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -224
- package/sdk-core/bridge-ffi/src/lib.rs +0 -746
- package/sdk-core/bridge-ffi/src/wrappers.rs +0 -221
- package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
- package/sdk-core/sdk/src/conversions.rs +0 -8
|
@@ -1,188 +1,217 @@
|
|
|
1
1
|
#[cfg(test)]
|
|
2
2
|
mod managed_wf_test;
|
|
3
3
|
|
|
4
|
+
#[cfg(test)]
|
|
5
|
+
pub(crate) use managed_wf_test::ManagedWFFunc;
|
|
6
|
+
|
|
4
7
|
use crate::{
|
|
8
|
+
abstractions::dbg_panic,
|
|
9
|
+
protosext::WorkflowActivationExt,
|
|
5
10
|
worker::{
|
|
6
11
|
workflow::{
|
|
7
|
-
machines::WorkflowMachines, ActivationAction,
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
12
|
+
history_update::HistoryPaginator, machines::WorkflowMachines, ActivationAction,
|
|
13
|
+
ActivationCompleteOutcome, ActivationCompleteResult, ActivationOrAuto,
|
|
14
|
+
EvictionRequestResult, FailedActivationWFTReport, HeartbeatTimeoutMsg, HistoryUpdate,
|
|
15
|
+
LocalActivityRequestSink, LocalResolution, NextPageReq, OutgoingServerCommands,
|
|
16
|
+
OutstandingActivation, OutstandingTask, PermittedWFT, RequestEvictMsg, RunBasics,
|
|
17
|
+
ServerCommandsWithWorkflowInfo, WFCommand, WFMachinesError, WFTReportStatus,
|
|
18
|
+
WorkflowBridge, WorkflowTaskInfo, WFT_HEARTBEAT_TIMEOUT_FRACTION,
|
|
11
19
|
},
|
|
12
|
-
LocalActRequest,
|
|
20
|
+
LocalActRequest, LEGACY_QUERY_ID,
|
|
13
21
|
},
|
|
14
22
|
MetricsContext,
|
|
15
23
|
};
|
|
16
|
-
use
|
|
24
|
+
use futures_util::future::AbortHandle;
|
|
17
25
|
use std::{
|
|
26
|
+
collections::HashSet,
|
|
18
27
|
ops::Add,
|
|
28
|
+
rc::Rc,
|
|
19
29
|
sync::mpsc::Sender,
|
|
20
30
|
time::{Duration, Instant},
|
|
21
31
|
};
|
|
22
|
-
use
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
oneshot,
|
|
32
|
+
use temporal_sdk_core_protos::{
|
|
33
|
+
coresdk::{
|
|
34
|
+
workflow_activation::{
|
|
35
|
+
create_evict_activation, query_to_job, remove_from_cache::EvictionReason,
|
|
36
|
+
workflow_activation_job, RemoveFromCache, WorkflowActivation,
|
|
37
|
+
},
|
|
38
|
+
workflow_commands::QueryResult,
|
|
39
|
+
workflow_completion,
|
|
31
40
|
},
|
|
32
|
-
|
|
33
|
-
|
|
41
|
+
temporal::api::{enums::v1::WorkflowTaskFailedCause, failure::v1::Failure},
|
|
42
|
+
TaskToken,
|
|
34
43
|
};
|
|
35
|
-
use
|
|
44
|
+
use tokio::sync::oneshot;
|
|
36
45
|
use tracing::Span;
|
|
37
|
-
use tracing_futures::Instrument;
|
|
38
|
-
|
|
39
|
-
use crate::worker::workflow::{
|
|
40
|
-
ActivationCompleteResult, ActivationOrAuto, FailRunUpdate, FulfillableActivationComplete,
|
|
41
|
-
GoodRunUpdate, LocalActivityRequestSink, RunAction, RunUpdateResponseKind,
|
|
42
|
-
};
|
|
43
|
-
use temporal_sdk_core_protos::TaskToken;
|
|
44
|
-
|
|
45
|
-
use crate::abstractions::dbg_panic;
|
|
46
|
-
#[cfg(test)]
|
|
47
|
-
pub(crate) use managed_wf_test::ManagedWFFunc;
|
|
48
46
|
|
|
49
47
|
type Result<T, E = WFMachinesError> = std::result::Result<T, E>;
|
|
50
|
-
|
|
51
|
-
/// necessary.
|
|
52
|
-
const WFT_HEARTBEAT_TIMEOUT_FRACTION: f32 = 0.8;
|
|
48
|
+
pub(super) type RunUpdateAct = Option<ActivationOrAuto>;
|
|
53
49
|
|
|
50
|
+
/// Manages access to a specific workflow run. Everything inside is entirely synchronous and should
|
|
51
|
+
/// remain that way.
|
|
52
|
+
#[derive(derive_more::DebugCustom)]
|
|
53
|
+
#[debug(
|
|
54
|
+
fmt = "ManagedRun {{ wft: {:?}, activation: {:?}, buffered_resp: {:?} \
|
|
55
|
+
trying_to_evict: {} }}",
|
|
56
|
+
wft,
|
|
57
|
+
activation,
|
|
58
|
+
buffered_resp,
|
|
59
|
+
"trying_to_evict.is_some()"
|
|
60
|
+
)]
|
|
54
61
|
pub(super) struct ManagedRun {
|
|
55
62
|
wfm: WorkflowManager,
|
|
56
|
-
|
|
57
|
-
|
|
63
|
+
/// Called when the machines need to produce local activity requests. This can't be lifted up
|
|
64
|
+
/// easily as return values, because sometimes local activity requests trigger immediate
|
|
65
|
+
/// resolutions (ex: too many attempts). Thus lifting it up creates a lot of unneeded complexity
|
|
66
|
+
/// pushing things out and then directly back in. The downside is this is the only "impure" part
|
|
67
|
+
/// of the in/out nature of workflow state management. If there's ever a sensible way to lift it
|
|
68
|
+
/// up, that'd be nice.
|
|
69
|
+
local_activity_request_sink: Rc<dyn LocalActivityRequestSink>,
|
|
70
|
+
/// Set if the run is currently waiting on the execution of some local activities.
|
|
58
71
|
waiting_on_la: Option<WaitingOnLAs>,
|
|
59
|
-
|
|
60
|
-
|
|
72
|
+
/// Is set to true if the machines encounter an error and the only subsequent thing we should
|
|
73
|
+
/// do is be evicted.
|
|
61
74
|
am_broken: bool,
|
|
62
|
-
|
|
75
|
+
/// If set, the WFT this run is currently/will be processing.
|
|
76
|
+
wft: Option<OutstandingTask>,
|
|
77
|
+
/// An outstanding activation to lang
|
|
78
|
+
activation: Option<OutstandingActivation>,
|
|
79
|
+
/// If set, it indicates there is a buffered poll response from the server that applies to this
|
|
80
|
+
/// run. This can happen when lang takes too long to complete a task and the task times out, for
|
|
81
|
+
/// example. Upon next completion, the buffered response will be removed and can be made ready
|
|
82
|
+
/// to be returned from polling
|
|
83
|
+
buffered_resp: Option<PermittedWFT>,
|
|
84
|
+
/// Is set if an eviction has been requested for this run
|
|
85
|
+
trying_to_evict: Option<RequestEvictMsg>,
|
|
63
86
|
|
|
64
|
-
///
|
|
65
|
-
///
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
///
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
CompletionDataForWFT,
|
|
73
|
-
oneshot::Sender<ActivationCompleteResult>,
|
|
74
|
-
)>,
|
|
75
|
-
hb_chan: UnboundedSender<Span>,
|
|
76
|
-
heartbeat_timeout_task: JoinHandle<()>,
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
#[derive(Debug)]
|
|
80
|
-
struct CompletionDataForWFT {
|
|
81
|
-
task_token: TaskToken,
|
|
82
|
-
query_responses: Vec<QueryResult>,
|
|
83
|
-
has_pending_query: bool,
|
|
84
|
-
activation_was_only_eviction: bool,
|
|
87
|
+
/// We track if we have recorded useful debugging values onto a certain span yet, to overcome
|
|
88
|
+
/// duplicating field values. Remove this once https://github.com/tokio-rs/tracing/issues/2334
|
|
89
|
+
/// is fixed.
|
|
90
|
+
recorded_span_ids: HashSet<tracing::Id>,
|
|
91
|
+
metrics: MetricsContext,
|
|
92
|
+
/// We store the paginator used for our own run's history fetching
|
|
93
|
+
paginator: Option<HistoryPaginator>,
|
|
94
|
+
completion_waiting_on_page_fetch: Option<RunActivationCompletion>,
|
|
85
95
|
}
|
|
86
|
-
|
|
87
96
|
impl ManagedRun {
|
|
88
97
|
pub(super) fn new(
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
local_activity_request_sink: LocalActivityRequestSink,
|
|
98
|
+
basics: RunBasics,
|
|
99
|
+
local_activity_request_sink: Rc<dyn LocalActivityRequestSink>,
|
|
92
100
|
) -> Self {
|
|
101
|
+
let metrics = basics.metrics.clone();
|
|
102
|
+
let wfm = WorkflowManager::new(basics);
|
|
93
103
|
Self {
|
|
94
104
|
wfm,
|
|
95
|
-
update_tx,
|
|
96
105
|
local_activity_request_sink,
|
|
97
106
|
waiting_on_la: None,
|
|
98
107
|
am_broken: false,
|
|
108
|
+
wft: None,
|
|
109
|
+
activation: None,
|
|
110
|
+
buffered_resp: None,
|
|
111
|
+
trying_to_evict: None,
|
|
112
|
+
recorded_span_ids: Default::default(),
|
|
113
|
+
metrics,
|
|
114
|
+
paginator: None,
|
|
115
|
+
completion_waiting_on_page_fetch: None,
|
|
99
116
|
}
|
|
100
117
|
}
|
|
101
118
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
let mut no_wft = false;
|
|
115
|
-
async move {
|
|
116
|
-
let res = match action {
|
|
117
|
-
RunActions::NewIncomingWFT(wft) => me
|
|
118
|
-
.incoming_wft(wft)
|
|
119
|
-
.await
|
|
120
|
-
.map(RunActionOutcome::AfterNewWFT),
|
|
121
|
-
RunActions::ActivationCompletion(completion) => me
|
|
122
|
-
.completion(completion, &heartbeat_tx)
|
|
123
|
-
.await
|
|
124
|
-
.map(RunActionOutcome::AfterCompletion),
|
|
125
|
-
RunActions::CheckMoreWork {
|
|
126
|
-
want_to_evict,
|
|
127
|
-
has_pending_queries,
|
|
128
|
-
has_wft,
|
|
129
|
-
} => {
|
|
130
|
-
if !has_wft {
|
|
131
|
-
no_wft = true;
|
|
132
|
-
}
|
|
133
|
-
me.check_more_work(want_to_evict, has_pending_queries, has_wft)
|
|
134
|
-
.await
|
|
135
|
-
.map(RunActionOutcome::AfterCheckWork)
|
|
136
|
-
}
|
|
137
|
-
RunActions::LocalResolution(r) => me
|
|
138
|
-
.local_resolution(r)
|
|
139
|
-
.await
|
|
140
|
-
.map(RunActionOutcome::AfterLocalResolution),
|
|
141
|
-
RunActions::HeartbeatTimeout => {
|
|
142
|
-
let maybe_act = if me.heartbeat_timeout() {
|
|
143
|
-
Some(ActivationOrAuto::Autocomplete {
|
|
144
|
-
run_id: me.wfm.machines.run_id.clone(),
|
|
145
|
-
})
|
|
146
|
-
} else {
|
|
147
|
-
None
|
|
148
|
-
};
|
|
149
|
-
Ok(RunActionOutcome::AfterHeartbeatTimeout(maybe_act))
|
|
150
|
-
}
|
|
151
|
-
};
|
|
152
|
-
match res {
|
|
153
|
-
Ok(outcome) => {
|
|
154
|
-
me.send_update_response(outcome, no_wft);
|
|
155
|
-
}
|
|
156
|
-
Err(e) => {
|
|
157
|
-
error!(error=?e, "Error in run machines");
|
|
158
|
-
me.am_broken = true;
|
|
159
|
-
me.update_tx
|
|
160
|
-
.send(RunUpdateResponse {
|
|
161
|
-
kind: RunUpdateResponseKind::Fail(FailRunUpdate {
|
|
162
|
-
run_id: me.wfm.machines.run_id.clone(),
|
|
163
|
-
err: e.source,
|
|
164
|
-
completion_resp: e.complete_resp_chan,
|
|
165
|
-
}),
|
|
166
|
-
span: Span::current(),
|
|
167
|
-
})
|
|
168
|
-
.expect("Machine can send update");
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
(me, heartbeat_tx)
|
|
172
|
-
}
|
|
173
|
-
.instrument(span)
|
|
174
|
-
})
|
|
175
|
-
.await;
|
|
119
|
+
/// Returns true if there are pending jobs that need to be sent to lang.
|
|
120
|
+
pub(super) fn more_pending_work(&self) -> bool {
|
|
121
|
+
// We don't want to consider there to be more local-only work to be done if there is
|
|
122
|
+
// no workflow task associated with the run right now. This can happen if, ex, we
|
|
123
|
+
// complete a local activity while waiting for server to send us the next WFT.
|
|
124
|
+
// Activating lang would be harmful at this stage, as there might be work returned
|
|
125
|
+
// in that next WFT which should be part of the next activation.
|
|
126
|
+
self.wft.is_some() && self.wfm.machines.has_pending_jobs()
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
pub(super) fn have_seen_terminal_event(&self) -> bool {
|
|
130
|
+
self.wfm.machines.have_seen_terminal_event
|
|
176
131
|
}
|
|
177
132
|
|
|
178
|
-
|
|
133
|
+
/// Returns a ref to info about the currently tracked workflow task, if any.
|
|
134
|
+
pub(super) fn wft(&self) -> Option<&OutstandingTask> {
|
|
135
|
+
self.wft.as_ref()
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
/// Returns a ref to info about the currently tracked workflow activation, if any.
|
|
139
|
+
pub(super) fn activation(&self) -> Option<&OutstandingActivation> {
|
|
140
|
+
self.activation.as_ref()
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
/// Returns true if this run has already been told it will be evicted.
|
|
144
|
+
pub(super) fn is_trying_to_evict(&self) -> bool {
|
|
145
|
+
self.trying_to_evict.is_some()
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
/// Called whenever a new workflow task is obtained for this run
|
|
149
|
+
pub(super) fn incoming_wft(&mut self, pwft: PermittedWFT) -> RunUpdateAct {
|
|
150
|
+
let res = self._incoming_wft(pwft);
|
|
151
|
+
self.update_to_acts(res.map(Into::into), true)
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
fn _incoming_wft(
|
|
179
155
|
&mut self,
|
|
180
|
-
|
|
156
|
+
pwft: PermittedWFT,
|
|
181
157
|
) -> Result<Option<ActivationOrAuto>, RunUpdateErr> {
|
|
182
|
-
|
|
183
|
-
|
|
158
|
+
if self.wft.is_some() {
|
|
159
|
+
dbg_panic!("Trying to send a new WFT for a run which already has one!");
|
|
160
|
+
}
|
|
161
|
+
let start_time = Instant::now();
|
|
162
|
+
|
|
163
|
+
let work = pwft.work;
|
|
164
|
+
let did_miss_cache = !work.is_incremental() || !work.update.is_real();
|
|
165
|
+
debug!(
|
|
166
|
+
run_id = %work.execution.run_id,
|
|
167
|
+
task_token = %&work.task_token,
|
|
168
|
+
update = ?work.update,
|
|
169
|
+
has_legacy_query = %work.legacy_query.is_some(),
|
|
170
|
+
attempt = %work.attempt,
|
|
171
|
+
"Applying new workflow task from server"
|
|
172
|
+
);
|
|
173
|
+
let wft_info = WorkflowTaskInfo {
|
|
174
|
+
attempt: work.attempt,
|
|
175
|
+
task_token: work.task_token,
|
|
176
|
+
wf_id: work.execution.workflow_id.clone(),
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
let legacy_query_from_poll = work
|
|
180
|
+
.legacy_query
|
|
181
|
+
.map(|q| query_to_job(LEGACY_QUERY_ID.to_string(), q));
|
|
182
|
+
|
|
183
|
+
let mut pending_queries = work.query_requests;
|
|
184
|
+
if !pending_queries.is_empty() && legacy_query_from_poll.is_some() {
|
|
185
|
+
error!(
|
|
186
|
+
"Server issued both normal and legacy queries. This should not happen. Please \
|
|
187
|
+
file a bug report."
|
|
188
|
+
);
|
|
189
|
+
return Err(RunUpdateErr {
|
|
190
|
+
source: WFMachinesError::Fatal(
|
|
191
|
+
"Server issued both normal and legacy query".to_string(),
|
|
192
|
+
),
|
|
193
|
+
complete_resp_chan: None,
|
|
194
|
+
});
|
|
195
|
+
}
|
|
196
|
+
if let Some(lq) = legacy_query_from_poll {
|
|
197
|
+
pending_queries.push(lq);
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
self.paginator = Some(pwft.paginator);
|
|
201
|
+
self.wft = Some(OutstandingTask {
|
|
202
|
+
info: wft_info,
|
|
203
|
+
hit_cache: !did_miss_cache,
|
|
204
|
+
pending_queries,
|
|
205
|
+
start_time,
|
|
206
|
+
permit: pwft.permit,
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
// The update field is only populated in the event we hit the cache
|
|
210
|
+
let activation = if work.update.is_real() {
|
|
211
|
+
self.metrics.sticky_cache_hit();
|
|
212
|
+
self.wfm.feed_history_from_server(work.update)?
|
|
184
213
|
} else {
|
|
185
|
-
let r = self.wfm.get_next_activation()
|
|
214
|
+
let r = self.wfm.get_next_activation()?;
|
|
186
215
|
if r.jobs.is_empty() {
|
|
187
216
|
return Err(RunUpdateErr {
|
|
188
217
|
source: WFMachinesError::Fatal(format!(
|
|
@@ -197,16 +226,17 @@ impl ManagedRun {
|
|
|
197
226
|
|
|
198
227
|
if activation.jobs.is_empty() {
|
|
199
228
|
if self.wfm.machines.outstanding_local_activity_count() > 0 {
|
|
200
|
-
// If the activation has no jobs but there are outstanding LAs, we need to restart
|
|
201
|
-
// WFT heartbeat.
|
|
229
|
+
// If the activation has no jobs but there are outstanding LAs, we need to restart
|
|
230
|
+
// the WFT heartbeat.
|
|
202
231
|
if let Some(ref mut lawait) = self.waiting_on_la {
|
|
203
232
|
if lawait.completion_dat.is_some() {
|
|
204
233
|
panic!("Should not have completion dat when getting new wft & empty jobs")
|
|
205
234
|
}
|
|
206
|
-
lawait.
|
|
207
|
-
lawait.
|
|
208
|
-
|
|
209
|
-
|
|
235
|
+
lawait.hb_timeout_handle.abort();
|
|
236
|
+
lawait.hb_timeout_handle = sink_heartbeat_timeout_start(
|
|
237
|
+
self.wfm.machines.run_id.clone(),
|
|
238
|
+
self.local_activity_request_sink.as_ref(),
|
|
239
|
+
start_time,
|
|
210
240
|
lawait.wft_timeout,
|
|
211
241
|
);
|
|
212
242
|
// No activation needs to be sent to lang. We just need to wait for another
|
|
@@ -228,41 +258,354 @@ impl ManagedRun {
|
|
|
228
258
|
Ok(Some(ActivationOrAuto::LangActivation(activation)))
|
|
229
259
|
}
|
|
230
260
|
|
|
231
|
-
|
|
261
|
+
/// Deletes the currently tracked WFT & records latency metrics. Should be called after it has
|
|
262
|
+
/// been responded to (server has been told). Returns the WFT if there was one.
|
|
263
|
+
pub(super) fn mark_wft_complete(
|
|
232
264
|
&mut self,
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
let
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
// Send commands from lang into the machines then check if the workflow run
|
|
243
|
-
// needs another activation and mark it if so
|
|
244
|
-
self.wfm.push_commands(completion.commands).await?;
|
|
245
|
-
// Don't bother applying the next task if we're evicting at the end of
|
|
246
|
-
// this activation
|
|
247
|
-
if !completion.activation_was_eviction {
|
|
248
|
-
self.wfm.apply_next_task_if_ready().await?;
|
|
265
|
+
report_status: WFTReportStatus,
|
|
266
|
+
) -> Option<OutstandingTask> {
|
|
267
|
+
debug!("Marking WFT completed");
|
|
268
|
+
let retme = self.wft.take();
|
|
269
|
+
|
|
270
|
+
// Only record latency metrics if we genuinely reported to server
|
|
271
|
+
if matches!(report_status, WFTReportStatus::Reported) {
|
|
272
|
+
if let Some(ot) = &retme {
|
|
273
|
+
self.metrics.wf_task_latency(ot.start_time.elapsed());
|
|
249
274
|
}
|
|
250
|
-
|
|
275
|
+
// Tell the LA manager that we're done with the WFT
|
|
276
|
+
self.local_activity_request_sink.sink_reqs(vec![
|
|
277
|
+
LocalActRequest::IndicateWorkflowTaskCompleted(self.wfm.machines.run_id.clone()),
|
|
278
|
+
]);
|
|
279
|
+
}
|
|
251
280
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
281
|
+
retme
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
/// Checks if any further activations need to go out for this run and produces them if so.
|
|
285
|
+
pub(super) fn check_more_activations(&mut self) -> RunUpdateAct {
|
|
286
|
+
let res = self._check_more_activations();
|
|
287
|
+
self.update_to_acts(res.map(Into::into), false)
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
fn _check_more_activations(&mut self) -> Result<Option<ActivationOrAuto>, RunUpdateErr> {
|
|
291
|
+
// No point in checking for more activations if there's already an outstanding activation.
|
|
292
|
+
if self.activation.is_some() {
|
|
293
|
+
return Ok(None);
|
|
294
|
+
}
|
|
295
|
+
// In the event it's time to evict this run, cancel any outstanding LAs
|
|
296
|
+
if self.trying_to_evict.is_some() {
|
|
297
|
+
self.sink_la_requests(vec![LocalActRequest::CancelAllInRun(
|
|
298
|
+
self.wfm.machines.run_id.clone(),
|
|
299
|
+
)])?;
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
if self.wft.is_none() {
|
|
303
|
+
// It doesn't make sense to do workflow work unless we have a WFT
|
|
304
|
+
return Ok(None);
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
if self.wfm.machines.has_pending_jobs() && !self.am_broken {
|
|
308
|
+
Ok(Some(ActivationOrAuto::LangActivation(
|
|
309
|
+
self.wfm.get_next_activation()?,
|
|
310
|
+
)))
|
|
311
|
+
} else {
|
|
312
|
+
if !self.am_broken {
|
|
313
|
+
let has_pending_queries = self
|
|
314
|
+
.wft
|
|
315
|
+
.as_ref()
|
|
316
|
+
.map(|wft| !wft.pending_queries.is_empty())
|
|
317
|
+
.unwrap_or_default();
|
|
318
|
+
if has_pending_queries {
|
|
319
|
+
return Ok(Some(ActivationOrAuto::ReadyForQueries(
|
|
320
|
+
self.wfm.machines.get_wf_activation(),
|
|
321
|
+
)));
|
|
322
|
+
}
|
|
256
323
|
}
|
|
324
|
+
if let Some(wte) = self.trying_to_evict.clone() {
|
|
325
|
+
let mut act = self.wfm.machines.get_wf_activation();
|
|
326
|
+
// No other jobs make any sense to send if we encountered an error.
|
|
327
|
+
if self.am_broken {
|
|
328
|
+
act.jobs = vec![];
|
|
329
|
+
}
|
|
330
|
+
act.append_evict_job(RemoveFromCache {
|
|
331
|
+
message: wte.message,
|
|
332
|
+
reason: wte.reason as i32,
|
|
333
|
+
});
|
|
334
|
+
Ok(Some(ActivationOrAuto::LangActivation(act)))
|
|
335
|
+
} else {
|
|
336
|
+
Ok(None)
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
}
|
|
257
340
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
341
|
+
/// Called whenever lang successfully completes a workflow activation. Commands produced by the
|
|
342
|
+
/// activation are passed in. `resp_chan` will be used to unblock the completion call when
|
|
343
|
+
/// everything we need to do to fulfill it has happened.
|
|
344
|
+
///
|
|
345
|
+
/// Can return an error in the event that another page of history needs to be fetched before
|
|
346
|
+
/// the completion can proceed.
|
|
347
|
+
pub(super) fn successful_completion(
|
|
348
|
+
&mut self,
|
|
349
|
+
mut commands: Vec<WFCommand>,
|
|
350
|
+
used_flags: Vec<u32>,
|
|
351
|
+
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
352
|
+
) -> Result<RunUpdateAct, NextPageReq> {
|
|
353
|
+
let activation_was_only_eviction = self.activation_has_only_eviction();
|
|
354
|
+
let (task_token, has_pending_query, start_time) = if let Some(entry) = self.wft.as_ref() {
|
|
355
|
+
(
|
|
356
|
+
entry.info.task_token.clone(),
|
|
357
|
+
!entry.pending_queries.is_empty(),
|
|
358
|
+
entry.start_time,
|
|
359
|
+
)
|
|
360
|
+
} else {
|
|
361
|
+
if !activation_was_only_eviction {
|
|
362
|
+
// Not an error if this was an eviction, since it's normal to issue eviction
|
|
363
|
+
// activations without an associated workflow task in that case.
|
|
364
|
+
dbg_panic!(
|
|
365
|
+
"Attempted to complete activation for run {} without associated workflow task",
|
|
366
|
+
self.run_id()
|
|
367
|
+
);
|
|
368
|
+
}
|
|
369
|
+
let outcome = if let Some((tt, reason)) = self.trying_to_evict.as_mut().and_then(|te| {
|
|
370
|
+
te.auto_reply_fail_tt
|
|
371
|
+
.take()
|
|
372
|
+
.map(|tt| (tt, te.message.clone()))
|
|
373
|
+
}) {
|
|
374
|
+
ActivationCompleteOutcome::ReportWFTFail(FailedActivationWFTReport::Report(
|
|
375
|
+
tt,
|
|
376
|
+
WorkflowTaskFailedCause::Unspecified,
|
|
377
|
+
Failure::application_failure(reason, true).into(),
|
|
378
|
+
))
|
|
379
|
+
} else {
|
|
380
|
+
ActivationCompleteOutcome::DoNothing
|
|
263
381
|
};
|
|
382
|
+
self.reply_to_complete(outcome, resp_chan);
|
|
383
|
+
return Ok(None);
|
|
384
|
+
};
|
|
385
|
+
|
|
386
|
+
// If the only command from the activation is a legacy query response, that means we need
|
|
387
|
+
// to respond differently than a typical activation.
|
|
388
|
+
if matches!(&commands.as_slice(),
|
|
389
|
+
&[WFCommand::QueryResponse(qr)] if qr.query_id == LEGACY_QUERY_ID)
|
|
390
|
+
{
|
|
391
|
+
let qr = match commands.remove(0) {
|
|
392
|
+
WFCommand::QueryResponse(qr) => qr,
|
|
393
|
+
_ => unreachable!("We just verified this is the only command"),
|
|
394
|
+
};
|
|
395
|
+
self.reply_to_complete(
|
|
396
|
+
ActivationCompleteOutcome::ReportWFTSuccess(ServerCommandsWithWorkflowInfo {
|
|
397
|
+
task_token,
|
|
398
|
+
action: ActivationAction::RespondLegacyQuery {
|
|
399
|
+
result: Box::new(qr),
|
|
400
|
+
},
|
|
401
|
+
}),
|
|
402
|
+
resp_chan,
|
|
403
|
+
);
|
|
404
|
+
Ok(None)
|
|
405
|
+
} else {
|
|
406
|
+
// First strip out query responses from other commands that actually affect machines
|
|
407
|
+
// Would be prettier with `drain_filter`
|
|
408
|
+
let mut i = 0;
|
|
409
|
+
let mut query_responses = vec![];
|
|
410
|
+
while i < commands.len() {
|
|
411
|
+
if matches!(commands[i], WFCommand::QueryResponse(_)) {
|
|
412
|
+
if let WFCommand::QueryResponse(qr) = commands.remove(i) {
|
|
413
|
+
query_responses.push(qr);
|
|
414
|
+
}
|
|
415
|
+
} else {
|
|
416
|
+
i += 1;
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
if activation_was_only_eviction && !commands.is_empty() {
|
|
421
|
+
dbg_panic!("Reply to an eviction only containing an eviction included commands");
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
let rac = RunActivationCompletion {
|
|
425
|
+
task_token,
|
|
426
|
+
start_time,
|
|
427
|
+
commands,
|
|
428
|
+
activation_was_eviction: self.activation_has_eviction(),
|
|
429
|
+
activation_was_only_eviction,
|
|
430
|
+
has_pending_query,
|
|
431
|
+
query_responses,
|
|
432
|
+
used_flags,
|
|
433
|
+
resp_chan,
|
|
434
|
+
};
|
|
435
|
+
|
|
436
|
+
// Verify we can actually apply the next workflow task, which will happen as part of
|
|
437
|
+
// applying the completion to machines. If we can't, return early indicating we need
|
|
438
|
+
// to fetch a page.
|
|
439
|
+
if !self.wfm.ready_to_apply_next_wft() {
|
|
440
|
+
return if let Some(paginator) = self.paginator.take() {
|
|
441
|
+
debug!("Need to fetch a history page before next WFT can be applied");
|
|
442
|
+
self.completion_waiting_on_page_fetch = Some(rac);
|
|
443
|
+
Err(NextPageReq {
|
|
444
|
+
paginator,
|
|
445
|
+
span: Span::current(),
|
|
446
|
+
})
|
|
447
|
+
} else {
|
|
448
|
+
Ok(self.update_to_acts(
|
|
449
|
+
Err(RunUpdateErr {
|
|
450
|
+
source: WFMachinesError::Fatal(
|
|
451
|
+
"Run's paginator was absent when attempting to fetch next history \
|
|
452
|
+
page. This is a Core SDK bug."
|
|
453
|
+
.to_string(),
|
|
454
|
+
),
|
|
455
|
+
complete_resp_chan: rac.resp_chan,
|
|
456
|
+
}),
|
|
457
|
+
false,
|
|
458
|
+
))
|
|
459
|
+
};
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
Ok(self.process_completion(rac))
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
/// Called after the higher-up machinery has fetched more pages of event history needed to apply
|
|
467
|
+
/// the next workflow task. The history update and paginator used to perform the fetch are
|
|
468
|
+
/// passed in, with the update being used to apply the task, and the paginator stored to be
|
|
469
|
+
/// attached with another fetch request if needed.
|
|
470
|
+
pub(super) fn fetched_page_completion(
|
|
471
|
+
&mut self,
|
|
472
|
+
update: HistoryUpdate,
|
|
473
|
+
paginator: HistoryPaginator,
|
|
474
|
+
) -> RunUpdateAct {
|
|
475
|
+
let res = self._fetched_page_completion(update, paginator);
|
|
476
|
+
self.update_to_acts(res.map(Into::into), false)
|
|
477
|
+
}
|
|
478
|
+
fn _fetched_page_completion(
|
|
479
|
+
&mut self,
|
|
480
|
+
update: HistoryUpdate,
|
|
481
|
+
paginator: HistoryPaginator,
|
|
482
|
+
) -> Result<Option<FulfillableActivationComplete>, RunUpdateErr> {
|
|
483
|
+
self.paginator = Some(paginator);
|
|
484
|
+
if let Some(d) = self.completion_waiting_on_page_fetch.take() {
|
|
485
|
+
self._process_completion(d, Some(update))
|
|
486
|
+
} else {
|
|
487
|
+
dbg_panic!(
|
|
488
|
+
"Shouldn't be possible to be applying a next-page-fetch update when \
|
|
489
|
+
doing anything other than completing an activation."
|
|
490
|
+
);
|
|
491
|
+
Err(RunUpdateErr::from(WFMachinesError::Fatal(
|
|
492
|
+
"Tried to apply next-page-fetch update to a run that wasn't handling a completion"
|
|
493
|
+
.to_string(),
|
|
494
|
+
)))
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
/// Called whenever either core lang cannot complete a workflow activation. EX: Nondeterminism
|
|
499
|
+
/// or user code threw/panicked, respectively. The `cause` and `reason` fields are determined
|
|
500
|
+
/// inside core always. The `failure` field may come from lang. `resp_chan` will be used to
|
|
501
|
+
/// unblock the completion call when everything we need to do to fulfill it has happened.
|
|
502
|
+
pub(super) fn failed_completion(
|
|
503
|
+
&mut self,
|
|
504
|
+
cause: WorkflowTaskFailedCause,
|
|
505
|
+
reason: EvictionReason,
|
|
506
|
+
failure: workflow_completion::Failure,
|
|
507
|
+
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
508
|
+
) -> RunUpdateAct {
|
|
509
|
+
let tt = if let Some(tt) = self.wft.as_ref().map(|t| t.info.task_token.clone()) {
|
|
510
|
+
tt
|
|
511
|
+
} else {
|
|
512
|
+
dbg_panic!(
|
|
513
|
+
"No workflow task for run id {} found when trying to fail activation",
|
|
514
|
+
self.run_id()
|
|
515
|
+
);
|
|
516
|
+
self.reply_to_complete(ActivationCompleteOutcome::DoNothing, resp_chan);
|
|
517
|
+
return None;
|
|
518
|
+
};
|
|
519
|
+
|
|
520
|
+
self.metrics.wf_task_failed();
|
|
521
|
+
let message = format!("Workflow activation completion failed: {:?}", &failure);
|
|
522
|
+
// Blow up any cached data associated with the workflow
|
|
523
|
+
let evict_req_outcome = self.request_eviction(RequestEvictMsg {
|
|
524
|
+
run_id: self.run_id().to_string(),
|
|
525
|
+
message,
|
|
526
|
+
reason,
|
|
527
|
+
auto_reply_fail_tt: None,
|
|
528
|
+
});
|
|
529
|
+
let should_report = match &evict_req_outcome {
|
|
530
|
+
EvictionRequestResult::EvictionRequested(Some(attempt), _)
|
|
531
|
+
| EvictionRequestResult::EvictionAlreadyRequested(Some(attempt)) => *attempt <= 1,
|
|
532
|
+
_ => false,
|
|
533
|
+
};
|
|
534
|
+
let rur = evict_req_outcome.into_run_update_resp();
|
|
535
|
+
// If the outstanding WFT is a legacy query task, report that we need to fail it
|
|
536
|
+
let outcome = if self.pending_work_is_legacy_query() {
|
|
537
|
+
ActivationCompleteOutcome::ReportWFTFail(
|
|
538
|
+
FailedActivationWFTReport::ReportLegacyQueryFailure(tt, failure),
|
|
539
|
+
)
|
|
540
|
+
} else if should_report {
|
|
541
|
+
ActivationCompleteOutcome::ReportWFTFail(FailedActivationWFTReport::Report(
|
|
542
|
+
tt, cause, failure,
|
|
543
|
+
))
|
|
544
|
+
} else {
|
|
545
|
+
ActivationCompleteOutcome::WFTFailedDontReport
|
|
546
|
+
};
|
|
547
|
+
self.reply_to_complete(outcome, resp_chan);
|
|
548
|
+
rur
|
|
549
|
+
}
|
|
550
|
+
|
|
551
|
+
/// Delete the currently tracked workflow activation and return it, if any. Should be called
|
|
552
|
+
/// after the processing of the activation completion, and WFT reporting.
|
|
553
|
+
pub(super) fn delete_activation(&mut self) -> Option<OutstandingActivation> {
|
|
554
|
+
self.activation.take()
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
/// Called when local activities resolve
|
|
558
|
+
pub(super) fn local_resolution(&mut self, res: LocalResolution) -> RunUpdateAct {
|
|
559
|
+
let res = self._local_resolution(res);
|
|
560
|
+
self.update_to_acts(res.map(Into::into), false)
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
fn process_completion(&mut self, completion: RunActivationCompletion) -> RunUpdateAct {
|
|
564
|
+
let res = self._process_completion(completion, None);
|
|
565
|
+
self.update_to_acts(res.map(Into::into), false)
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
fn _process_completion(
|
|
569
|
+
&mut self,
|
|
570
|
+
completion: RunActivationCompletion,
|
|
571
|
+
new_update: Option<HistoryUpdate>,
|
|
572
|
+
) -> Result<Option<FulfillableActivationComplete>, RunUpdateErr> {
|
|
573
|
+
let data = CompletionDataForWFT {
|
|
574
|
+
task_token: completion.task_token,
|
|
575
|
+
query_responses: completion.query_responses,
|
|
576
|
+
has_pending_query: completion.has_pending_query,
|
|
577
|
+
activation_was_only_eviction: completion.activation_was_only_eviction,
|
|
578
|
+
};
|
|
579
|
+
|
|
580
|
+
self.wfm.machines.add_lang_used_flags(completion.used_flags);
|
|
581
|
+
|
|
582
|
+
// If this is just bookkeeping after a reply to an only-eviction activation, we can bypass
|
|
583
|
+
// everything, since there is no reason to continue trying to update machines.
|
|
584
|
+
if completion.activation_was_only_eviction {
|
|
585
|
+
return Ok(Some(self.prepare_complete_resp(
|
|
586
|
+
completion.resp_chan,
|
|
587
|
+
data,
|
|
588
|
+
false,
|
|
589
|
+
)));
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
let outcome = (|| {
|
|
593
|
+
// Send commands from lang into the machines then check if the workflow run needs
|
|
594
|
+
// another activation and mark it if so
|
|
595
|
+
self.wfm.push_commands_and_iterate(completion.commands)?;
|
|
596
|
+
// If there was a new update included as part of the completion, apply it.
|
|
597
|
+
if let Some(update) = new_update {
|
|
598
|
+
self.wfm.feed_history_from_new_page(update)?;
|
|
599
|
+
}
|
|
600
|
+
// Don't bother applying the next task if we're evicting at the end of this activation
|
|
601
|
+
if !completion.activation_was_eviction {
|
|
602
|
+
self.wfm.apply_next_task_if_ready()?;
|
|
603
|
+
}
|
|
604
|
+
let new_local_acts = self.wfm.drain_queued_local_activities();
|
|
605
|
+
self.sink_la_requests(new_local_acts)?;
|
|
606
|
+
|
|
264
607
|
if self.wfm.machines.outstanding_local_activity_count() == 0 {
|
|
265
|
-
Ok(
|
|
608
|
+
Ok(None)
|
|
266
609
|
} else {
|
|
267
610
|
let wft_timeout: Duration = self
|
|
268
611
|
.wfm
|
|
@@ -275,28 +618,26 @@ impl ManagedRun {
|
|
|
275
618
|
.to_string(),
|
|
276
619
|
)
|
|
277
620
|
})?;
|
|
278
|
-
|
|
279
|
-
Ok((
|
|
280
|
-
Some((heartbeat_tx, completion.start_time, wft_timeout)),
|
|
281
|
-
data,
|
|
282
|
-
self,
|
|
283
|
-
))
|
|
621
|
+
Ok(Some((completion.start_time, wft_timeout)))
|
|
284
622
|
}
|
|
285
|
-
}
|
|
286
|
-
.await;
|
|
623
|
+
})();
|
|
287
624
|
|
|
288
625
|
match outcome {
|
|
289
|
-
Ok(
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
626
|
+
Ok(None) => Ok(Some(self.prepare_complete_resp(
|
|
627
|
+
completion.resp_chan,
|
|
628
|
+
data,
|
|
629
|
+
false,
|
|
630
|
+
))),
|
|
631
|
+
Ok(Some((start_t, wft_timeout))) => {
|
|
632
|
+
if let Some(wola) = self.waiting_on_la.as_mut() {
|
|
633
|
+
wola.hb_timeout_handle.abort();
|
|
293
634
|
}
|
|
294
|
-
|
|
635
|
+
self.waiting_on_la = Some(WaitingOnLAs {
|
|
295
636
|
wft_timeout,
|
|
296
|
-
completion_dat: Some((data, resp_chan)),
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
637
|
+
completion_dat: Some((data, completion.resp_chan)),
|
|
638
|
+
hb_timeout_handle: sink_heartbeat_timeout_start(
|
|
639
|
+
self.run_id().to_string(),
|
|
640
|
+
self.local_activity_request_sink.as_ref(),
|
|
300
641
|
start_t,
|
|
301
642
|
wft_timeout,
|
|
302
643
|
),
|
|
@@ -305,72 +646,350 @@ impl ManagedRun {
|
|
|
305
646
|
}
|
|
306
647
|
Err(e) => Err(RunUpdateErr {
|
|
307
648
|
source: e,
|
|
308
|
-
complete_resp_chan:
|
|
649
|
+
complete_resp_chan: completion.resp_chan,
|
|
309
650
|
}),
|
|
310
651
|
}
|
|
311
652
|
}
|
|
312
653
|
|
|
313
|
-
|
|
654
|
+
fn _local_resolution(
|
|
314
655
|
&mut self,
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
if
|
|
320
|
-
|
|
321
|
-
|
|
656
|
+
res: LocalResolution,
|
|
657
|
+
) -> Result<Option<FulfillableActivationComplete>, RunUpdateErr> {
|
|
658
|
+
debug!(resolution=?res, "Applying local resolution");
|
|
659
|
+
self.wfm.notify_of_local_result(res)?;
|
|
660
|
+
if self.wfm.machines.outstanding_local_activity_count() == 0 {
|
|
661
|
+
if let Some(mut wait_dat) = self.waiting_on_la.take() {
|
|
662
|
+
// Cancel the heartbeat timeout
|
|
663
|
+
wait_dat.hb_timeout_handle.abort();
|
|
664
|
+
if let Some((completion_dat, resp_chan)) = wait_dat.completion_dat.take() {
|
|
665
|
+
return Ok(Some(self.prepare_complete_resp(
|
|
666
|
+
resp_chan,
|
|
667
|
+
completion_dat,
|
|
668
|
+
false,
|
|
669
|
+
)));
|
|
670
|
+
}
|
|
671
|
+
}
|
|
322
672
|
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
673
|
+
Ok(None)
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
pub(super) fn heartbeat_timeout(&mut self) -> RunUpdateAct {
|
|
677
|
+
let maybe_act = if self._heartbeat_timeout() {
|
|
678
|
+
Some(ActivationOrAuto::Autocomplete {
|
|
679
|
+
run_id: self.wfm.machines.run_id.clone(),
|
|
680
|
+
})
|
|
327
681
|
} else {
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
682
|
+
None
|
|
683
|
+
};
|
|
684
|
+
self.update_to_acts(Ok(maybe_act).map(Into::into), false)
|
|
685
|
+
}
|
|
686
|
+
/// Returns `true` if autocompletion should be issued, which will actually cause us to end up
|
|
687
|
+
/// in [completion] again, at which point we'll start a new heartbeat timeout, which will
|
|
688
|
+
/// immediately trigger and thus finish the completion, forcing a new task as it should.
|
|
689
|
+
fn _heartbeat_timeout(&mut self) -> bool {
|
|
690
|
+
if let Some(ref mut wait_dat) = self.waiting_on_la {
|
|
691
|
+
// Cancel the heartbeat timeout
|
|
692
|
+
wait_dat.hb_timeout_handle.abort();
|
|
693
|
+
if let Some((completion_dat, resp_chan)) = wait_dat.completion_dat.take() {
|
|
694
|
+
let compl = self.prepare_complete_resp(resp_chan, completion_dat, true);
|
|
695
|
+
// Immediately fulfill the completion since the run update will already have
|
|
696
|
+
// been replied to
|
|
697
|
+
compl.fulfill();
|
|
698
|
+
} else {
|
|
699
|
+
// Auto-reply WFT complete
|
|
700
|
+
return true;
|
|
332
701
|
}
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
702
|
+
}
|
|
703
|
+
false
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
/// Returns true if the managed run has any form of pending work
|
|
707
|
+
/// If `ignore_evicts` is true, pending evictions do not count as pending work.
|
|
708
|
+
/// If `ignore_buffered` is true, buffered workflow tasks do not count as pending work.
|
|
709
|
+
pub(super) fn has_any_pending_work(&self, ignore_evicts: bool, ignore_buffered: bool) -> bool {
|
|
710
|
+
let evict_work = if ignore_evicts {
|
|
711
|
+
false
|
|
712
|
+
} else {
|
|
713
|
+
self.trying_to_evict.is_some()
|
|
714
|
+
};
|
|
715
|
+
let act_work = if ignore_evicts {
|
|
716
|
+
if let Some(ref act) = self.activation {
|
|
717
|
+
!act.has_only_eviction()
|
|
344
718
|
} else {
|
|
345
|
-
|
|
719
|
+
false
|
|
720
|
+
}
|
|
721
|
+
} else {
|
|
722
|
+
self.activation.is_some()
|
|
723
|
+
};
|
|
724
|
+
let buffered = if ignore_buffered {
|
|
725
|
+
false
|
|
726
|
+
} else {
|
|
727
|
+
self.buffered_resp.is_some()
|
|
728
|
+
};
|
|
729
|
+
trace!(wft=self.wft.is_some(), buffered=?buffered, more_work=?self.more_pending_work(),
|
|
730
|
+
act_work, evict_work, "Does run have pending work?");
|
|
731
|
+
self.wft.is_some() || buffered || self.more_pending_work() || act_work || evict_work
|
|
732
|
+
}
|
|
733
|
+
|
|
734
|
+
/// Stores some work if there is any outstanding WFT or activation for the run. If there was
|
|
735
|
+
/// not, returns the work back out inside the option.
|
|
736
|
+
pub(super) fn buffer_wft_if_outstanding_work(
|
|
737
|
+
&mut self,
|
|
738
|
+
work: PermittedWFT,
|
|
739
|
+
) -> Option<PermittedWFT> {
|
|
740
|
+
let about_to_issue_evict = self.trying_to_evict.is_some();
|
|
741
|
+
let has_wft = self.wft().is_some();
|
|
742
|
+
let has_activation = self.activation().is_some();
|
|
743
|
+
if has_wft || has_activation || about_to_issue_evict || self.more_pending_work() {
|
|
744
|
+
debug!(run_id = %self.run_id(),
|
|
745
|
+
"Got new WFT for a run with outstanding work, buffering it");
|
|
746
|
+
self.buffered_resp = Some(work);
|
|
747
|
+
None
|
|
748
|
+
} else {
|
|
749
|
+
Some(work)
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
|
|
753
|
+
/// Returns true if there is a buffered workflow task for this run.
|
|
754
|
+
pub(super) fn has_buffered_wft(&self) -> bool {
|
|
755
|
+
self.buffered_resp.is_some()
|
|
756
|
+
}
|
|
757
|
+
|
|
758
|
+
/// Removes and returns the buffered workflow task, if any.
|
|
759
|
+
pub(super) fn take_buffered_wft(&mut self) -> Option<PermittedWFT> {
|
|
760
|
+
self.buffered_resp.take()
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
pub(super) fn request_eviction(&mut self, info: RequestEvictMsg) -> EvictionRequestResult {
|
|
764
|
+
let attempts = self.wft.as_ref().map(|wt| wt.info.attempt);
|
|
765
|
+
|
|
766
|
+
// If we were waiting on a page fetch and we're getting evicted because fetching failed,
|
|
767
|
+
// then make sure we allow the completion to proceed, otherwise we're stuck waiting forever.
|
|
768
|
+
if self.completion_waiting_on_page_fetch.is_some()
|
|
769
|
+
&& matches!(info.reason, EvictionReason::PaginationOrHistoryFetch)
|
|
770
|
+
{
|
|
771
|
+
// We just checked it is some, unwrap OK.
|
|
772
|
+
let c = self.completion_waiting_on_page_fetch.take().unwrap();
|
|
773
|
+
let run_upd = self.failed_completion(
|
|
774
|
+
WorkflowTaskFailedCause::Unspecified,
|
|
775
|
+
info.reason,
|
|
776
|
+
Failure::application_failure(info.message, false).into(),
|
|
777
|
+
c.resp_chan,
|
|
778
|
+
);
|
|
779
|
+
return EvictionRequestResult::EvictionRequested(attempts, run_upd);
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
if !self.activation_has_eviction() && self.trying_to_evict.is_none() {
|
|
783
|
+
debug!(run_id=%info.run_id, reason=%info.message, "Eviction requested");
|
|
784
|
+
self.trying_to_evict = Some(info);
|
|
785
|
+
EvictionRequestResult::EvictionRequested(attempts, self.check_more_activations())
|
|
786
|
+
} else {
|
|
787
|
+
// Always store the most recent eviction reason
|
|
788
|
+
self.trying_to_evict = Some(info);
|
|
789
|
+
EvictionRequestResult::EvictionAlreadyRequested(attempts)
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
|
|
793
|
+
pub(super) fn record_span_fields(&mut self, span: &Span) {
|
|
794
|
+
if let Some(spid) = span.id() {
|
|
795
|
+
if self.recorded_span_ids.contains(&spid) {
|
|
796
|
+
return;
|
|
797
|
+
}
|
|
798
|
+
self.recorded_span_ids.insert(spid);
|
|
799
|
+
|
|
800
|
+
if let Some(wid) = self.wft().map(|wft| &wft.info.wf_id) {
|
|
801
|
+
span.record("workflow_id", wid.as_str());
|
|
346
802
|
}
|
|
347
803
|
}
|
|
348
804
|
}
|
|
349
805
|
|
|
806
|
+
/// Take the result of some update to ourselves and turn it into a return value of zero or more
|
|
807
|
+
/// actions
|
|
808
|
+
fn update_to_acts(
|
|
809
|
+
&mut self,
|
|
810
|
+
outcome: Result<ActOrFulfill, RunUpdateErr>,
|
|
811
|
+
in_response_to_wft: bool,
|
|
812
|
+
) -> RunUpdateAct {
|
|
813
|
+
match outcome {
|
|
814
|
+
Ok(act_or_fulfill) => {
|
|
815
|
+
let (mut maybe_act, maybe_fulfill) = match act_or_fulfill {
|
|
816
|
+
ActOrFulfill::OutgoingAct(a) => (a, None),
|
|
817
|
+
ActOrFulfill::FulfillableComplete(c) => (None, c),
|
|
818
|
+
};
|
|
819
|
+
// If there's no activation but is pending work, check and possibly generate one
|
|
820
|
+
if self.more_pending_work() && maybe_act.is_none() {
|
|
821
|
+
match self._check_more_activations() {
|
|
822
|
+
Ok(oa) => maybe_act = oa,
|
|
823
|
+
Err(e) => {
|
|
824
|
+
return self.update_to_acts(Err(e), in_response_to_wft);
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
let r = match maybe_act {
|
|
829
|
+
Some(ActivationOrAuto::LangActivation(mut activation)) => {
|
|
830
|
+
if in_response_to_wft {
|
|
831
|
+
let wft = self
|
|
832
|
+
.wft
|
|
833
|
+
.as_mut()
|
|
834
|
+
.expect("WFT must exist for run just updated with one");
|
|
835
|
+
// If there are in-poll queries, insert jobs for those queries into the
|
|
836
|
+
// activation, but only if we hit the cache. If we didn't, those queries
|
|
837
|
+
// will need to be dealt with once replay is over
|
|
838
|
+
if wft.hit_cache {
|
|
839
|
+
put_queries_in_act(&mut activation, wft);
|
|
840
|
+
}
|
|
841
|
+
}
|
|
842
|
+
|
|
843
|
+
if activation.jobs.is_empty() {
|
|
844
|
+
dbg_panic!("Should not send lang activation with no jobs");
|
|
845
|
+
}
|
|
846
|
+
Some(ActivationOrAuto::LangActivation(activation))
|
|
847
|
+
}
|
|
848
|
+
Some(ActivationOrAuto::ReadyForQueries(mut act)) => {
|
|
849
|
+
if let Some(wft) = self.wft.as_mut() {
|
|
850
|
+
put_queries_in_act(&mut act, wft);
|
|
851
|
+
Some(ActivationOrAuto::LangActivation(act))
|
|
852
|
+
} else {
|
|
853
|
+
dbg_panic!("Ready for queries but no WFT!");
|
|
854
|
+
None
|
|
855
|
+
}
|
|
856
|
+
}
|
|
857
|
+
a @ Some(
|
|
858
|
+
ActivationOrAuto::Autocomplete { .. } | ActivationOrAuto::AutoFail { .. },
|
|
859
|
+
) => a,
|
|
860
|
+
None => {
|
|
861
|
+
if let Some(reason) = self.trying_to_evict.as_ref() {
|
|
862
|
+
// If we had nothing to do, but we're trying to evict, just do that now
|
|
863
|
+
// as long as there's no other outstanding work.
|
|
864
|
+
if self.activation.is_none() && !self.more_pending_work() {
|
|
865
|
+
let mut evict_act = create_evict_activation(
|
|
866
|
+
self.run_id().to_string(),
|
|
867
|
+
reason.message.clone(),
|
|
868
|
+
reason.reason,
|
|
869
|
+
);
|
|
870
|
+
evict_act.history_length =
|
|
871
|
+
self.most_recently_processed_event_number() as u32;
|
|
872
|
+
Some(ActivationOrAuto::LangActivation(evict_act))
|
|
873
|
+
} else {
|
|
874
|
+
None
|
|
875
|
+
}
|
|
876
|
+
} else {
|
|
877
|
+
None
|
|
878
|
+
}
|
|
879
|
+
}
|
|
880
|
+
};
|
|
881
|
+
if let Some(f) = maybe_fulfill {
|
|
882
|
+
f.fulfill();
|
|
883
|
+
}
|
|
884
|
+
|
|
885
|
+
match r {
|
|
886
|
+
// After each run update, check if it's ready to handle any buffered poll
|
|
887
|
+
None | Some(ActivationOrAuto::Autocomplete { .. })
|
|
888
|
+
if !self.has_any_pending_work(false, true) =>
|
|
889
|
+
{
|
|
890
|
+
if let Some(bufft) = self.buffered_resp.take() {
|
|
891
|
+
self.incoming_wft(bufft)
|
|
892
|
+
} else {
|
|
893
|
+
None
|
|
894
|
+
}
|
|
895
|
+
}
|
|
896
|
+
Some(r) => {
|
|
897
|
+
self.insert_outstanding_activation(&r);
|
|
898
|
+
Some(r)
|
|
899
|
+
}
|
|
900
|
+
None => None,
|
|
901
|
+
}
|
|
902
|
+
}
|
|
903
|
+
Err(fail) => {
|
|
904
|
+
self.am_broken = true;
|
|
905
|
+
let rur = if let Some(resp_chan) = fail.complete_resp_chan {
|
|
906
|
+
// Automatically fail the workflow task in the event we couldn't update machines
|
|
907
|
+
let fail_cause = if matches!(&fail.source, WFMachinesError::Nondeterminism(_)) {
|
|
908
|
+
WorkflowTaskFailedCause::NonDeterministicError
|
|
909
|
+
} else {
|
|
910
|
+
WorkflowTaskFailedCause::Unspecified
|
|
911
|
+
};
|
|
912
|
+
let wft_fail_str = format!("{:?}", fail.source);
|
|
913
|
+
self.failed_completion(
|
|
914
|
+
fail_cause,
|
|
915
|
+
fail.source.evict_reason(),
|
|
916
|
+
Failure::application_failure(wft_fail_str, false).into(),
|
|
917
|
+
Some(resp_chan),
|
|
918
|
+
)
|
|
919
|
+
} else {
|
|
920
|
+
warn!(error=?fail.source, "Error while updating workflow");
|
|
921
|
+
Some(ActivationOrAuto::AutoFail {
|
|
922
|
+
run_id: self.run_id().to_owned(),
|
|
923
|
+
machines_err: fail.source,
|
|
924
|
+
})
|
|
925
|
+
};
|
|
926
|
+
rur
|
|
927
|
+
}
|
|
928
|
+
}
|
|
929
|
+
}
|
|
930
|
+
|
|
931
|
+
fn insert_outstanding_activation(&mut self, act: &ActivationOrAuto) {
|
|
932
|
+
let act_type = match &act {
|
|
933
|
+
ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
|
|
934
|
+
if act.is_legacy_query() {
|
|
935
|
+
OutstandingActivation::LegacyQuery
|
|
936
|
+
} else {
|
|
937
|
+
OutstandingActivation::Normal {
|
|
938
|
+
contains_eviction: act.eviction_index().is_some(),
|
|
939
|
+
num_jobs: act.jobs.len(),
|
|
940
|
+
}
|
|
941
|
+
}
|
|
942
|
+
}
|
|
943
|
+
ActivationOrAuto::Autocomplete { .. } | ActivationOrAuto::AutoFail { .. } => {
|
|
944
|
+
OutstandingActivation::Autocomplete
|
|
945
|
+
}
|
|
946
|
+
};
|
|
947
|
+
if let Some(old_act) = self.activation {
|
|
948
|
+
// This is a panic because we have screwed up core logic if this is violated. It must be
|
|
949
|
+
// upheld.
|
|
950
|
+
panic!(
|
|
951
|
+
"Attempted to insert a new outstanding activation {act:?}, but there already was \
|
|
952
|
+
one outstanding: {old_act:?}"
|
|
953
|
+
);
|
|
954
|
+
}
|
|
955
|
+
self.activation = Some(act_type);
|
|
956
|
+
}
|
|
957
|
+
|
|
350
958
|
fn prepare_complete_resp(
|
|
351
959
|
&mut self,
|
|
352
|
-
resp_chan: oneshot::Sender<ActivationCompleteResult
|
|
960
|
+
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
353
961
|
data: CompletionDataForWFT,
|
|
354
962
|
due_to_heartbeat_timeout: bool,
|
|
355
963
|
) -> FulfillableActivationComplete {
|
|
356
|
-
let outgoing_cmds = self.wfm.get_server_commands();
|
|
964
|
+
let mut outgoing_cmds = self.wfm.get_server_commands();
|
|
965
|
+
if data.activation_was_only_eviction && !outgoing_cmds.commands.is_empty() {
|
|
966
|
+
if self.am_broken {
|
|
967
|
+
// If we broke there could be commands in the pipe that we didn't get a chance to
|
|
968
|
+
// handle properly during replay, just wipe them all out.
|
|
969
|
+
outgoing_cmds.commands = vec![];
|
|
970
|
+
} else {
|
|
971
|
+
dbg_panic!(
|
|
972
|
+
"There should not be any outgoing commands when preparing a completion response \
|
|
973
|
+
if the activation was only an eviction. This is an SDK bug."
|
|
974
|
+
);
|
|
975
|
+
}
|
|
976
|
+
}
|
|
977
|
+
|
|
357
978
|
let query_responses = data.query_responses;
|
|
358
979
|
let has_query_responses = !query_responses.is_empty();
|
|
359
980
|
let is_query_playback = data.has_pending_query && !has_query_responses;
|
|
360
981
|
let mut force_new_wft = due_to_heartbeat_timeout;
|
|
361
982
|
|
|
362
|
-
// We only actually want to send commands back to the server if there are no more
|
|
363
|
-
//
|
|
364
|
-
//
|
|
365
|
-
//
|
|
366
|
-
// eviction, and there were no commands produced during iteration, don't send that
|
|
983
|
+
// We only actually want to send commands back to the server if there are no more pending
|
|
984
|
+
// activations and we are caught up on replay. We don't want to complete a wft if we already
|
|
985
|
+
// saw the final event in the workflow, or if we are playing back for the express purpose of
|
|
986
|
+
// fulfilling a query. If the activation we sent was *only* an eviction, don't send that
|
|
367
987
|
// either.
|
|
368
|
-
let no_commands_and_evicting =
|
|
369
|
-
outgoing_cmds.commands.is_empty() && data.activation_was_only_eviction;
|
|
370
988
|
let should_respond = !(self.wfm.machines.has_pending_jobs()
|
|
371
989
|
|| outgoing_cmds.replaying
|
|
372
990
|
|| is_query_playback
|
|
373
|
-
||
|
|
991
|
+
|| data.activation_was_only_eviction
|
|
992
|
+
|| self.wfm.machines.have_seen_terminal_event);
|
|
374
993
|
// If there are pending LA resolutions, and we're responding to a query here,
|
|
375
994
|
// we want to make sure to force a new task, as otherwise once we tell lang about
|
|
376
995
|
// the LA resolution there wouldn't be any task to reply to with the result of iterating
|
|
@@ -378,154 +997,164 @@ impl ManagedRun {
|
|
|
378
997
|
if has_query_responses && self.wfm.machines.has_pending_la_resolutions() {
|
|
379
998
|
force_new_wft = true;
|
|
380
999
|
}
|
|
381
|
-
let to_be_sent = ServerCommandsWithWorkflowInfo {
|
|
382
|
-
task_token: data.task_token,
|
|
383
|
-
action: ActivationAction::WftComplete {
|
|
384
|
-
force_new_wft,
|
|
385
|
-
commands: outgoing_cmds.commands,
|
|
386
|
-
query_responses,
|
|
387
|
-
},
|
|
388
|
-
};
|
|
389
1000
|
|
|
390
1001
|
let outcome = if should_respond || has_query_responses {
|
|
391
|
-
ActivationCompleteOutcome::ReportWFTSuccess(
|
|
1002
|
+
ActivationCompleteOutcome::ReportWFTSuccess(ServerCommandsWithWorkflowInfo {
|
|
1003
|
+
task_token: data.task_token,
|
|
1004
|
+
action: ActivationAction::WftComplete {
|
|
1005
|
+
force_new_wft,
|
|
1006
|
+
commands: outgoing_cmds.commands,
|
|
1007
|
+
query_responses,
|
|
1008
|
+
sdk_metadata: self.wfm.machines.get_metadata_for_wft_complete(),
|
|
1009
|
+
},
|
|
1010
|
+
})
|
|
392
1011
|
} else {
|
|
393
1012
|
ActivationCompleteOutcome::DoNothing
|
|
394
1013
|
};
|
|
395
1014
|
FulfillableActivationComplete {
|
|
396
|
-
result:
|
|
397
|
-
most_recently_processed_event: self.wfm.machines.last_processed_event as usize,
|
|
398
|
-
outcome,
|
|
399
|
-
},
|
|
1015
|
+
result: self.build_activation_complete_result(outcome),
|
|
400
1016
|
resp_chan,
|
|
401
1017
|
}
|
|
402
1018
|
}
|
|
403
1019
|
|
|
404
|
-
|
|
1020
|
+
/// Pump some local activity requests into the sink, applying any immediate results to the
|
|
1021
|
+
/// workflow machines.
|
|
1022
|
+
fn sink_la_requests(
|
|
405
1023
|
&mut self,
|
|
406
|
-
|
|
407
|
-
) -> Result<
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
if let Some(mut wait_dat) = self.waiting_on_la.take() {
|
|
412
|
-
// Cancel the heartbeat timeout
|
|
413
|
-
wait_dat.heartbeat_timeout_task.abort();
|
|
414
|
-
if let Some((completion_dat, resp_chan)) = wait_dat.completion_dat.take() {
|
|
415
|
-
return Ok(Some(self.prepare_complete_resp(
|
|
416
|
-
resp_chan,
|
|
417
|
-
completion_dat,
|
|
418
|
-
false,
|
|
419
|
-
)));
|
|
420
|
-
}
|
|
421
|
-
}
|
|
1024
|
+
new_local_acts: Vec<LocalActRequest>,
|
|
1025
|
+
) -> Result<(), WFMachinesError> {
|
|
1026
|
+
let immediate_resolutions = self.local_activity_request_sink.sink_reqs(new_local_acts);
|
|
1027
|
+
if !immediate_resolutions.is_empty() {
|
|
1028
|
+
warn!("Immediate res: {:?}", &immediate_resolutions);
|
|
422
1029
|
}
|
|
423
|
-
|
|
1030
|
+
for resolution in immediate_resolutions {
|
|
1031
|
+
self.wfm
|
|
1032
|
+
.notify_of_local_result(LocalResolution::LocalActivity(resolution))?;
|
|
1033
|
+
}
|
|
1034
|
+
Ok(())
|
|
424
1035
|
}
|
|
425
1036
|
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
let compl = self.prepare_complete_resp(resp_chan, completion_dat, true);
|
|
435
|
-
// Immediately fulfill the completion since the run update will already have
|
|
436
|
-
// been replied to
|
|
437
|
-
compl.fulfill();
|
|
438
|
-
} else {
|
|
439
|
-
// Auto-reply WFT complete
|
|
440
|
-
return true;
|
|
441
|
-
}
|
|
442
|
-
} else {
|
|
443
|
-
// If a heartbeat timeout happened, we should always have been waiting on LAs
|
|
444
|
-
dbg_panic!("WFT heartbeat timeout fired but we were not waiting on any LAs");
|
|
1037
|
+
fn reply_to_complete(
|
|
1038
|
+
&self,
|
|
1039
|
+
outcome: ActivationCompleteOutcome,
|
|
1040
|
+
chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
1041
|
+
) {
|
|
1042
|
+
if let Some(chan) = chan {
|
|
1043
|
+
chan.send(self.build_activation_complete_result(outcome))
|
|
1044
|
+
.expect("Rcv half of activation reply not dropped");
|
|
445
1045
|
}
|
|
446
|
-
false
|
|
447
1046
|
}
|
|
448
1047
|
|
|
449
|
-
fn
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
//
|
|
463
|
-
//
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
.
|
|
1048
|
+
fn build_activation_complete_result(
|
|
1049
|
+
&self,
|
|
1050
|
+
outcome: ActivationCompleteOutcome,
|
|
1051
|
+
) -> ActivationCompleteResult {
|
|
1052
|
+
ActivationCompleteResult {
|
|
1053
|
+
outcome,
|
|
1054
|
+
most_recently_processed_event: self.most_recently_processed_event_number() as usize,
|
|
1055
|
+
replaying: self.wfm.machines.replaying,
|
|
1056
|
+
}
|
|
1057
|
+
}
|
|
1058
|
+
|
|
1059
|
+
/// Returns true if the handle is currently processing a WFT which contains a legacy query.
|
|
1060
|
+
fn pending_work_is_legacy_query(&self) -> bool {
|
|
1061
|
+
// Either we know because there is a pending legacy query, or it's already been drained and
|
|
1062
|
+
// sent as an activation.
|
|
1063
|
+
matches!(self.activation, Some(OutstandingActivation::LegacyQuery))
|
|
1064
|
+
|| self
|
|
1065
|
+
.wft
|
|
1066
|
+
.as_ref()
|
|
1067
|
+
.map(|t| t.has_pending_legacy_query())
|
|
1068
|
+
.unwrap_or_default()
|
|
1069
|
+
}
|
|
1070
|
+
|
|
1071
|
+
fn most_recently_processed_event_number(&self) -> i64 {
|
|
1072
|
+
self.wfm.machines.last_processed_event
|
|
1073
|
+
}
|
|
1074
|
+
|
|
1075
|
+
fn activation_has_eviction(&mut self) -> bool {
|
|
1076
|
+
self.activation
|
|
1077
|
+
.map(OutstandingActivation::has_eviction)
|
|
1078
|
+
.unwrap_or_default()
|
|
1079
|
+
}
|
|
1080
|
+
|
|
1081
|
+
fn activation_has_only_eviction(&mut self) -> bool {
|
|
1082
|
+
self.activation
|
|
1083
|
+
.map(OutstandingActivation::has_only_eviction)
|
|
1084
|
+
.unwrap_or_default()
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1087
|
+
fn run_id(&self) -> &str {
|
|
1088
|
+
&self.wfm.machines.run_id
|
|
485
1089
|
}
|
|
486
1090
|
}
|
|
487
1091
|
|
|
488
|
-
|
|
489
|
-
|
|
1092
|
+
/// Drains pending queries from the workflow task and appends them to the activation's jobs
|
|
1093
|
+
fn put_queries_in_act(act: &mut WorkflowActivation, wft: &mut OutstandingTask) {
|
|
1094
|
+
// Nothing to do if there are no pending queries
|
|
1095
|
+
if wft.pending_queries.is_empty() {
|
|
1096
|
+
return;
|
|
1097
|
+
}
|
|
1098
|
+
|
|
1099
|
+
let has_legacy = wft.has_pending_legacy_query();
|
|
1100
|
+
// Cannot dispatch legacy query if there are any other jobs - which can happen if, ex, a local
|
|
1101
|
+
// activity resolves while we've gotten a legacy query after heartbeating.
|
|
1102
|
+
if has_legacy && !act.jobs.is_empty() {
|
|
1103
|
+
return;
|
|
1104
|
+
}
|
|
1105
|
+
|
|
1106
|
+
debug!(queries=?wft.pending_queries, "Dispatching queries");
|
|
1107
|
+
let query_jobs = wft
|
|
1108
|
+
.pending_queries
|
|
1109
|
+
.drain(..)
|
|
1110
|
+
.map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
|
|
1111
|
+
act.jobs.extend(query_jobs);
|
|
1112
|
+
}
|
|
1113
|
+
fn sink_heartbeat_timeout_start(
|
|
1114
|
+
run_id: String,
|
|
1115
|
+
sink: &dyn LocalActivityRequestSink,
|
|
490
1116
|
wft_start_time: Instant,
|
|
491
1117
|
wft_timeout: Duration,
|
|
492
|
-
) ->
|
|
1118
|
+
) -> AbortHandle {
|
|
493
1119
|
// The heartbeat deadline is 80% of the WFT timeout
|
|
494
|
-
let
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
}
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
AfterLocalResolution(Option<FulfillableActivationComplete>),
|
|
506
|
-
AfterCompletion(Option<FulfillableActivationComplete>),
|
|
507
|
-
AfterHeartbeatTimeout(Option<ActivationOrAuto>),
|
|
1120
|
+
let deadline = wft_start_time.add(wft_timeout.mul_f32(WFT_HEARTBEAT_TIMEOUT_FRACTION));
|
|
1121
|
+
let (abort_handle, abort_reg) = AbortHandle::new_pair();
|
|
1122
|
+
sink.sink_reqs(vec![LocalActRequest::StartHeartbeatTimeout {
|
|
1123
|
+
send_on_elapse: HeartbeatTimeoutMsg {
|
|
1124
|
+
run_id,
|
|
1125
|
+
span: Span::current(),
|
|
1126
|
+
},
|
|
1127
|
+
deadline,
|
|
1128
|
+
abort_reg,
|
|
1129
|
+
}]);
|
|
1130
|
+
abort_handle
|
|
508
1131
|
}
|
|
509
1132
|
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
struct
|
|
513
|
-
|
|
514
|
-
|
|
1133
|
+
/// If an activation completion needed to wait on LA completions (or heartbeat timeout) we use
|
|
1134
|
+
/// this struct to store the data we need to finish the completion once that has happened
|
|
1135
|
+
struct WaitingOnLAs {
|
|
1136
|
+
wft_timeout: Duration,
|
|
1137
|
+
/// If set, we are waiting for LAs to complete as part of a just-finished workflow activation.
|
|
1138
|
+
/// If unset, we already had a heartbeat timeout and got a new WFT without any new work while
|
|
1139
|
+
/// there are still incomplete LAs.
|
|
1140
|
+
completion_dat: Option<(
|
|
1141
|
+
CompletionDataForWFT,
|
|
1142
|
+
Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
1143
|
+
)>,
|
|
1144
|
+
/// Can be used to abort heartbeat timeouts
|
|
1145
|
+
hb_timeout_handle: AbortHandle,
|
|
515
1146
|
}
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
}
|
|
523
|
-
}
|
|
1147
|
+
#[derive(Debug)]
|
|
1148
|
+
struct CompletionDataForWFT {
|
|
1149
|
+
task_token: TaskToken,
|
|
1150
|
+
query_responses: Vec<QueryResult>,
|
|
1151
|
+
has_pending_query: bool,
|
|
1152
|
+
activation_was_only_eviction: bool,
|
|
524
1153
|
}
|
|
525
1154
|
|
|
526
1155
|
/// Manages an instance of a [WorkflowMachines], which is not thread-safe, as well as other data
|
|
527
1156
|
/// associated with that specific workflow run.
|
|
528
|
-
|
|
1157
|
+
struct WorkflowManager {
|
|
529
1158
|
machines: WorkflowMachines,
|
|
530
1159
|
/// Is always `Some` in normal operation. Optional to allow for unit testing with the test
|
|
531
1160
|
/// workflow driver, which does not need to complete activations the normal way.
|
|
@@ -535,24 +1164,9 @@ pub(crate) struct WorkflowManager {
|
|
|
535
1164
|
impl WorkflowManager {
|
|
536
1165
|
/// Create a new workflow manager given workflow history and execution info as would be found
|
|
537
1166
|
/// in [PollWorkflowTaskQueueResponse]
|
|
538
|
-
|
|
539
|
-
history: HistoryUpdate,
|
|
540
|
-
namespace: String,
|
|
541
|
-
workflow_id: String,
|
|
542
|
-
workflow_type: String,
|
|
543
|
-
run_id: String,
|
|
544
|
-
metrics: MetricsContext,
|
|
545
|
-
) -> Self {
|
|
1167
|
+
fn new(basics: RunBasics) -> Self {
|
|
546
1168
|
let (wfb, cmd_sink) = WorkflowBridge::new();
|
|
547
|
-
let state_machines = WorkflowMachines::new(
|
|
548
|
-
namespace,
|
|
549
|
-
workflow_id,
|
|
550
|
-
workflow_type,
|
|
551
|
-
run_id,
|
|
552
|
-
history,
|
|
553
|
-
Box::new(wfb).into(),
|
|
554
|
-
metrics,
|
|
555
|
-
);
|
|
1169
|
+
let state_machines = WorkflowMachines::new(basics, Box::new(wfb).into());
|
|
556
1170
|
Self {
|
|
557
1171
|
machines: state_machines,
|
|
558
1172
|
command_sink: Some(cmd_sink),
|
|
@@ -560,7 +1174,7 @@ impl WorkflowManager {
|
|
|
560
1174
|
}
|
|
561
1175
|
|
|
562
1176
|
#[cfg(test)]
|
|
563
|
-
|
|
1177
|
+
const fn new_from_machines(workflow_machines: WorkflowMachines) -> Self {
|
|
564
1178
|
Self {
|
|
565
1179
|
machines: workflow_machines,
|
|
566
1180
|
command_sink: None,
|
|
@@ -571,12 +1185,15 @@ impl WorkflowManager {
|
|
|
571
1185
|
///
|
|
572
1186
|
/// Should only be called when a workflow has caught up on replay (or is just beginning). It
|
|
573
1187
|
/// will return a workflow activation if one is needed.
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
1188
|
+
fn feed_history_from_server(&mut self, update: HistoryUpdate) -> Result<WorkflowActivation> {
|
|
1189
|
+
self.machines.new_history_from_server(update)?;
|
|
1190
|
+
self.get_next_activation()
|
|
1191
|
+
}
|
|
1192
|
+
|
|
1193
|
+
/// Update the machines with some events from fetching another page of history. Does *not*
|
|
1194
|
+
/// attempt to pull the next activation, unlike [Self::feed_history_from_server].
|
|
1195
|
+
fn feed_history_from_new_page(&mut self, update: HistoryUpdate) -> Result<()> {
|
|
1196
|
+
self.machines.new_history_from_server(update)
|
|
580
1197
|
}
|
|
581
1198
|
|
|
582
1199
|
/// Let this workflow know that something we've been waiting locally on has resolved, like a
|
|
@@ -593,27 +1210,33 @@ impl WorkflowManager {
|
|
|
593
1210
|
///
|
|
594
1211
|
/// Callers may also need to call [get_server_commands] after this to issue any pending commands
|
|
595
1212
|
/// to the server.
|
|
596
|
-
|
|
1213
|
+
fn get_next_activation(&mut self) -> Result<WorkflowActivation> {
|
|
597
1214
|
// First check if there are already some pending jobs, which can be a result of replay.
|
|
598
1215
|
let activation = self.machines.get_wf_activation();
|
|
599
1216
|
if !activation.jobs.is_empty() {
|
|
600
1217
|
return Ok(activation);
|
|
601
1218
|
}
|
|
602
1219
|
|
|
603
|
-
self.machines.apply_next_wft_from_history()
|
|
1220
|
+
self.machines.apply_next_wft_from_history()?;
|
|
604
1221
|
Ok(self.machines.get_wf_activation())
|
|
605
1222
|
}
|
|
606
1223
|
|
|
1224
|
+
/// Returns true if machines are ready to apply the next WFT sequence, false if events will need
|
|
1225
|
+
/// to be fetched in order to create a complete update with the entire next WFT sequence.
|
|
1226
|
+
pub(crate) fn ready_to_apply_next_wft(&self) -> bool {
|
|
1227
|
+
self.machines.ready_to_apply_next_wft()
|
|
1228
|
+
}
|
|
1229
|
+
|
|
607
1230
|
/// If there are no pending jobs for the workflow, apply the next workflow task and check
|
|
608
1231
|
/// again if there are any jobs. Importantly, does not *drain* jobs.
|
|
609
1232
|
///
|
|
610
1233
|
/// Returns true if there are jobs (before or after applying the next WFT).
|
|
611
|
-
|
|
1234
|
+
fn apply_next_task_if_ready(&mut self) -> Result<bool> {
|
|
612
1235
|
if self.machines.has_pending_jobs() {
|
|
613
1236
|
return Ok(true);
|
|
614
1237
|
}
|
|
615
1238
|
loop {
|
|
616
|
-
let consumed_events = self.machines.apply_next_wft_from_history()
|
|
1239
|
+
let consumed_events = self.machines.apply_next_wft_from_history()?;
|
|
617
1240
|
|
|
618
1241
|
if consumed_events == 0 || !self.machines.replaying || self.machines.has_pending_jobs()
|
|
619
1242
|
{
|
|
@@ -643,13 +1266,62 @@ impl WorkflowManager {
|
|
|
643
1266
|
|
|
644
1267
|
/// Feed the workflow machines new commands issued by the executing workflow code, and iterate
|
|
645
1268
|
/// the machines.
|
|
646
|
-
|
|
1269
|
+
fn push_commands_and_iterate(&mut self, cmds: Vec<WFCommand>) -> Result<()> {
|
|
647
1270
|
if let Some(cs) = self.command_sink.as_mut() {
|
|
648
1271
|
cs.send(cmds).map_err(|_| {
|
|
649
1272
|
WFMachinesError::Fatal("Internal error buffering workflow commands".to_string())
|
|
650
1273
|
})?;
|
|
651
1274
|
}
|
|
652
|
-
self.machines.iterate_machines()
|
|
1275
|
+
self.machines.iterate_machines()?;
|
|
653
1276
|
Ok(())
|
|
654
1277
|
}
|
|
655
1278
|
}
|
|
1279
|
+
|
|
1280
|
+
#[derive(Debug)]
|
|
1281
|
+
struct FulfillableActivationComplete {
|
|
1282
|
+
result: ActivationCompleteResult,
|
|
1283
|
+
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
1284
|
+
}
|
|
1285
|
+
impl FulfillableActivationComplete {
|
|
1286
|
+
fn fulfill(self) {
|
|
1287
|
+
if let Some(resp_chan) = self.resp_chan {
|
|
1288
|
+
let _ = resp_chan.send(self.result);
|
|
1289
|
+
}
|
|
1290
|
+
}
|
|
1291
|
+
}
|
|
1292
|
+
|
|
1293
|
+
#[derive(Debug)]
|
|
1294
|
+
struct RunActivationCompletion {
|
|
1295
|
+
task_token: TaskToken,
|
|
1296
|
+
start_time: Instant,
|
|
1297
|
+
commands: Vec<WFCommand>,
|
|
1298
|
+
activation_was_eviction: bool,
|
|
1299
|
+
activation_was_only_eviction: bool,
|
|
1300
|
+
has_pending_query: bool,
|
|
1301
|
+
query_responses: Vec<QueryResult>,
|
|
1302
|
+
used_flags: Vec<u32>,
|
|
1303
|
+
/// Used to notify the worker when the completion is done processing and the completion can
|
|
1304
|
+
/// unblock. Must always be `Some` when initialized.
|
|
1305
|
+
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
1306
|
+
}
|
|
1307
|
+
#[derive(Debug, derive_more::From)]
|
|
1308
|
+
enum ActOrFulfill {
|
|
1309
|
+
OutgoingAct(Option<ActivationOrAuto>),
|
|
1310
|
+
FulfillableComplete(Option<FulfillableActivationComplete>),
|
|
1311
|
+
}
|
|
1312
|
+
|
|
1313
|
+
#[derive(derive_more::DebugCustom)]
|
|
1314
|
+
#[debug(fmt = "RunUpdateErr({source:?})")]
|
|
1315
|
+
struct RunUpdateErr {
|
|
1316
|
+
source: WFMachinesError,
|
|
1317
|
+
complete_resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
1318
|
+
}
|
|
1319
|
+
|
|
1320
|
+
impl From<WFMachinesError> for RunUpdateErr {
|
|
1321
|
+
fn from(e: WFMachinesError) -> Self {
|
|
1322
|
+
RunUpdateErr {
|
|
1323
|
+
source: e,
|
|
1324
|
+
complete_resp_chan: None,
|
|
1325
|
+
}
|
|
1326
|
+
}
|
|
1327
|
+
}
|