@temporalio/core-bridge 1.5.2 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +255 -48
- package/package.json +4 -4
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/pipeline.yml +1 -3
- package/sdk-core/.cargo/config.toml +5 -2
- package/sdk-core/.github/workflows/heavy.yml +28 -0
- package/sdk-core/Cargo.toml +1 -1
- package/sdk-core/README.md +9 -5
- package/sdk-core/client/src/lib.rs +211 -36
- package/sdk-core/client/src/raw.rs +1 -1
- package/sdk-core/client/src/retry.rs +32 -20
- package/sdk-core/core/Cargo.toml +23 -9
- package/sdk-core/core/src/abstractions.rs +11 -0
- package/sdk-core/core/src/core_tests/activity_tasks.rs +6 -5
- package/sdk-core/core/src/core_tests/local_activities.rs +263 -22
- package/sdk-core/core/src/core_tests/queries.rs +2 -2
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +249 -5
- package/sdk-core/core/src/ephemeral_server/mod.rs +5 -6
- package/sdk-core/core/src/lib.rs +2 -0
- package/sdk-core/core/src/protosext/mod.rs +1 -1
- package/sdk-core/core/src/telemetry/log_export.rs +1 -1
- package/sdk-core/core/src/telemetry/mod.rs +23 -8
- package/sdk-core/core/src/test_help/mod.rs +8 -1
- package/sdk-core/core/src/worker/activities/local_activities.rs +259 -125
- package/sdk-core/core/src/worker/activities.rs +3 -2
- package/sdk-core/core/src/worker/mod.rs +53 -26
- package/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
- package/sdk-core/core/src/worker/workflow/driven_workflow.rs +3 -5
- package/sdk-core/core/src/worker/workflow/history_update.rs +835 -277
- package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +9 -17
- package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +3 -5
- package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +1 -2
- package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +3 -5
- package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +1 -2
- package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +1 -2
- package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +1 -2
- package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +73 -51
- package/sdk-core/core/src/worker/workflow/machines/mod.rs +3 -3
- package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +4 -4
- package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +1 -2
- package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +3 -5
- package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +6 -7
- package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
- package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +4 -4
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +89 -58
- package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +4 -7
- package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +21 -9
- package/sdk-core/core/src/worker/workflow/managed_run.rs +1021 -360
- package/sdk-core/core/src/worker/workflow/mod.rs +306 -346
- package/sdk-core/core/src/worker/workflow/run_cache.rs +29 -53
- package/sdk-core/core/src/worker/workflow/wft_extraction.rs +125 -0
- package/sdk-core/core/src/worker/workflow/wft_poller.rs +1 -4
- package/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +115 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream.rs +444 -714
- package/sdk-core/core-api/Cargo.toml +2 -0
- package/sdk-core/core-api/src/errors.rs +1 -34
- package/sdk-core/core-api/src/lib.rs +6 -2
- package/sdk-core/core-api/src/worker.rs +14 -1
- package/sdk-core/etc/deps.svg +115 -140
- package/sdk-core/etc/regen-depgraph.sh +5 -0
- package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +6 -6
- package/sdk-core/fsm/rustfsm_trait/src/lib.rs +7 -3
- package/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
- package/sdk-core/protos/api_upstream/Makefile +5 -5
- package/sdk-core/protos/api_upstream/build/go.mod +7 -0
- package/sdk-core/protos/api_upstream/build/go.sum +5 -0
- package/sdk-core/protos/api_upstream/build/tools.go +29 -0
- package/sdk-core/protos/api_upstream/go.mod +6 -0
- package/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +9 -2
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +12 -19
- package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +3 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +3 -3
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +20 -2
- package/sdk-core/protos/api_upstream/temporal/api/{update/v1/message.proto → enums/v1/interaction_type.proto} +11 -18
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +2 -13
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +13 -19
- package/sdk-core/protos/api_upstream/temporal/api/interaction/v1/message.proto +87 -0
- package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +13 -8
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +2 -2
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +2 -0
- package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
- package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
- package/sdk-core/sdk/Cargo.toml +4 -3
- package/sdk-core/sdk/src/lib.rs +87 -21
- package/sdk-core/sdk/src/workflow_future.rs +7 -12
- package/sdk-core/sdk-core-protos/Cargo.toml +5 -2
- package/sdk-core/sdk-core-protos/build.rs +36 -2
- package/sdk-core/sdk-core-protos/src/history_builder.rs +26 -19
- package/sdk-core/sdk-core-protos/src/history_info.rs +4 -0
- package/sdk-core/sdk-core-protos/src/lib.rs +78 -34
- package/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
- package/sdk-core/test-utils/Cargo.toml +3 -1
- package/sdk-core/test-utils/src/histfetch.rs +1 -1
- package/sdk-core/test-utils/src/lib.rs +50 -18
- package/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
- package/sdk-core/test-utils/src/workflows.rs +29 -0
- package/sdk-core/tests/fuzzy_workflow.rs +130 -0
- package/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +114 -7
- package/sdk-core/tests/integ_tests/heartbeat_tests.rs +5 -2
- package/sdk-core/tests/integ_tests/metrics_tests.rs +1 -1
- package/sdk-core/tests/integ_tests/polling_tests.rs +1 -39
- package/sdk-core/tests/integ_tests/queries_tests.rs +2 -127
- package/sdk-core/tests/integ_tests/visibility_tests.rs +52 -5
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +74 -1
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +5 -13
- package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +2 -10
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +69 -197
- package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +4 -28
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +12 -7
- package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +14 -14
- package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +3 -19
- package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +3 -19
- package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +1 -1
- package/sdk-core/tests/integ_tests/workflow_tests.rs +5 -6
- package/sdk-core/tests/main.rs +2 -12
- package/sdk-core/tests/runner.rs +71 -34
- package/sdk-core/tests/wf_input_replay.rs +32 -0
- package/sdk-core/bridge-ffi/Cargo.toml +0 -24
- package/sdk-core/bridge-ffi/LICENSE.txt +0 -23
- package/sdk-core/bridge-ffi/build.rs +0 -25
- package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -224
- package/sdk-core/bridge-ffi/src/lib.rs +0 -746
- package/sdk-core/bridge-ffi/src/wrappers.rs +0 -221
- package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
- package/sdk-core/sdk/src/conversions.rs +0 -8
|
@@ -8,36 +8,43 @@ mod history_update;
|
|
|
8
8
|
mod machines;
|
|
9
9
|
mod managed_run;
|
|
10
10
|
mod run_cache;
|
|
11
|
+
mod wft_extraction;
|
|
11
12
|
pub(crate) mod wft_poller;
|
|
12
13
|
mod workflow_stream;
|
|
13
14
|
|
|
15
|
+
#[cfg(feature = "save_wf_inputs")]
|
|
16
|
+
pub use workflow_stream::replay_wf_state_inputs;
|
|
17
|
+
|
|
14
18
|
pub(crate) use bridge::WorkflowBridge;
|
|
15
19
|
pub(crate) use driven_workflow::{DrivenWorkflow, WorkflowFetcher};
|
|
16
|
-
pub(crate) use history_update::
|
|
17
|
-
pub(crate) use machines::WFMachinesError;
|
|
20
|
+
pub(crate) use history_update::HistoryUpdate;
|
|
18
21
|
#[cfg(test)]
|
|
19
22
|
pub(crate) use managed_run::ManagedWFFunc;
|
|
20
23
|
|
|
21
24
|
use crate::{
|
|
22
|
-
abstractions::OwnedMeteredSemPermit,
|
|
23
|
-
protosext::{legacy_query_failure, ValidPollWFTQResponse
|
|
24
|
-
telemetry::VecDisplayer,
|
|
25
|
+
abstractions::{stream_when_allowed, MeteredSemaphore, OwnedMeteredSemPermit},
|
|
26
|
+
protosext::{legacy_query_failure, ValidPollWFTQResponse},
|
|
27
|
+
telemetry::{metrics::workflow_worker_type, VecDisplayer},
|
|
25
28
|
worker::{
|
|
26
|
-
activities::{ActivitiesFromWFTsHandle, PermittedTqResp},
|
|
29
|
+
activities::{ActivitiesFromWFTsHandle, LocalActivityManager, PermittedTqResp},
|
|
27
30
|
client::{WorkerClient, WorkflowTaskCompletion},
|
|
28
31
|
workflow::{
|
|
29
|
-
|
|
32
|
+
history_update::HistoryPaginator,
|
|
33
|
+
managed_run::RunUpdateAct,
|
|
34
|
+
wft_extraction::{HistoryFetchReq, WFTExtractor},
|
|
30
35
|
wft_poller::validate_wft,
|
|
31
36
|
workflow_stream::{LocalInput, LocalInputs, WFStream},
|
|
32
37
|
},
|
|
33
|
-
LocalActRequest, LocalActivityResolution,
|
|
38
|
+
LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
|
|
34
39
|
},
|
|
35
40
|
MetricsContext,
|
|
36
41
|
};
|
|
37
42
|
use futures::{stream::BoxStream, Stream, StreamExt};
|
|
43
|
+
use futures_util::stream;
|
|
44
|
+
use prost_types::TimestampError;
|
|
38
45
|
use std::{
|
|
39
|
-
collections::
|
|
40
|
-
fmt::
|
|
46
|
+
collections::VecDeque,
|
|
47
|
+
fmt::Debug,
|
|
41
48
|
future::Future,
|
|
42
49
|
ops::DerefMut,
|
|
43
50
|
result,
|
|
@@ -59,8 +66,9 @@ use temporal_sdk_core_protos::{
|
|
|
59
66
|
},
|
|
60
67
|
temporal::api::{
|
|
61
68
|
command::v1::{command::Attributes, Command as ProtoCommand, Command},
|
|
62
|
-
common::v1::{Memo, RetryPolicy, SearchAttributes},
|
|
69
|
+
common::v1::{Memo, RetryPolicy, SearchAttributes, WorkflowExecution},
|
|
63
70
|
enums::v1::WorkflowTaskFailedCause,
|
|
71
|
+
query::v1::WorkflowQuery,
|
|
64
72
|
taskqueue::v1::StickyExecutionAttributes,
|
|
65
73
|
workflowservice::v1::PollActivityTaskQueueResponse,
|
|
66
74
|
},
|
|
@@ -68,7 +76,7 @@ use temporal_sdk_core_protos::{
|
|
|
68
76
|
};
|
|
69
77
|
use tokio::{
|
|
70
78
|
sync::{
|
|
71
|
-
mpsc::{unbounded_channel, UnboundedSender},
|
|
79
|
+
mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender},
|
|
72
80
|
oneshot,
|
|
73
81
|
},
|
|
74
82
|
task,
|
|
@@ -79,6 +87,9 @@ use tokio_util::sync::CancellationToken;
|
|
|
79
87
|
use tracing::Span;
|
|
80
88
|
|
|
81
89
|
pub(crate) const LEGACY_QUERY_ID: &str = "legacy_query";
|
|
90
|
+
/// What percentage of a WFT timeout we are willing to wait before sending a WFT heartbeat when
|
|
91
|
+
/// necessary.
|
|
92
|
+
const WFT_HEARTBEAT_TIMEOUT_FRACTION: f32 = 0.8;
|
|
82
93
|
const MAX_EAGER_ACTIVITY_RESERVATIONS_PER_WORKFLOW_TASK: usize = 3;
|
|
83
94
|
|
|
84
95
|
type Result<T, E = WFMachinesError> = result::Result<T, E>;
|
|
@@ -100,9 +111,11 @@ pub(crate) struct Workflows {
|
|
|
100
111
|
sticky_attrs: Option<StickyExecutionAttributes>,
|
|
101
112
|
/// If set, can be used to reserve activity task slots for eager-return of new activity tasks.
|
|
102
113
|
activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
|
|
114
|
+
/// Ensures we stay at or below this worker's maximum concurrent workflow task limit
|
|
115
|
+
wft_semaphore: MeteredSemaphore,
|
|
103
116
|
}
|
|
104
117
|
|
|
105
|
-
pub(
|
|
118
|
+
pub(crate) struct WorkflowBasics {
|
|
106
119
|
pub max_cached_workflows: usize,
|
|
107
120
|
pub max_outstanding_wfts: usize,
|
|
108
121
|
pub shutdown_token: CancellationToken,
|
|
@@ -110,6 +123,9 @@ pub(super) struct WorkflowBasics {
|
|
|
110
123
|
pub namespace: String,
|
|
111
124
|
pub task_queue: String,
|
|
112
125
|
pub ignore_evicts_on_shutdown: bool,
|
|
126
|
+
pub fetching_concurrency: usize,
|
|
127
|
+
#[cfg(feature = "save_wf_inputs")]
|
|
128
|
+
pub wf_state_inputs: Option<UnboundedSender<Vec<u8>>>,
|
|
113
129
|
}
|
|
114
130
|
|
|
115
131
|
impl Workflows {
|
|
@@ -118,20 +134,38 @@ impl Workflows {
|
|
|
118
134
|
sticky_attrs: Option<StickyExecutionAttributes>,
|
|
119
135
|
client: Arc<dyn WorkerClient>,
|
|
120
136
|
wft_stream: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
|
|
121
|
-
local_activity_request_sink: impl
|
|
122
|
-
|
|
123
|
-
+ Sync
|
|
124
|
-
+ 'static,
|
|
137
|
+
local_activity_request_sink: impl LocalActivityRequestSink,
|
|
138
|
+
heartbeat_timeout_rx: UnboundedReceiver<HeartbeatTimeoutMsg>,
|
|
125
139
|
activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
|
|
126
140
|
) -> Self {
|
|
127
141
|
let (local_tx, local_rx) = unbounded_channel();
|
|
142
|
+
let (fetch_tx, fetch_rx) = unbounded_channel();
|
|
128
143
|
let shutdown_tok = basics.shutdown_token.clone();
|
|
129
144
|
let task_queue = basics.task_queue.clone();
|
|
130
|
-
let
|
|
131
|
-
basics,
|
|
145
|
+
let wft_semaphore = MeteredSemaphore::new(
|
|
146
|
+
basics.max_outstanding_wfts,
|
|
147
|
+
basics.metrics.with_new_attrs([workflow_worker_type()]),
|
|
148
|
+
MetricsContext::available_task_slots,
|
|
149
|
+
);
|
|
150
|
+
// Only allow polling of the new WFT stream if there are available task slots
|
|
151
|
+
let proceeder = stream::unfold(wft_semaphore.clone(), |sem| async move {
|
|
152
|
+
Some((sem.acquire_owned().await.unwrap(), sem))
|
|
153
|
+
});
|
|
154
|
+
let wft_stream = stream_when_allowed(wft_stream, proceeder);
|
|
155
|
+
let extracted_wft_stream = WFTExtractor::build(
|
|
156
|
+
client.clone(),
|
|
157
|
+
basics.fetching_concurrency,
|
|
132
158
|
wft_stream,
|
|
159
|
+
UnboundedReceiverStream::new(fetch_rx),
|
|
160
|
+
);
|
|
161
|
+
let locals_stream = stream::select(
|
|
133
162
|
UnboundedReceiverStream::new(local_rx),
|
|
134
|
-
|
|
163
|
+
UnboundedReceiverStream::new(heartbeat_timeout_rx).map(Into::into),
|
|
164
|
+
);
|
|
165
|
+
let mut stream = WFStream::build(
|
|
166
|
+
basics,
|
|
167
|
+
extracted_wft_stream,
|
|
168
|
+
locals_stream,
|
|
135
169
|
local_activity_request_sink,
|
|
136
170
|
);
|
|
137
171
|
let (activation_tx, activation_rx) = unbounded_channel();
|
|
@@ -152,10 +186,24 @@ impl Workflows {
|
|
|
152
186
|
if !do_poll {
|
|
153
187
|
return;
|
|
154
188
|
}
|
|
155
|
-
while let Some(
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
189
|
+
while let Some(output) = stream.next().await {
|
|
190
|
+
match output {
|
|
191
|
+
Ok(o) => {
|
|
192
|
+
for fetchreq in o.fetch_histories {
|
|
193
|
+
fetch_tx
|
|
194
|
+
.send(fetchreq)
|
|
195
|
+
.expect("Fetch channel must not be dropped");
|
|
196
|
+
}
|
|
197
|
+
for act in o.activations {
|
|
198
|
+
activation_tx
|
|
199
|
+
.send(Ok(act))
|
|
200
|
+
.expect("Activation processor channel not dropped");
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
Err(e) => activation_tx
|
|
204
|
+
.send(Err(e))
|
|
205
|
+
.expect("Activation processor channel not dropped"),
|
|
206
|
+
}
|
|
159
207
|
}
|
|
160
208
|
});
|
|
161
209
|
Self {
|
|
@@ -169,12 +217,13 @@ impl Workflows {
|
|
|
169
217
|
client,
|
|
170
218
|
sticky_attrs,
|
|
171
219
|
activity_tasks_handle,
|
|
220
|
+
wft_semaphore,
|
|
172
221
|
}
|
|
173
222
|
}
|
|
174
223
|
|
|
175
224
|
pub async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
|
|
176
225
|
loop {
|
|
177
|
-
let
|
|
226
|
+
let al = {
|
|
178
227
|
let mut lock = self.activation_stream.lock().await;
|
|
179
228
|
let (ref mut stream, ref mut beginner) = lock.deref_mut();
|
|
180
229
|
if let Some(beginner) = beginner.take() {
|
|
@@ -182,8 +231,8 @@ impl Workflows {
|
|
|
182
231
|
}
|
|
183
232
|
stream.next().await.unwrap_or(Err(PollWfError::ShutDown))?
|
|
184
233
|
};
|
|
185
|
-
Span::current().record("run_id",
|
|
186
|
-
match
|
|
234
|
+
Span::current().record("run_id", al.run_id());
|
|
235
|
+
match al {
|
|
187
236
|
ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
|
|
188
237
|
debug!(activation=%act, "Sending activation to lang");
|
|
189
238
|
break Ok(act);
|
|
@@ -202,7 +251,7 @@ impl Workflows {
|
|
|
202
251
|
/// Queue an activation completion for processing, returning a future that will resolve with
|
|
203
252
|
/// the outcome of that completion. See [ActivationCompletedOutcome].
|
|
204
253
|
///
|
|
205
|
-
/// Returns the most-recently-processed event number for the run
|
|
254
|
+
/// Returns the most-recently-processed event number for the run.
|
|
206
255
|
pub async fn activation_completed(
|
|
207
256
|
&self,
|
|
208
257
|
completion: WorkflowActivationCompletion,
|
|
@@ -213,7 +262,7 @@ impl Workflows {
|
|
|
213
262
|
let (tx, rx) = oneshot::channel();
|
|
214
263
|
let was_sent = self.send_local(WFActCompleteMsg {
|
|
215
264
|
completion,
|
|
216
|
-
response_tx: tx,
|
|
265
|
+
response_tx: Some(tx),
|
|
217
266
|
});
|
|
218
267
|
if !was_sent {
|
|
219
268
|
if is_empty_completion {
|
|
@@ -230,7 +279,7 @@ impl Workflows {
|
|
|
230
279
|
.await
|
|
231
280
|
.expect("Send half of activation complete response not dropped");
|
|
232
281
|
let mut wft_from_complete = None;
|
|
233
|
-
let
|
|
282
|
+
let wft_report_status = match completion_outcome.outcome {
|
|
234
283
|
ActivationCompleteOutcome::ReportWFTSuccess(report) => match report {
|
|
235
284
|
ServerCommandsWithWorkflowInfo {
|
|
236
285
|
task_token,
|
|
@@ -273,14 +322,14 @@ impl Workflows {
|
|
|
273
322
|
Ok(())
|
|
274
323
|
})
|
|
275
324
|
.await;
|
|
276
|
-
|
|
325
|
+
WFTReportStatus::Reported
|
|
277
326
|
}
|
|
278
327
|
ServerCommandsWithWorkflowInfo {
|
|
279
328
|
task_token,
|
|
280
329
|
action: ActivationAction::RespondLegacyQuery { result },
|
|
281
330
|
} => {
|
|
282
331
|
self.respond_legacy_query(task_token, *result).await;
|
|
283
|
-
|
|
332
|
+
WFTReportStatus::Reported
|
|
284
333
|
}
|
|
285
334
|
},
|
|
286
335
|
ActivationCompleteOutcome::ReportWFTFail(outcome) => match outcome {
|
|
@@ -292,22 +341,39 @@ impl Workflows {
|
|
|
292
341
|
.await
|
|
293
342
|
})
|
|
294
343
|
.await;
|
|
295
|
-
|
|
344
|
+
WFTReportStatus::Reported
|
|
296
345
|
}
|
|
297
346
|
FailedActivationWFTReport::ReportLegacyQueryFailure(task_token, failure) => {
|
|
298
347
|
warn!(run_id=%run_id, failure=?failure, "Failing legacy query request");
|
|
299
348
|
self.respond_legacy_query(task_token, legacy_query_failure(failure))
|
|
300
349
|
.await;
|
|
301
|
-
|
|
350
|
+
WFTReportStatus::Reported
|
|
302
351
|
}
|
|
303
352
|
},
|
|
304
|
-
ActivationCompleteOutcome::
|
|
353
|
+
ActivationCompleteOutcome::WFTFailedDontReport => WFTReportStatus::DropWft,
|
|
354
|
+
ActivationCompleteOutcome::DoNothing => WFTReportStatus::NotReported,
|
|
355
|
+
};
|
|
356
|
+
|
|
357
|
+
let maybe_pwft = if let Some(wft) = wft_from_complete {
|
|
358
|
+
match HistoryPaginator::from_poll(wft, self.client.clone()).await {
|
|
359
|
+
Ok((paginator, pwft)) => Some((pwft, paginator)),
|
|
360
|
+
Err(e) => {
|
|
361
|
+
self.request_eviction(
|
|
362
|
+
&run_id,
|
|
363
|
+
format!("Failed to paginate workflow task from completion: {e:?}"),
|
|
364
|
+
EvictionReason::Fatal,
|
|
365
|
+
);
|
|
366
|
+
None
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
} else {
|
|
370
|
+
None
|
|
305
371
|
};
|
|
306
372
|
|
|
307
373
|
self.post_activation(PostActivationMsg {
|
|
308
374
|
run_id,
|
|
309
|
-
|
|
310
|
-
wft_from_complete,
|
|
375
|
+
wft_report_status,
|
|
376
|
+
wft_from_complete: maybe_pwft,
|
|
311
377
|
});
|
|
312
378
|
|
|
313
379
|
Ok(completion_outcome.most_recently_processed_event)
|
|
@@ -342,12 +408,16 @@ impl Workflows {
|
|
|
342
408
|
async move { rx.await.ok() }
|
|
343
409
|
}
|
|
344
410
|
|
|
411
|
+
pub fn available_wft_permits(&self) -> usize {
|
|
412
|
+
self.wft_semaphore.available_permits()
|
|
413
|
+
}
|
|
414
|
+
|
|
345
415
|
pub async fn shutdown(&self) -> Result<(), JoinError> {
|
|
346
416
|
let maybe_jh = self.processing_task.lock().await.take();
|
|
347
417
|
if let Some(jh) = maybe_jh {
|
|
348
418
|
// This acts as a final wake up in case the stream is still alive and wouldn't otherwise
|
|
349
419
|
// receive another message. It allows it to shut itself down.
|
|
350
|
-
let _ = self.get_state_info();
|
|
420
|
+
let _ = self.get_state_info().await;
|
|
351
421
|
jh.await
|
|
352
422
|
} else {
|
|
353
423
|
Ok(())
|
|
@@ -393,7 +463,11 @@ impl Workflows {
|
|
|
393
463
|
/// successfully.
|
|
394
464
|
fn send_local(&self, msg: impl Into<LocalInputs>) -> bool {
|
|
395
465
|
let msg = msg.into();
|
|
396
|
-
let print_err =
|
|
466
|
+
let print_err = match &msg {
|
|
467
|
+
LocalInputs::GetStateInfo(_) => false,
|
|
468
|
+
LocalInputs::LocalResolution(lr) if lr.res.is_la_cancel_confirmation() => false,
|
|
469
|
+
_ => true,
|
|
470
|
+
};
|
|
397
471
|
if let Err(e) = self.local_tx.send(LocalInput {
|
|
398
472
|
input: msg,
|
|
399
473
|
span: Span::current(),
|
|
@@ -509,186 +583,30 @@ impl Workflows {
|
|
|
509
583
|
}
|
|
510
584
|
}
|
|
511
585
|
|
|
512
|
-
///
|
|
513
|
-
///
|
|
514
|
-
#[derive(derive_more::
|
|
515
|
-
#[
|
|
516
|
-
fmt = "
|
|
517
|
-
|
|
518
|
-
trying_to_evict: {}, last_action_acked: {} }}",
|
|
519
|
-
wft,
|
|
520
|
-
activation,
|
|
521
|
-
buffered_resp,
|
|
522
|
-
have_seen_terminal_event,
|
|
523
|
-
most_recently_processed_event_number,
|
|
524
|
-
more_pending_work,
|
|
525
|
-
"trying_to_evict.is_some()",
|
|
526
|
-
last_action_acked
|
|
586
|
+
/// Returned when a cache miss happens and we need to fetch history from the beginning to
|
|
587
|
+
/// replay a run
|
|
588
|
+
#[derive(Debug, derive_more::Display)]
|
|
589
|
+
#[display(
|
|
590
|
+
fmt = "CacheMissFetchReq(run_id: {})",
|
|
591
|
+
"original_wft.work.execution.run_id"
|
|
527
592
|
)]
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
have_seen_terminal_event: bool,
|
|
540
|
-
/// The most recently processed event id this machine has seen. 0 means it has seen nothing.
|
|
541
|
-
most_recently_processed_event_number: usize,
|
|
542
|
-
/// Is set true when the machines indicate that there is additional known work to be processed
|
|
543
|
-
more_pending_work: bool,
|
|
544
|
-
/// Is set if an eviction has been requested for this run
|
|
545
|
-
trying_to_evict: Option<RequestEvictMsg>,
|
|
546
|
-
/// Set to true if the last action we tried to take to this run has been processed (ie: the
|
|
547
|
-
/// [RunUpdateResponse] for it has been seen.
|
|
548
|
-
last_action_acked: bool,
|
|
549
|
-
/// For sending work to the machines
|
|
550
|
-
run_actions_tx: UnboundedSender<RunAction>,
|
|
551
|
-
/// Handle to the task where the actual machines live
|
|
552
|
-
handle: JoinHandle<()>,
|
|
553
|
-
|
|
554
|
-
/// We track if we have recorded useful debugging values onto a certain span yet, to overcome
|
|
555
|
-
/// duplicating field values. Remove this once https://github.com/tokio-rs/tracing/issues/2334
|
|
556
|
-
/// is fixed.
|
|
557
|
-
recorded_span_ids: HashSet<tracing::Id>,
|
|
558
|
-
metrics: MetricsContext,
|
|
593
|
+
#[must_use]
|
|
594
|
+
struct CacheMissFetchReq {
|
|
595
|
+
original_wft: PermittedWFT,
|
|
596
|
+
}
|
|
597
|
+
/// Bubbled up from inside workflow state if we're trying to apply the next workflow task but it
|
|
598
|
+
/// isn't in memory
|
|
599
|
+
#[derive(Debug)]
|
|
600
|
+
#[must_use]
|
|
601
|
+
struct NextPageReq {
|
|
602
|
+
paginator: HistoryPaginator,
|
|
603
|
+
span: Span,
|
|
559
604
|
}
|
|
560
|
-
impl ManagedRunHandle {
|
|
561
|
-
fn new(
|
|
562
|
-
wfm: WorkflowManager,
|
|
563
|
-
activations_tx: UnboundedSender<RunUpdateResponse>,
|
|
564
|
-
local_activity_request_sink: LocalActivityRequestSink,
|
|
565
|
-
metrics: MetricsContext,
|
|
566
|
-
) -> Self {
|
|
567
|
-
let (run_actions_tx, run_actions_rx) = unbounded_channel();
|
|
568
|
-
let managed = ManagedRun::new(wfm, activations_tx, local_activity_request_sink);
|
|
569
|
-
let handle = tokio::task::spawn(managed.run(run_actions_rx));
|
|
570
|
-
Self {
|
|
571
|
-
wft: None,
|
|
572
|
-
activation: None,
|
|
573
|
-
buffered_resp: None,
|
|
574
|
-
have_seen_terminal_event: false,
|
|
575
|
-
most_recently_processed_event_number: 0,
|
|
576
|
-
more_pending_work: false,
|
|
577
|
-
trying_to_evict: None,
|
|
578
|
-
last_action_acked: true,
|
|
579
|
-
run_actions_tx,
|
|
580
|
-
handle,
|
|
581
|
-
recorded_span_ids: Default::default(),
|
|
582
|
-
metrics,
|
|
583
|
-
}
|
|
584
|
-
}
|
|
585
|
-
|
|
586
|
-
fn incoming_wft(&mut self, wft: NewIncomingWFT) {
|
|
587
|
-
if self.wft.is_some() {
|
|
588
|
-
error!("Trying to send a new WFT for a run which already has one!");
|
|
589
|
-
}
|
|
590
|
-
self.send_run_action(RunActions::NewIncomingWFT(wft));
|
|
591
|
-
}
|
|
592
|
-
fn check_more_activations(&mut self) {
|
|
593
|
-
// No point in checking for more activations if we have not acked the last update, or
|
|
594
|
-
// if there's already an outstanding activation.
|
|
595
|
-
if self.last_action_acked && self.activation.is_none() {
|
|
596
|
-
self.send_run_action(RunActions::CheckMoreWork {
|
|
597
|
-
want_to_evict: self.trying_to_evict.clone(),
|
|
598
|
-
has_pending_queries: self
|
|
599
|
-
.wft
|
|
600
|
-
.as_ref()
|
|
601
|
-
.map(|wft| !wft.pending_queries.is_empty())
|
|
602
|
-
.unwrap_or_default(),
|
|
603
|
-
has_wft: self.wft.is_some(),
|
|
604
|
-
});
|
|
605
|
-
}
|
|
606
|
-
}
|
|
607
|
-
fn send_completion(&mut self, c: RunActivationCompletion) {
|
|
608
|
-
self.send_run_action(RunActions::ActivationCompletion(c));
|
|
609
|
-
}
|
|
610
|
-
fn send_local_resolution(&mut self, r: LocalResolution) {
|
|
611
|
-
self.send_run_action(RunActions::LocalResolution(r));
|
|
612
|
-
}
|
|
613
|
-
|
|
614
|
-
fn insert_outstanding_activation(&mut self, act: &ActivationOrAuto) {
|
|
615
|
-
let act_type = match &act {
|
|
616
|
-
ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
|
|
617
|
-
if act.is_legacy_query() {
|
|
618
|
-
OutstandingActivation::LegacyQuery
|
|
619
|
-
} else {
|
|
620
|
-
OutstandingActivation::Normal {
|
|
621
|
-
contains_eviction: act.eviction_index().is_some(),
|
|
622
|
-
num_jobs: act.jobs.len(),
|
|
623
|
-
}
|
|
624
|
-
}
|
|
625
|
-
}
|
|
626
|
-
ActivationOrAuto::Autocomplete { .. } => OutstandingActivation::Autocomplete,
|
|
627
|
-
};
|
|
628
|
-
if let Some(old_act) = self.activation {
|
|
629
|
-
// This is a panic because we have screwed up core logic if this is violated. It must be
|
|
630
|
-
// upheld.
|
|
631
|
-
panic!(
|
|
632
|
-
"Attempted to insert a new outstanding activation {:?}, but there already was \
|
|
633
|
-
one outstanding: {:?}",
|
|
634
|
-
act, old_act
|
|
635
|
-
);
|
|
636
|
-
}
|
|
637
|
-
self.activation = Some(act_type);
|
|
638
|
-
}
|
|
639
|
-
|
|
640
|
-
fn send_run_action(&mut self, action: RunActions) {
|
|
641
|
-
self.last_action_acked = false;
|
|
642
|
-
self.run_actions_tx
|
|
643
|
-
.send(RunAction {
|
|
644
|
-
action,
|
|
645
|
-
trace_span: Span::current(),
|
|
646
|
-
})
|
|
647
|
-
.expect("Receive half of run actions not dropped");
|
|
648
|
-
}
|
|
649
|
-
|
|
650
|
-
/// Returns true if the managed run has any form of pending work
|
|
651
|
-
/// If `ignore_evicts` is true, pending evictions do not count as pending work.
|
|
652
|
-
/// If `ignore_buffered` is true, buffered workflow tasks do not count as pending work.
|
|
653
|
-
fn has_any_pending_work(&self, ignore_evicts: bool, ignore_buffered: bool) -> bool {
|
|
654
|
-
let evict_work = if ignore_evicts {
|
|
655
|
-
false
|
|
656
|
-
} else {
|
|
657
|
-
self.trying_to_evict.is_some()
|
|
658
|
-
};
|
|
659
|
-
let act_work = if ignore_evicts {
|
|
660
|
-
if let Some(ref act) = self.activation {
|
|
661
|
-
!act.has_only_eviction()
|
|
662
|
-
} else {
|
|
663
|
-
false
|
|
664
|
-
}
|
|
665
|
-
} else {
|
|
666
|
-
self.activation.is_some()
|
|
667
|
-
};
|
|
668
|
-
let buffered = if ignore_buffered {
|
|
669
|
-
false
|
|
670
|
-
} else {
|
|
671
|
-
self.buffered_resp.is_some()
|
|
672
|
-
};
|
|
673
|
-
self.wft.is_some()
|
|
674
|
-
|| buffered
|
|
675
|
-
|| !self.last_action_acked
|
|
676
|
-
|| self.more_pending_work
|
|
677
|
-
|| act_work
|
|
678
|
-
|| evict_work
|
|
679
|
-
}
|
|
680
605
|
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
matches!(self.activation, Some(OutstandingActivation::LegacyQuery))
|
|
686
|
-
|| self
|
|
687
|
-
.wft
|
|
688
|
-
.as_ref()
|
|
689
|
-
.map(|t| t.has_pending_legacy_query())
|
|
690
|
-
.unwrap_or_default()
|
|
691
|
-
}
|
|
606
|
+
#[derive(Debug)]
|
|
607
|
+
struct WFStreamOutput {
|
|
608
|
+
activations: VecDeque<ActivationOrAuto>,
|
|
609
|
+
fetch_histories: VecDeque<HistoryFetchReq>,
|
|
692
610
|
}
|
|
693
611
|
|
|
694
612
|
#[derive(Debug, derive_more::Display)]
|
|
@@ -697,6 +615,7 @@ enum ActivationOrAuto {
|
|
|
697
615
|
/// This type should only be filled with an empty activation which is ready to have queries
|
|
698
616
|
/// inserted into the joblist
|
|
699
617
|
ReadyForQueries(WorkflowActivation),
|
|
618
|
+
#[display(fmt = "Autocomplete(run_id={run_id})")]
|
|
700
619
|
Autocomplete {
|
|
701
620
|
run_id: String,
|
|
702
621
|
},
|
|
@@ -711,11 +630,48 @@ impl ActivationOrAuto {
|
|
|
711
630
|
}
|
|
712
631
|
}
|
|
713
632
|
|
|
633
|
+
/// A processed WFT which has been validated and had a history update extracted from it
|
|
714
634
|
#[derive(derive_more::DebugCustom)]
|
|
715
|
-
#[
|
|
635
|
+
#[cfg_attr(
|
|
636
|
+
feature = "save_wf_inputs",
|
|
637
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
638
|
+
)]
|
|
639
|
+
#[debug(fmt = "PermittedWft({work:?})")]
|
|
716
640
|
pub(crate) struct PermittedWFT {
|
|
717
|
-
|
|
641
|
+
work: PreparedWFT,
|
|
642
|
+
#[cfg_attr(
|
|
643
|
+
feature = "save_wf_inputs",
|
|
644
|
+
serde(skip, default = "OwnedMeteredSemPermit::fake_deserialized")
|
|
645
|
+
)]
|
|
718
646
|
permit: OwnedMeteredSemPermit,
|
|
647
|
+
#[cfg_attr(
|
|
648
|
+
feature = "save_wf_inputs",
|
|
649
|
+
serde(skip, default = "HistoryPaginator::fake_deserialized")
|
|
650
|
+
)]
|
|
651
|
+
paginator: HistoryPaginator,
|
|
652
|
+
}
|
|
653
|
+
#[derive(Debug)]
|
|
654
|
+
#[cfg_attr(
|
|
655
|
+
feature = "save_wf_inputs",
|
|
656
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
657
|
+
)]
|
|
658
|
+
struct PreparedWFT {
|
|
659
|
+
task_token: TaskToken,
|
|
660
|
+
attempt: u32,
|
|
661
|
+
execution: WorkflowExecution,
|
|
662
|
+
workflow_type: String,
|
|
663
|
+
legacy_query: Option<WorkflowQuery>,
|
|
664
|
+
query_requests: Vec<QueryWorkflow>,
|
|
665
|
+
update: HistoryUpdate,
|
|
666
|
+
}
|
|
667
|
+
impl PreparedWFT {
|
|
668
|
+
/// Returns true if the contained history update is incremental (IE: expects to hit a cached
|
|
669
|
+
/// workflow)
|
|
670
|
+
pub fn is_incremental(&self) -> bool {
|
|
671
|
+
let start_event_id = self.update.first_event_id();
|
|
672
|
+
let poll_resp_is_incremental = start_event_id.map(|eid| eid > 1).unwrap_or_default();
|
|
673
|
+
poll_resp_is_incremental || start_event_id.is_none()
|
|
674
|
+
}
|
|
719
675
|
}
|
|
720
676
|
|
|
721
677
|
#[derive(Debug)]
|
|
@@ -811,44 +767,74 @@ pub(crate) enum ActivationAction {
|
|
|
811
767
|
RespondLegacyQuery { result: Box<QueryResult> },
|
|
812
768
|
}
|
|
813
769
|
|
|
814
|
-
#[derive(Debug
|
|
815
|
-
|
|
816
|
-
EvictionRequested(Option<u32
|
|
770
|
+
#[derive(Debug)]
|
|
771
|
+
enum EvictionRequestResult {
|
|
772
|
+
EvictionRequested(Option<u32>, RunUpdateAct),
|
|
817
773
|
NotFound,
|
|
818
774
|
EvictionAlreadyRequested(Option<u32>),
|
|
819
775
|
}
|
|
776
|
+
impl EvictionRequestResult {
|
|
777
|
+
fn into_run_update_resp(self) -> RunUpdateAct {
|
|
778
|
+
match self {
|
|
779
|
+
EvictionRequestResult::EvictionRequested(_, resp) => resp,
|
|
780
|
+
EvictionRequestResult::NotFound
|
|
781
|
+
| EvictionRequestResult::EvictionAlreadyRequested(_) => None,
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
}
|
|
820
785
|
|
|
821
786
|
#[derive(Debug)]
|
|
822
787
|
#[allow(dead_code)] // Not always used in non-test
|
|
823
788
|
pub(crate) struct WorkflowStateInfo {
|
|
824
789
|
pub cached_workflows: usize,
|
|
825
790
|
pub outstanding_wft: usize,
|
|
826
|
-
pub available_wft_permits: usize,
|
|
827
791
|
}
|
|
828
792
|
|
|
829
793
|
#[derive(Debug)]
|
|
794
|
+
#[cfg_attr(
|
|
795
|
+
feature = "save_wf_inputs",
|
|
796
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
797
|
+
)]
|
|
830
798
|
struct WFActCompleteMsg {
|
|
831
799
|
completion: ValidatedCompletion,
|
|
832
|
-
|
|
800
|
+
#[cfg_attr(feature = "save_wf_inputs", serde(skip))]
|
|
801
|
+
response_tx: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
833
802
|
}
|
|
834
803
|
#[derive(Debug)]
|
|
804
|
+
#[cfg_attr(
|
|
805
|
+
feature = "save_wf_inputs",
|
|
806
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
807
|
+
)]
|
|
835
808
|
struct LocalResolutionMsg {
|
|
836
809
|
run_id: String,
|
|
837
810
|
res: LocalResolution,
|
|
838
811
|
}
|
|
839
812
|
#[derive(Debug)]
|
|
813
|
+
#[cfg_attr(
|
|
814
|
+
feature = "save_wf_inputs",
|
|
815
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
816
|
+
)]
|
|
840
817
|
struct PostActivationMsg {
|
|
841
818
|
run_id: String,
|
|
842
|
-
|
|
843
|
-
wft_from_complete: Option<
|
|
819
|
+
wft_report_status: WFTReportStatus,
|
|
820
|
+
wft_from_complete: Option<(PreparedWFT, HistoryPaginator)>,
|
|
844
821
|
}
|
|
845
822
|
#[derive(Debug, Clone)]
|
|
823
|
+
#[cfg_attr(
|
|
824
|
+
feature = "save_wf_inputs",
|
|
825
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
826
|
+
)]
|
|
846
827
|
struct RequestEvictMsg {
|
|
847
828
|
run_id: String,
|
|
848
829
|
message: String,
|
|
849
830
|
reason: EvictionReason,
|
|
850
831
|
}
|
|
851
832
|
#[derive(Debug)]
|
|
833
|
+
pub(crate) struct HeartbeatTimeoutMsg {
|
|
834
|
+
pub(crate) run_id: String,
|
|
835
|
+
pub(crate) span: Span,
|
|
836
|
+
}
|
|
837
|
+
#[derive(Debug)]
|
|
852
838
|
struct GetStateInfoMsg {
|
|
853
839
|
response_tx: oneshot::Sender<WorkflowStateInfo>,
|
|
854
840
|
}
|
|
@@ -869,16 +855,24 @@ enum ActivationCompleteOutcome {
|
|
|
869
855
|
ReportWFTFail(FailedActivationWFTReport),
|
|
870
856
|
/// There's nothing to do right now. EX: The workflow needs to keep replaying.
|
|
871
857
|
DoNothing,
|
|
858
|
+
/// The workflow task failed, but we shouldn't report it. EX: We have failed 2 or more attempts
|
|
859
|
+
/// in a row.
|
|
860
|
+
WFTFailedDontReport,
|
|
872
861
|
}
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
862
|
+
/// Did we report, or not, completion of a WFT to server?
|
|
863
|
+
#[derive(Debug, Copy, Clone)]
|
|
864
|
+
#[cfg_attr(
|
|
865
|
+
feature = "save_wf_inputs",
|
|
866
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
867
|
+
)]
|
|
868
|
+
enum WFTReportStatus {
|
|
869
|
+
Reported,
|
|
870
|
+
/// The WFT completion was not reported when finishing the activation, because there's still
|
|
871
|
+
/// work to be done. EX: Running LAs.
|
|
872
|
+
NotReported,
|
|
873
|
+
/// We didn't report, but we want to clear the outstanding workflow task anyway. See
|
|
874
|
+
/// [ActivationCompleteOutcome::WFTFailedDontReport]
|
|
875
|
+
DropWft,
|
|
882
876
|
}
|
|
883
877
|
|
|
884
878
|
fn validate_completion(
|
|
@@ -908,8 +902,7 @@ fn validate_completion(
|
|
|
908
902
|
reason: format!(
|
|
909
903
|
"Workflow completion had a legacy query response along with other \
|
|
910
904
|
commands. This is not allowed and constitutes an error in the \
|
|
911
|
-
lang SDK. Commands: {:?}"
|
|
912
|
-
commands
|
|
905
|
+
lang SDK. Commands: {commands:?}"
|
|
913
906
|
),
|
|
914
907
|
run_id: completion.run_id,
|
|
915
908
|
});
|
|
@@ -934,6 +927,10 @@ fn validate_completion(
|
|
|
934
927
|
}
|
|
935
928
|
|
|
936
929
|
#[derive(Debug)]
|
|
930
|
+
#[cfg_attr(
|
|
931
|
+
feature = "save_wf_inputs",
|
|
932
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
933
|
+
)]
|
|
937
934
|
#[allow(clippy::large_enum_variant)]
|
|
938
935
|
enum ValidatedCompletion {
|
|
939
936
|
Success {
|
|
@@ -955,112 +952,6 @@ impl ValidatedCompletion {
|
|
|
955
952
|
}
|
|
956
953
|
}
|
|
957
954
|
|
|
958
|
-
/// Input to run tasks, sent to [ManagedRun]s via [ManagedRunHandle]s
|
|
959
|
-
#[derive(Debug)]
|
|
960
|
-
struct RunAction {
|
|
961
|
-
action: RunActions,
|
|
962
|
-
trace_span: Span,
|
|
963
|
-
}
|
|
964
|
-
#[derive(Debug)]
|
|
965
|
-
#[allow(clippy::large_enum_variant)]
|
|
966
|
-
enum RunActions {
|
|
967
|
-
NewIncomingWFT(NewIncomingWFT),
|
|
968
|
-
ActivationCompletion(RunActivationCompletion),
|
|
969
|
-
CheckMoreWork {
|
|
970
|
-
want_to_evict: Option<RequestEvictMsg>,
|
|
971
|
-
has_pending_queries: bool,
|
|
972
|
-
has_wft: bool,
|
|
973
|
-
},
|
|
974
|
-
LocalResolution(LocalResolution),
|
|
975
|
-
HeartbeatTimeout,
|
|
976
|
-
}
|
|
977
|
-
#[derive(Debug)]
|
|
978
|
-
struct NewIncomingWFT {
|
|
979
|
-
/// This field is only populated if the machines already exist. Otherwise the machines
|
|
980
|
-
/// are instantiated with the workflow history.
|
|
981
|
-
history_update: Option<HistoryUpdate>,
|
|
982
|
-
/// Wft start time
|
|
983
|
-
start_time: Instant,
|
|
984
|
-
}
|
|
985
|
-
#[derive(Debug)]
|
|
986
|
-
struct RunActivationCompletion {
|
|
987
|
-
task_token: TaskToken,
|
|
988
|
-
start_time: Instant,
|
|
989
|
-
commands: Vec<WFCommand>,
|
|
990
|
-
activation_was_eviction: bool,
|
|
991
|
-
activation_was_only_eviction: bool,
|
|
992
|
-
has_pending_query: bool,
|
|
993
|
-
query_responses: Vec<QueryResult>,
|
|
994
|
-
/// Used to notify the worker when the completion is done processing and the completion can
|
|
995
|
-
/// unblock. Must always be `Some` when initialized.
|
|
996
|
-
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
997
|
-
}
|
|
998
|
-
|
|
999
|
-
/// A response from a [ManagedRun] held by a [ManagedRunHandle]
|
|
1000
|
-
#[derive(Debug)]
|
|
1001
|
-
struct RunUpdateResponse {
|
|
1002
|
-
kind: RunUpdateResponseKind,
|
|
1003
|
-
span: Span,
|
|
1004
|
-
}
|
|
1005
|
-
#[derive(Debug, derive_more::Display)]
|
|
1006
|
-
#[allow(clippy::large_enum_variant)]
|
|
1007
|
-
enum RunUpdateResponseKind {
|
|
1008
|
-
Good(GoodRunUpdate),
|
|
1009
|
-
Fail(FailRunUpdate),
|
|
1010
|
-
}
|
|
1011
|
-
impl RunUpdateResponseKind {
|
|
1012
|
-
pub(crate) fn run_id(&self) -> &str {
|
|
1013
|
-
match self {
|
|
1014
|
-
RunUpdateResponseKind::Good(g) => &g.run_id,
|
|
1015
|
-
RunUpdateResponseKind::Fail(f) => &f.run_id,
|
|
1016
|
-
}
|
|
1017
|
-
}
|
|
1018
|
-
}
|
|
1019
|
-
|
|
1020
|
-
#[derive(Debug)]
|
|
1021
|
-
struct GoodRunUpdate {
|
|
1022
|
-
run_id: String,
|
|
1023
|
-
outgoing_activation: Option<ActivationOrAuto>,
|
|
1024
|
-
fulfillable_complete: Option<FulfillableActivationComplete>,
|
|
1025
|
-
have_seen_terminal_event: bool,
|
|
1026
|
-
/// Is true if there are more jobs that need to be sent to lang
|
|
1027
|
-
more_pending_work: bool,
|
|
1028
|
-
most_recently_processed_event_number: usize,
|
|
1029
|
-
/// Is true if this update was in response to a new WFT
|
|
1030
|
-
in_response_to_wft: bool,
|
|
1031
|
-
}
|
|
1032
|
-
impl Display for GoodRunUpdate {
|
|
1033
|
-
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
1034
|
-
write!(
|
|
1035
|
-
f,
|
|
1036
|
-
"GoodRunUpdate(run_id: {}, outgoing_activation: {}, more_pending_work: {})",
|
|
1037
|
-
self.run_id,
|
|
1038
|
-
if let Some(og) = self.outgoing_activation.as_ref() {
|
|
1039
|
-
format!("{}", og)
|
|
1040
|
-
} else {
|
|
1041
|
-
"None".to_string()
|
|
1042
|
-
},
|
|
1043
|
-
self.more_pending_work
|
|
1044
|
-
)
|
|
1045
|
-
}
|
|
1046
|
-
}
|
|
1047
|
-
#[derive(Debug)]
|
|
1048
|
-
pub(crate) struct FailRunUpdate {
|
|
1049
|
-
run_id: String,
|
|
1050
|
-
err: WFMachinesError,
|
|
1051
|
-
/// This is populated if the run update failed while processing a completion - and thus we
|
|
1052
|
-
/// must respond down it when handling the failure.
|
|
1053
|
-
completion_resp: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
1054
|
-
}
|
|
1055
|
-
impl Display for FailRunUpdate {
|
|
1056
|
-
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
1057
|
-
write!(
|
|
1058
|
-
f,
|
|
1059
|
-
"FailRunUpdate(run_id: {}, error: {:?})",
|
|
1060
|
-
self.run_id, self.err
|
|
1061
|
-
)
|
|
1062
|
-
}
|
|
1063
|
-
}
|
|
1064
955
|
#[derive(Debug)]
|
|
1065
956
|
pub struct OutgoingServerCommands {
|
|
1066
957
|
pub commands: Vec<ProtoCommand>,
|
|
@@ -1068,9 +959,22 @@ pub struct OutgoingServerCommands {
|
|
|
1068
959
|
}
|
|
1069
960
|
|
|
1070
961
|
#[derive(Debug)]
|
|
962
|
+
#[cfg_attr(
|
|
963
|
+
feature = "save_wf_inputs",
|
|
964
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
965
|
+
)]
|
|
1071
966
|
pub(crate) enum LocalResolution {
|
|
1072
967
|
LocalActivity(LocalActivityResolution),
|
|
1073
968
|
}
|
|
969
|
+
impl LocalResolution {
|
|
970
|
+
pub fn is_la_cancel_confirmation(&self) -> bool {
|
|
971
|
+
match self {
|
|
972
|
+
LocalResolution::LocalActivity(lar) => {
|
|
973
|
+
matches!(lar.result, LocalActivityExecutionResult::Cancelled(_))
|
|
974
|
+
}
|
|
975
|
+
}
|
|
976
|
+
}
|
|
977
|
+
}
|
|
1074
978
|
|
|
1075
979
|
#[derive(thiserror::Error, Debug, derive_more::From)]
|
|
1076
980
|
#[error("Lang provided workflow command with empty variant")]
|
|
@@ -1079,6 +983,10 @@ pub struct EmptyWorkflowCommandErr;
|
|
|
1079
983
|
/// [DrivenWorkflow]s respond with these when called, to indicate what they want to do next.
|
|
1080
984
|
/// EX: Create a new timer, complete the workflow, etc.
|
|
1081
985
|
#[derive(Debug, derive_more::From, derive_more::Display)]
|
|
986
|
+
#[cfg_attr(
|
|
987
|
+
feature = "save_wf_inputs",
|
|
988
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
989
|
+
)]
|
|
1082
990
|
#[allow(clippy::large_enum_variant)]
|
|
1083
991
|
pub enum WFCommand {
|
|
1084
992
|
/// Returned when we need to wait for the lang sdk to send us something
|
|
@@ -1171,12 +1079,9 @@ pub struct WorkflowStartedInfo {
|
|
|
1171
1079
|
retry_policy: Option<RetryPolicy>,
|
|
1172
1080
|
}
|
|
1173
1081
|
|
|
1174
|
-
type LocalActivityRequestSink =
|
|
1175
|
-
Arc<dyn Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution> + Send + Sync>;
|
|
1176
|
-
|
|
1177
1082
|
/// Wraps outgoing activation job protos with some internal details core might care about
|
|
1178
1083
|
#[derive(Debug, derive_more::Display)]
|
|
1179
|
-
#[display(fmt = "{}"
|
|
1084
|
+
#[display(fmt = "{variant}")]
|
|
1180
1085
|
struct OutgoingJob {
|
|
1181
1086
|
variant: workflow_activation_job::Variant,
|
|
1182
1087
|
/// Since LA resolutions are not distinguished from non-LA resolutions as far as lang is
|
|
@@ -1198,3 +1103,58 @@ impl From<OutgoingJob> for WorkflowActivationJob {
|
|
|
1198
1103
|
}
|
|
1199
1104
|
}
|
|
1200
1105
|
}
|
|
1106
|
+
|
|
1107
|
+
/// Errors thrown inside of workflow machines
|
|
1108
|
+
#[derive(thiserror::Error, Debug)]
|
|
1109
|
+
pub(crate) enum WFMachinesError {
|
|
1110
|
+
#[error("Nondeterminism error: {0}")]
|
|
1111
|
+
Nondeterminism(String),
|
|
1112
|
+
#[error("Fatal error in workflow machines: {0}")]
|
|
1113
|
+
Fatal(String),
|
|
1114
|
+
}
|
|
1115
|
+
|
|
1116
|
+
impl WFMachinesError {
|
|
1117
|
+
pub fn evict_reason(&self) -> EvictionReason {
|
|
1118
|
+
match self {
|
|
1119
|
+
WFMachinesError::Nondeterminism(_) => EvictionReason::Nondeterminism,
|
|
1120
|
+
WFMachinesError::Fatal(_) => EvictionReason::Fatal,
|
|
1121
|
+
}
|
|
1122
|
+
}
|
|
1123
|
+
}
|
|
1124
|
+
|
|
1125
|
+
impl From<TimestampError> for WFMachinesError {
|
|
1126
|
+
fn from(_: TimestampError) -> Self {
|
|
1127
|
+
Self::Fatal("Could not decode timestamp".to_string())
|
|
1128
|
+
}
|
|
1129
|
+
}
|
|
1130
|
+
|
|
1131
|
+
pub(crate) trait LocalActivityRequestSink: Send + Sync + 'static {
|
|
1132
|
+
fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution>;
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
#[derive(derive_more::Constructor)]
|
|
1136
|
+
pub(super) struct LAReqSink {
|
|
1137
|
+
lam: Arc<LocalActivityManager>,
|
|
1138
|
+
/// If we're recording WF inputs, we also need to store immediate resolutions so they're
|
|
1139
|
+
/// available on replay.
|
|
1140
|
+
#[allow(dead_code)] // sometimes appears unused due to feature flagging
|
|
1141
|
+
recorder: Option<UnboundedSender<Vec<u8>>>,
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
impl LocalActivityRequestSink for LAReqSink {
|
|
1145
|
+
fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution> {
|
|
1146
|
+
if reqs.is_empty() {
|
|
1147
|
+
return vec![];
|
|
1148
|
+
}
|
|
1149
|
+
|
|
1150
|
+
#[allow(clippy::let_and_return)] // When feature is off clippy doesn't like this
|
|
1151
|
+
let res = self.lam.enqueue(reqs);
|
|
1152
|
+
|
|
1153
|
+
// We always save when there are any reqs, even if the response might be empty, so that
|
|
1154
|
+
// calls/responses are 1:1
|
|
1155
|
+
#[cfg(feature = "save_wf_inputs")]
|
|
1156
|
+
self.write_req(&res);
|
|
1157
|
+
|
|
1158
|
+
res
|
|
1159
|
+
}
|
|
1160
|
+
}
|