@temporalio/core-bridge 1.5.2 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +304 -112
- package/lib/index.d.ts +8 -6
- package/lib/index.js.map +1 -1
- package/package.json +9 -4
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/docker/Dockerfile +2 -2
- package/sdk-core/.buildkite/docker/docker-compose.yaml +1 -1
- package/sdk-core/.buildkite/pipeline.yml +2 -4
- package/sdk-core/.cargo/config.toml +5 -2
- package/sdk-core/.github/workflows/heavy.yml +29 -0
- package/sdk-core/Cargo.toml +1 -1
- package/sdk-core/README.md +20 -10
- package/sdk-core/client/src/lib.rs +215 -39
- package/sdk-core/client/src/metrics.rs +17 -8
- package/sdk-core/client/src/raw.rs +4 -4
- package/sdk-core/client/src/retry.rs +32 -20
- package/sdk-core/core/Cargo.toml +25 -12
- package/sdk-core/core/src/abstractions/take_cell.rs +28 -0
- package/sdk-core/core/src/abstractions.rs +204 -14
- package/sdk-core/core/src/core_tests/activity_tasks.rs +143 -50
- package/sdk-core/core/src/core_tests/child_workflows.rs +6 -5
- package/sdk-core/core/src/core_tests/determinism.rs +165 -2
- package/sdk-core/core/src/core_tests/local_activities.rs +431 -43
- package/sdk-core/core/src/core_tests/queries.rs +34 -16
- package/sdk-core/core/src/core_tests/workers.rs +8 -5
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +588 -55
- package/sdk-core/core/src/ephemeral_server/mod.rs +113 -12
- package/sdk-core/core/src/internal_flags.rs +155 -0
- package/sdk-core/core/src/lib.rs +16 -9
- package/sdk-core/core/src/protosext/mod.rs +1 -1
- package/sdk-core/core/src/replay/mod.rs +16 -27
- package/sdk-core/core/src/telemetry/log_export.rs +1 -1
- package/sdk-core/core/src/telemetry/metrics.rs +69 -35
- package/sdk-core/core/src/telemetry/mod.rs +60 -21
- package/sdk-core/core/src/telemetry/prometheus_server.rs +19 -13
- package/sdk-core/core/src/test_help/mod.rs +73 -14
- package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +119 -160
- package/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
- package/sdk-core/core/src/worker/activities/local_activities.rs +379 -129
- package/sdk-core/core/src/worker/activities.rs +350 -175
- package/sdk-core/core/src/worker/client/mocks.rs +22 -2
- package/sdk-core/core/src/worker/client.rs +18 -2
- package/sdk-core/core/src/worker/mod.rs +183 -64
- package/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
- package/sdk-core/core/src/worker/workflow/driven_workflow.rs +3 -5
- package/sdk-core/core/src/worker/workflow/history_update.rs +916 -277
- package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +216 -183
- package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +9 -12
- package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +7 -9
- package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +160 -87
- package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +13 -14
- package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +7 -9
- package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +14 -17
- package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +242 -110
- package/sdk-core/core/src/worker/workflow/machines/mod.rs +27 -19
- package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +9 -11
- package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +321 -206
- package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +13 -18
- package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +20 -29
- package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
- package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +257 -51
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +310 -150
- package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +17 -20
- package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +31 -15
- package/sdk-core/core/src/worker/workflow/managed_run.rs +1052 -380
- package/sdk-core/core/src/worker/workflow/mod.rs +598 -390
- package/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
- package/sdk-core/core/src/worker/workflow/wft_extraction.rs +137 -0
- package/sdk-core/core/src/worker/workflow/wft_poller.rs +1 -4
- package/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream.rs +469 -718
- package/sdk-core/core-api/Cargo.toml +2 -1
- package/sdk-core/core-api/src/errors.rs +1 -34
- package/sdk-core/core-api/src/lib.rs +19 -9
- package/sdk-core/core-api/src/telemetry.rs +4 -6
- package/sdk-core/core-api/src/worker.rs +19 -1
- package/sdk-core/etc/deps.svg +115 -140
- package/sdk-core/etc/regen-depgraph.sh +5 -0
- package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +86 -61
- package/sdk-core/fsm/rustfsm_trait/src/lib.rs +29 -71
- package/sdk-core/histories/ends_empty_wft_complete.bin +0 -0
- package/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
- package/sdk-core/histories/old_change_marker_format.bin +0 -0
- package/sdk-core/protos/api_upstream/.github/CODEOWNERS +2 -1
- package/sdk-core/protos/api_upstream/Makefile +6 -6
- package/sdk-core/protos/api_upstream/build/go.mod +7 -0
- package/sdk-core/protos/api_upstream/build/go.sum +5 -0
- package/sdk-core/protos/api_upstream/build/tools.go +29 -0
- package/sdk-core/protos/api_upstream/go.mod +6 -0
- package/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +9 -2
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +7 -26
- package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +13 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +3 -7
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +8 -8
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +25 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +24 -19
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +49 -26
- package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +4 -2
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +5 -2
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/protocol/v1/message.proto +57 -0
- package/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
- package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +71 -6
- package/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +64 -28
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +4 -4
- package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +7 -8
- package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +10 -7
- package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +19 -30
- package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
- package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
- package/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +8 -0
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +67 -60
- package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +85 -84
- package/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +9 -3
- package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
- package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
- package/sdk-core/sdk/Cargo.toml +5 -4
- package/sdk-core/sdk/src/lib.rs +108 -26
- package/sdk-core/sdk/src/workflow_context/options.rs +7 -1
- package/sdk-core/sdk/src/workflow_context.rs +24 -17
- package/sdk-core/sdk/src/workflow_future.rs +16 -15
- package/sdk-core/sdk-core-protos/Cargo.toml +5 -2
- package/sdk-core/sdk-core-protos/build.rs +36 -2
- package/sdk-core/sdk-core-protos/src/history_builder.rs +138 -106
- package/sdk-core/sdk-core-protos/src/history_info.rs +10 -1
- package/sdk-core/sdk-core-protos/src/lib.rs +272 -87
- package/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
- package/sdk-core/test-utils/Cargo.toml +3 -1
- package/sdk-core/test-utils/src/canned_histories.rs +106 -296
- package/sdk-core/test-utils/src/histfetch.rs +1 -1
- package/sdk-core/test-utils/src/lib.rs +82 -23
- package/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
- package/sdk-core/test-utils/src/workflows.rs +29 -0
- package/sdk-core/tests/fuzzy_workflow.rs +130 -0
- package/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
- package/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
- package/sdk-core/tests/integ_tests/heartbeat_tests.rs +10 -5
- package/sdk-core/tests/integ_tests/metrics_tests.rs +218 -16
- package/sdk-core/tests/integ_tests/polling_tests.rs +4 -47
- package/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
- package/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +161 -72
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
- package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +80 -3
- package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
- package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +94 -200
- package/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +2 -4
- package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +34 -28
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +76 -7
- package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
- package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
- package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
- package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
- package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +7 -8
- package/sdk-core/tests/integ_tests/workflow_tests.rs +13 -14
- package/sdk-core/tests/main.rs +3 -13
- package/sdk-core/tests/runner.rs +75 -36
- package/sdk-core/tests/wf_input_replay.rs +32 -0
- package/src/conversions.rs +14 -8
- package/src/runtime.rs +9 -8
- package/ts/index.ts +8 -6
- package/sdk-core/bridge-ffi/Cargo.toml +0 -24
- package/sdk-core/bridge-ffi/LICENSE.txt +0 -23
- package/sdk-core/bridge-ffi/build.rs +0 -25
- package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -224
- package/sdk-core/bridge-ffi/src/lib.rs +0 -746
- package/sdk-core/bridge-ffi/src/wrappers.rs +0 -221
- package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
- package/sdk-core/sdk/src/conversions.rs +0 -8
|
@@ -8,40 +8,61 @@ mod history_update;
|
|
|
8
8
|
mod machines;
|
|
9
9
|
mod managed_run;
|
|
10
10
|
mod run_cache;
|
|
11
|
+
mod wft_extraction;
|
|
11
12
|
pub(crate) mod wft_poller;
|
|
12
13
|
mod workflow_stream;
|
|
13
14
|
|
|
15
|
+
#[cfg(feature = "save_wf_inputs")]
|
|
16
|
+
pub use workflow_stream::replay_wf_state_inputs;
|
|
17
|
+
|
|
14
18
|
pub(crate) use bridge::WorkflowBridge;
|
|
15
19
|
pub(crate) use driven_workflow::{DrivenWorkflow, WorkflowFetcher};
|
|
16
|
-
pub(crate) use history_update::
|
|
17
|
-
pub(crate) use machines::WFMachinesError;
|
|
20
|
+
pub(crate) use history_update::HistoryUpdate;
|
|
18
21
|
#[cfg(test)]
|
|
19
22
|
pub(crate) use managed_run::ManagedWFFunc;
|
|
20
23
|
|
|
21
24
|
use crate::{
|
|
22
|
-
abstractions::
|
|
23
|
-
|
|
24
|
-
|
|
25
|
+
abstractions::{
|
|
26
|
+
stream_when_allowed, take_cell::TakeCell, MeteredSemaphore, TrackedOwnedMeteredSemPermit,
|
|
27
|
+
UsedMeteredSemPermit,
|
|
28
|
+
},
|
|
29
|
+
internal_flags::InternalFlags,
|
|
30
|
+
protosext::{legacy_query_failure, ValidPollWFTQResponse},
|
|
31
|
+
telemetry::{
|
|
32
|
+
metrics::workflow_worker_type, set_trace_subscriber_for_current_thread, TelemetryInstance,
|
|
33
|
+
VecDisplayer,
|
|
34
|
+
},
|
|
25
35
|
worker::{
|
|
26
|
-
activities::{ActivitiesFromWFTsHandle,
|
|
36
|
+
activities::{ActivitiesFromWFTsHandle, LocalActivityManager, TrackedPermittedTqResp},
|
|
27
37
|
client::{WorkerClient, WorkflowTaskCompletion},
|
|
28
38
|
workflow::{
|
|
29
|
-
|
|
39
|
+
history_update::HistoryPaginator,
|
|
40
|
+
managed_run::RunUpdateAct,
|
|
41
|
+
wft_extraction::{HistoryFetchReq, WFTExtractor},
|
|
30
42
|
wft_poller::validate_wft,
|
|
31
43
|
workflow_stream::{LocalInput, LocalInputs, WFStream},
|
|
32
44
|
},
|
|
33
|
-
LocalActRequest, LocalActivityResolution,
|
|
45
|
+
LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
|
|
46
|
+
PostActivateHookData,
|
|
34
47
|
},
|
|
35
48
|
MetricsContext,
|
|
36
49
|
};
|
|
50
|
+
use anyhow::anyhow;
|
|
37
51
|
use futures::{stream::BoxStream, Stream, StreamExt};
|
|
52
|
+
use futures_util::{future::abortable, stream};
|
|
53
|
+
use prost_types::TimestampError;
|
|
38
54
|
use std::{
|
|
39
|
-
|
|
40
|
-
|
|
55
|
+
cell::RefCell,
|
|
56
|
+
cmp::Ordering,
|
|
57
|
+
collections::VecDeque,
|
|
58
|
+
fmt::Debug,
|
|
41
59
|
future::Future,
|
|
60
|
+
mem::discriminant,
|
|
42
61
|
ops::DerefMut,
|
|
62
|
+
rc::Rc,
|
|
43
63
|
result,
|
|
44
|
-
sync::Arc,
|
|
64
|
+
sync::{atomic, atomic::AtomicBool, Arc},
|
|
65
|
+
thread,
|
|
45
66
|
time::{Duration, Instant},
|
|
46
67
|
};
|
|
47
68
|
use temporal_sdk_core_api::errors::{CompleteWfError, PollWfError};
|
|
@@ -59,36 +80,41 @@ use temporal_sdk_core_protos::{
|
|
|
59
80
|
},
|
|
60
81
|
temporal::api::{
|
|
61
82
|
command::v1::{command::Attributes, Command as ProtoCommand, Command},
|
|
62
|
-
common::v1::{Memo, RetryPolicy, SearchAttributes},
|
|
83
|
+
common::v1::{Memo, MeteringMetadata, RetryPolicy, SearchAttributes, WorkflowExecution},
|
|
63
84
|
enums::v1::WorkflowTaskFailedCause,
|
|
85
|
+
query::v1::WorkflowQuery,
|
|
86
|
+
sdk::v1::WorkflowTaskCompletedMetadata,
|
|
64
87
|
taskqueue::v1::StickyExecutionAttributes,
|
|
65
|
-
workflowservice::v1::PollActivityTaskQueueResponse,
|
|
88
|
+
workflowservice::v1::{get_system_info_response, PollActivityTaskQueueResponse},
|
|
66
89
|
},
|
|
67
90
|
TaskToken,
|
|
68
91
|
};
|
|
69
92
|
use tokio::{
|
|
70
93
|
sync::{
|
|
71
|
-
mpsc::{unbounded_channel, UnboundedSender},
|
|
94
|
+
mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender},
|
|
72
95
|
oneshot,
|
|
73
96
|
},
|
|
74
|
-
task,
|
|
75
|
-
task::{JoinError, JoinHandle},
|
|
97
|
+
task::{spawn_blocking, LocalSet},
|
|
76
98
|
};
|
|
77
99
|
use tokio_stream::wrappers::UnboundedReceiverStream;
|
|
78
100
|
use tokio_util::sync::CancellationToken;
|
|
79
101
|
use tracing::Span;
|
|
80
102
|
|
|
81
103
|
pub(crate) const LEGACY_QUERY_ID: &str = "legacy_query";
|
|
104
|
+
/// What percentage of a WFT timeout we are willing to wait before sending a WFT heartbeat when
|
|
105
|
+
/// necessary.
|
|
106
|
+
const WFT_HEARTBEAT_TIMEOUT_FRACTION: f32 = 0.8;
|
|
82
107
|
const MAX_EAGER_ACTIVITY_RESERVATIONS_PER_WORKFLOW_TASK: usize = 3;
|
|
83
108
|
|
|
84
109
|
type Result<T, E = WFMachinesError> = result::Result<T, E>;
|
|
85
110
|
type BoxedActivationStream = BoxStream<'static, Result<ActivationOrAuto, PollWfError>>;
|
|
111
|
+
type InternalFlagsRef = Rc<RefCell<InternalFlags>>;
|
|
86
112
|
|
|
87
113
|
/// Centralizes all state related to workflows and workflow tasks
|
|
88
114
|
pub(crate) struct Workflows {
|
|
89
115
|
task_queue: String,
|
|
90
116
|
local_tx: UnboundedSender<LocalInput>,
|
|
91
|
-
processing_task:
|
|
117
|
+
processing_task: TakeCell<thread::JoinHandle<()>>,
|
|
92
118
|
activation_stream: tokio::sync::Mutex<(
|
|
93
119
|
BoxedActivationStream,
|
|
94
120
|
// Used to indicate polling may begin
|
|
@@ -100,9 +126,13 @@ pub(crate) struct Workflows {
|
|
|
100
126
|
sticky_attrs: Option<StickyExecutionAttributes>,
|
|
101
127
|
/// If set, can be used to reserve activity task slots for eager-return of new activity tasks.
|
|
102
128
|
activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
|
|
129
|
+
/// Ensures we stay at or below this worker's maximum concurrent workflow task limit
|
|
130
|
+
wft_semaphore: MeteredSemaphore,
|
|
131
|
+
local_act_mgr: Arc<LocalActivityManager>,
|
|
132
|
+
ever_polled: AtomicBool,
|
|
103
133
|
}
|
|
104
134
|
|
|
105
|
-
pub(
|
|
135
|
+
pub(crate) struct WorkflowBasics {
|
|
106
136
|
pub max_cached_workflows: usize,
|
|
107
137
|
pub max_outstanding_wfts: usize,
|
|
108
138
|
pub shutdown_token: CancellationToken,
|
|
@@ -110,58 +140,120 @@ pub(super) struct WorkflowBasics {
|
|
|
110
140
|
pub namespace: String,
|
|
111
141
|
pub task_queue: String,
|
|
112
142
|
pub ignore_evicts_on_shutdown: bool,
|
|
143
|
+
pub fetching_concurrency: usize,
|
|
144
|
+
pub server_capabilities: get_system_info_response::Capabilities,
|
|
145
|
+
#[cfg(feature = "save_wf_inputs")]
|
|
146
|
+
pub wf_state_inputs: Option<UnboundedSender<Vec<u8>>>,
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
pub(crate) struct RunBasics<'a> {
|
|
150
|
+
pub namespace: String,
|
|
151
|
+
pub workflow_id: String,
|
|
152
|
+
pub workflow_type: String,
|
|
153
|
+
pub run_id: String,
|
|
154
|
+
pub history: HistoryUpdate,
|
|
155
|
+
pub metrics: MetricsContext,
|
|
156
|
+
pub capabilities: &'a get_system_info_response::Capabilities,
|
|
113
157
|
}
|
|
114
158
|
|
|
115
159
|
impl Workflows {
|
|
160
|
+
#[allow(clippy::too_many_arguments)] // Not much worth combining here
|
|
116
161
|
pub(super) fn new(
|
|
117
162
|
basics: WorkflowBasics,
|
|
118
163
|
sticky_attrs: Option<StickyExecutionAttributes>,
|
|
119
164
|
client: Arc<dyn WorkerClient>,
|
|
120
165
|
wft_stream: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
|
|
121
|
-
local_activity_request_sink: impl
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
+ 'static,
|
|
166
|
+
local_activity_request_sink: impl LocalActivityRequestSink,
|
|
167
|
+
local_act_mgr: Arc<LocalActivityManager>,
|
|
168
|
+
heartbeat_timeout_rx: UnboundedReceiver<HeartbeatTimeoutMsg>,
|
|
125
169
|
activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
|
|
170
|
+
telem_instance: Option<&TelemetryInstance>,
|
|
126
171
|
) -> Self {
|
|
127
172
|
let (local_tx, local_rx) = unbounded_channel();
|
|
173
|
+
let (fetch_tx, fetch_rx) = unbounded_channel();
|
|
128
174
|
let shutdown_tok = basics.shutdown_token.clone();
|
|
129
175
|
let task_queue = basics.task_queue.clone();
|
|
130
|
-
let
|
|
131
|
-
basics,
|
|
176
|
+
let wft_semaphore = MeteredSemaphore::new(
|
|
177
|
+
basics.max_outstanding_wfts,
|
|
178
|
+
basics.metrics.with_new_attrs([workflow_worker_type()]),
|
|
179
|
+
MetricsContext::available_task_slots,
|
|
180
|
+
);
|
|
181
|
+
// Only allow polling of the new WFT stream if there are available task slots
|
|
182
|
+
let proceeder = stream::unfold(wft_semaphore.clone(), |sem| async move {
|
|
183
|
+
Some((sem.acquire_owned().await.unwrap(), sem))
|
|
184
|
+
});
|
|
185
|
+
let wft_stream = stream_when_allowed(wft_stream, proceeder);
|
|
186
|
+
let extracted_wft_stream = WFTExtractor::build(
|
|
187
|
+
client.clone(),
|
|
188
|
+
basics.fetching_concurrency,
|
|
132
189
|
wft_stream,
|
|
190
|
+
UnboundedReceiverStream::new(fetch_rx),
|
|
191
|
+
);
|
|
192
|
+
let locals_stream = stream::select(
|
|
133
193
|
UnboundedReceiverStream::new(local_rx),
|
|
134
|
-
|
|
135
|
-
local_activity_request_sink,
|
|
194
|
+
UnboundedReceiverStream::new(heartbeat_timeout_rx).map(Into::into),
|
|
136
195
|
);
|
|
137
196
|
let (activation_tx, activation_rx) = unbounded_channel();
|
|
138
197
|
let (start_polling_tx, start_polling_rx) = oneshot::channel();
|
|
139
198
|
// We must spawn a task to constantly poll the activation stream, because otherwise
|
|
140
199
|
// activation completions would not cause anything to happen until the next poll.
|
|
141
|
-
let
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
200
|
+
let tracing_sub = telem_instance.map(|ti| ti.trace_subscriber());
|
|
201
|
+
let processing_task = thread::spawn(move || {
|
|
202
|
+
if let Some(ts) = tracing_sub {
|
|
203
|
+
set_trace_subscriber_for_current_thread(ts);
|
|
204
|
+
}
|
|
205
|
+
let rt = tokio::runtime::Builder::new_current_thread()
|
|
206
|
+
.enable_all()
|
|
207
|
+
.thread_name("workflow-processing")
|
|
208
|
+
.build()
|
|
209
|
+
.unwrap();
|
|
210
|
+
let local = LocalSet::new();
|
|
211
|
+
local.block_on(&rt, async move {
|
|
212
|
+
let mut stream = WFStream::build(
|
|
213
|
+
basics,
|
|
214
|
+
extracted_wft_stream,
|
|
215
|
+
locals_stream,
|
|
216
|
+
local_activity_request_sink,
|
|
217
|
+
);
|
|
218
|
+
|
|
219
|
+
// However, we want to avoid plowing ahead until we've been asked to poll at least
|
|
220
|
+
// once. This supports activity-only workers.
|
|
221
|
+
let do_poll = tokio::select! {
|
|
222
|
+
sp = start_polling_rx => {
|
|
223
|
+
sp.is_ok()
|
|
224
|
+
}
|
|
225
|
+
_ = shutdown_tok.cancelled() => {
|
|
226
|
+
false
|
|
227
|
+
}
|
|
228
|
+
};
|
|
229
|
+
if !do_poll {
|
|
230
|
+
return;
|
|
147
231
|
}
|
|
148
|
-
|
|
149
|
-
|
|
232
|
+
while let Some(output) = stream.next().await {
|
|
233
|
+
match output {
|
|
234
|
+
Ok(o) => {
|
|
235
|
+
for fetchreq in o.fetch_histories {
|
|
236
|
+
fetch_tx
|
|
237
|
+
.send(fetchreq)
|
|
238
|
+
.expect("Fetch channel must not be dropped");
|
|
239
|
+
}
|
|
240
|
+
for act in o.activations {
|
|
241
|
+
activation_tx
|
|
242
|
+
.send(Ok(act))
|
|
243
|
+
.expect("Activation processor channel not dropped");
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
Err(e) => activation_tx
|
|
247
|
+
.send(Err(e))
|
|
248
|
+
.expect("Activation processor channel not dropped"),
|
|
249
|
+
}
|
|
150
250
|
}
|
|
151
|
-
};
|
|
152
|
-
if !do_poll {
|
|
153
|
-
return;
|
|
154
|
-
}
|
|
155
|
-
while let Some(act) = stream.next().await {
|
|
156
|
-
activation_tx
|
|
157
|
-
.send(act)
|
|
158
|
-
.expect("Activation processor channel not dropped");
|
|
159
|
-
}
|
|
251
|
+
});
|
|
160
252
|
});
|
|
161
253
|
Self {
|
|
162
254
|
task_queue,
|
|
163
255
|
local_tx,
|
|
164
|
-
processing_task:
|
|
256
|
+
processing_task: TakeCell::new(processing_task),
|
|
165
257
|
activation_stream: tokio::sync::Mutex::new((
|
|
166
258
|
UnboundedReceiverStream::new(activation_rx).boxed(),
|
|
167
259
|
Some(start_polling_tx),
|
|
@@ -169,12 +261,16 @@ impl Workflows {
|
|
|
169
261
|
client,
|
|
170
262
|
sticky_attrs,
|
|
171
263
|
activity_tasks_handle,
|
|
264
|
+
wft_semaphore,
|
|
265
|
+
local_act_mgr,
|
|
266
|
+
ever_polled: AtomicBool::new(false),
|
|
172
267
|
}
|
|
173
268
|
}
|
|
174
269
|
|
|
175
|
-
pub async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
|
|
270
|
+
pub(super) async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
|
|
271
|
+
self.ever_polled.store(true, atomic::Ordering::Release);
|
|
176
272
|
loop {
|
|
177
|
-
let
|
|
273
|
+
let al = {
|
|
178
274
|
let mut lock = self.activation_stream.lock().await;
|
|
179
275
|
let (ref mut stream, ref mut beginner) = lock.deref_mut();
|
|
180
276
|
if let Some(beginner) = beginner.take() {
|
|
@@ -182,17 +278,39 @@ impl Workflows {
|
|
|
182
278
|
}
|
|
183
279
|
stream.next().await.unwrap_or(Err(PollWfError::ShutDown))?
|
|
184
280
|
};
|
|
185
|
-
Span::current().record("run_id",
|
|
186
|
-
match
|
|
187
|
-
ActivationOrAuto::LangActivation(
|
|
281
|
+
Span::current().record("run_id", al.run_id());
|
|
282
|
+
match al {
|
|
283
|
+
ActivationOrAuto::LangActivation(mut act)
|
|
284
|
+
| ActivationOrAuto::ReadyForQueries(mut act) => {
|
|
285
|
+
sort_act_jobs(&mut act);
|
|
188
286
|
debug!(activation=%act, "Sending activation to lang");
|
|
189
287
|
break Ok(act);
|
|
190
288
|
}
|
|
191
289
|
ActivationOrAuto::Autocomplete { run_id } => {
|
|
192
|
-
self.activation_completed(
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
290
|
+
self.activation_completed(
|
|
291
|
+
WorkflowActivationCompletion {
|
|
292
|
+
run_id,
|
|
293
|
+
status: Some(
|
|
294
|
+
workflow_completion::Success::from_variants(vec![]).into(),
|
|
295
|
+
),
|
|
296
|
+
},
|
|
297
|
+
// We need to say a type, but the type is irrelevant, so imagine some
|
|
298
|
+
// boxed function we'll never call.
|
|
299
|
+
Option::<Box<dyn Fn(PostActivateHookData) + Send>>::None,
|
|
300
|
+
)
|
|
301
|
+
.await?;
|
|
302
|
+
}
|
|
303
|
+
ActivationOrAuto::AutoFail {
|
|
304
|
+
run_id,
|
|
305
|
+
machines_err,
|
|
306
|
+
} => {
|
|
307
|
+
self.activation_completed(
|
|
308
|
+
WorkflowActivationCompletion {
|
|
309
|
+
run_id,
|
|
310
|
+
status: Some(auto_fail_to_complete_status(machines_err)),
|
|
311
|
+
},
|
|
312
|
+
Option::<Box<dyn Fn(PostActivateHookData) + Send>>::None,
|
|
313
|
+
)
|
|
196
314
|
.await?;
|
|
197
315
|
}
|
|
198
316
|
}
|
|
@@ -202,10 +320,11 @@ impl Workflows {
|
|
|
202
320
|
/// Queue an activation completion for processing, returning a future that will resolve with
|
|
203
321
|
/// the outcome of that completion. See [ActivationCompletedOutcome].
|
|
204
322
|
///
|
|
205
|
-
/// Returns the most-recently-processed event number for the run
|
|
206
|
-
pub async fn activation_completed(
|
|
323
|
+
/// Returns the most-recently-processed event number for the run.
|
|
324
|
+
pub(super) async fn activation_completed(
|
|
207
325
|
&self,
|
|
208
326
|
completion: WorkflowActivationCompletion,
|
|
327
|
+
post_activate_hook: Option<impl Fn(PostActivateHookData)>,
|
|
209
328
|
) -> Result<usize, CompleteWfError> {
|
|
210
329
|
let is_empty_completion = completion.is_empty();
|
|
211
330
|
let completion = validate_completion(completion)?;
|
|
@@ -213,7 +332,7 @@ impl Workflows {
|
|
|
213
332
|
let (tx, rx) = oneshot::channel();
|
|
214
333
|
let was_sent = self.send_local(WFActCompleteMsg {
|
|
215
334
|
completion,
|
|
216
|
-
response_tx: tx,
|
|
335
|
+
response_tx: Some(tx),
|
|
217
336
|
});
|
|
218
337
|
if !was_sent {
|
|
219
338
|
if is_empty_completion {
|
|
@@ -230,7 +349,7 @@ impl Workflows {
|
|
|
230
349
|
.await
|
|
231
350
|
.expect("Send half of activation complete response not dropped");
|
|
232
351
|
let mut wft_from_complete = None;
|
|
233
|
-
let
|
|
352
|
+
let wft_report_status = match completion_outcome.outcome {
|
|
234
353
|
ActivationCompleteOutcome::ReportWFTSuccess(report) => match report {
|
|
235
354
|
ServerCommandsWithWorkflowInfo {
|
|
236
355
|
task_token,
|
|
@@ -239,6 +358,7 @@ impl Workflows {
|
|
|
239
358
|
mut commands,
|
|
240
359
|
query_responses,
|
|
241
360
|
force_new_wft,
|
|
361
|
+
sdk_metadata,
|
|
242
362
|
},
|
|
243
363
|
} => {
|
|
244
364
|
let reserved_act_permits =
|
|
@@ -252,6 +372,13 @@ impl Workflows {
|
|
|
252
372
|
sticky_attributes: None,
|
|
253
373
|
return_new_workflow_task: true,
|
|
254
374
|
force_create_new_workflow_task: force_new_wft,
|
|
375
|
+
sdk_metadata,
|
|
376
|
+
metering_metadata: MeteringMetadata {
|
|
377
|
+
nonfirst_local_activity_execution_attempts: self
|
|
378
|
+
.local_act_mgr
|
|
379
|
+
.get_nonfirst_attempt_count(&run_id)
|
|
380
|
+
as u32,
|
|
381
|
+
},
|
|
255
382
|
};
|
|
256
383
|
let sticky_attrs = self.sticky_attrs.clone();
|
|
257
384
|
// Do not return new WFT if we would not cache, because returned new WFTs are
|
|
@@ -273,14 +400,14 @@ impl Workflows {
|
|
|
273
400
|
Ok(())
|
|
274
401
|
})
|
|
275
402
|
.await;
|
|
276
|
-
|
|
403
|
+
WFTReportStatus::Reported
|
|
277
404
|
}
|
|
278
405
|
ServerCommandsWithWorkflowInfo {
|
|
279
406
|
task_token,
|
|
280
407
|
action: ActivationAction::RespondLegacyQuery { result },
|
|
281
408
|
} => {
|
|
282
409
|
self.respond_legacy_query(task_token, *result).await;
|
|
283
|
-
|
|
410
|
+
WFTReportStatus::Reported
|
|
284
411
|
}
|
|
285
412
|
},
|
|
286
413
|
ActivationCompleteOutcome::ReportWFTFail(outcome) => match outcome {
|
|
@@ -292,29 +419,58 @@ impl Workflows {
|
|
|
292
419
|
.await
|
|
293
420
|
})
|
|
294
421
|
.await;
|
|
295
|
-
|
|
422
|
+
WFTReportStatus::Reported
|
|
296
423
|
}
|
|
297
424
|
FailedActivationWFTReport::ReportLegacyQueryFailure(task_token, failure) => {
|
|
298
425
|
warn!(run_id=%run_id, failure=?failure, "Failing legacy query request");
|
|
299
426
|
self.respond_legacy_query(task_token, legacy_query_failure(failure))
|
|
300
427
|
.await;
|
|
301
|
-
|
|
428
|
+
WFTReportStatus::Reported
|
|
302
429
|
}
|
|
303
430
|
},
|
|
304
|
-
ActivationCompleteOutcome::
|
|
431
|
+
ActivationCompleteOutcome::WFTFailedDontReport => WFTReportStatus::DropWft,
|
|
432
|
+
ActivationCompleteOutcome::DoNothing => WFTReportStatus::NotReported,
|
|
305
433
|
};
|
|
306
434
|
|
|
435
|
+
let maybe_pwft = if let Some(wft) = wft_from_complete {
|
|
436
|
+
match HistoryPaginator::from_poll(wft, self.client.clone()).await {
|
|
437
|
+
Ok((paginator, pwft)) => Some((pwft, paginator)),
|
|
438
|
+
Err(e) => {
|
|
439
|
+
self.request_eviction(
|
|
440
|
+
&run_id,
|
|
441
|
+
format!("Failed to paginate workflow task from completion: {e:?}"),
|
|
442
|
+
EvictionReason::Fatal,
|
|
443
|
+
);
|
|
444
|
+
None
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
} else {
|
|
448
|
+
None
|
|
449
|
+
};
|
|
450
|
+
|
|
451
|
+
if let Some(h) = post_activate_hook {
|
|
452
|
+
h(PostActivateHookData {
|
|
453
|
+
run_id: &run_id,
|
|
454
|
+
most_recent_event: completion_outcome.most_recently_processed_event,
|
|
455
|
+
replaying: completion_outcome.replaying,
|
|
456
|
+
});
|
|
457
|
+
}
|
|
458
|
+
|
|
307
459
|
self.post_activation(PostActivationMsg {
|
|
308
460
|
run_id,
|
|
309
|
-
|
|
310
|
-
wft_from_complete,
|
|
461
|
+
wft_report_status,
|
|
462
|
+
wft_from_complete: maybe_pwft,
|
|
311
463
|
});
|
|
312
464
|
|
|
313
465
|
Ok(completion_outcome.most_recently_processed_event)
|
|
314
466
|
}
|
|
315
467
|
|
|
316
468
|
/// Tell workflow that a local activity has finished with the provided result
|
|
317
|
-
pub fn notify_of_local_result(
|
|
469
|
+
pub(super) fn notify_of_local_result(
|
|
470
|
+
&self,
|
|
471
|
+
run_id: impl Into<String>,
|
|
472
|
+
resolved: LocalResolution,
|
|
473
|
+
) {
|
|
318
474
|
self.send_local(LocalResolutionMsg {
|
|
319
475
|
run_id: run_id.into(),
|
|
320
476
|
res: resolved,
|
|
@@ -322,7 +478,7 @@ impl Workflows {
|
|
|
322
478
|
}
|
|
323
479
|
|
|
324
480
|
/// Request eviction of a workflow
|
|
325
|
-
pub fn request_eviction(
|
|
481
|
+
pub(super) fn request_eviction(
|
|
326
482
|
&self,
|
|
327
483
|
run_id: impl Into<String>,
|
|
328
484
|
message: impl Into<String>,
|
|
@@ -332,26 +488,47 @@ impl Workflows {
|
|
|
332
488
|
run_id: run_id.into(),
|
|
333
489
|
message: message.into(),
|
|
334
490
|
reason,
|
|
491
|
+
auto_reply_fail_tt: None,
|
|
335
492
|
});
|
|
336
493
|
}
|
|
337
494
|
|
|
338
495
|
/// Query the state of workflow management. Can return `None` if workflow state is shut down.
|
|
339
|
-
pub fn get_state_info(&self) -> impl Future<Output = Option<WorkflowStateInfo>> {
|
|
496
|
+
pub(super) fn get_state_info(&self) -> impl Future<Output = Option<WorkflowStateInfo>> {
|
|
340
497
|
let (tx, rx) = oneshot::channel();
|
|
341
498
|
self.send_local(GetStateInfoMsg { response_tx: tx });
|
|
342
499
|
async move { rx.await.ok() }
|
|
343
500
|
}
|
|
344
501
|
|
|
345
|
-
pub
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
502
|
+
pub(super) fn available_wft_permits(&self) -> usize {
|
|
503
|
+
self.wft_semaphore.available_permits()
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
pub(super) async fn shutdown(&self) -> Result<(), anyhow::Error> {
|
|
507
|
+
if let Some(jh) = self.processing_task.take_once() {
|
|
508
|
+
// This serves to drive the stream if it is still alive and wouldn't otherwise receive
|
|
509
|
+
// another message. It allows it to shut itself down.
|
|
510
|
+
let (waker, stop_waker) = abortable(async {
|
|
511
|
+
let mut interval = tokio::time::interval(Duration::from_millis(10));
|
|
512
|
+
loop {
|
|
513
|
+
interval.tick().await;
|
|
514
|
+
let _ = self.get_state_info().await;
|
|
515
|
+
}
|
|
516
|
+
});
|
|
517
|
+
let (_, jh_res) = tokio::join!(
|
|
518
|
+
waker,
|
|
519
|
+
spawn_blocking(move || {
|
|
520
|
+
let r = jh.join();
|
|
521
|
+
stop_waker.abort();
|
|
522
|
+
r
|
|
523
|
+
})
|
|
524
|
+
);
|
|
525
|
+
jh_res?.map_err(|e| anyhow!("Error joining workflow processing thread: {e:?}"))?;
|
|
354
526
|
}
|
|
527
|
+
Ok(())
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
pub(super) fn ever_polled(&self) -> bool {
|
|
531
|
+
self.ever_polled.load(atomic::Ordering::Acquire)
|
|
355
532
|
}
|
|
356
533
|
|
|
357
534
|
/// Must be called after every activation completion has finished
|
|
@@ -393,7 +570,11 @@ impl Workflows {
|
|
|
393
570
|
/// successfully.
|
|
394
571
|
fn send_local(&self, msg: impl Into<LocalInputs>) -> bool {
|
|
395
572
|
let msg = msg.into();
|
|
396
|
-
let print_err =
|
|
573
|
+
let print_err = match &msg {
|
|
574
|
+
LocalInputs::GetStateInfo(_) => false,
|
|
575
|
+
LocalInputs::LocalResolution(lr) if lr.res.is_la_cancel_confirmation() => false,
|
|
576
|
+
_ => true,
|
|
577
|
+
};
|
|
397
578
|
if let Err(e) = self.local_tx.send(LocalInput {
|
|
398
579
|
input: msg,
|
|
399
580
|
span: Span::current(),
|
|
@@ -414,7 +595,7 @@ impl Workflows {
|
|
|
414
595
|
/// Process eagerly returned activities from WFT completion
|
|
415
596
|
fn handle_eager_activities(
|
|
416
597
|
&self,
|
|
417
|
-
reserved_act_permits: Vec<
|
|
598
|
+
reserved_act_permits: Vec<TrackedOwnedMeteredSemPermit>,
|
|
418
599
|
eager_acts: Vec<PollActivityTaskQueueResponse>,
|
|
419
600
|
) {
|
|
420
601
|
if let Some(at_handle) = self.activity_tasks_handle.as_ref() {
|
|
@@ -435,7 +616,7 @@ impl Workflows {
|
|
|
435
616
|
let with_permits = reserved_act_permits
|
|
436
617
|
.into_iter()
|
|
437
618
|
.zip(eager_acts.into_iter())
|
|
438
|
-
.map(|(permit, resp)|
|
|
619
|
+
.map(|(permit, resp)| TrackedPermittedTqResp { permit, resp });
|
|
439
620
|
if with_permits.len() > 0 {
|
|
440
621
|
debug!(
|
|
441
622
|
"Adding {} activity tasks received from WFT complete",
|
|
@@ -458,7 +639,7 @@ impl Workflows {
|
|
|
458
639
|
fn reserve_activity_slots_for_outgoing_commands(
|
|
459
640
|
&self,
|
|
460
641
|
commands: &mut [Command],
|
|
461
|
-
) -> Vec<
|
|
642
|
+
) -> Vec<TrackedOwnedMeteredSemPermit> {
|
|
462
643
|
let mut reserved = vec![];
|
|
463
644
|
for cmd in commands {
|
|
464
645
|
if let Some(Attributes::ScheduleActivityTaskCommandAttributes(attrs)) =
|
|
@@ -509,186 +690,30 @@ impl Workflows {
|
|
|
509
690
|
}
|
|
510
691
|
}
|
|
511
692
|
|
|
512
|
-
///
|
|
513
|
-
///
|
|
514
|
-
#[derive(derive_more::
|
|
515
|
-
#[
|
|
516
|
-
fmt = "
|
|
517
|
-
|
|
518
|
-
trying_to_evict: {}, last_action_acked: {} }}",
|
|
519
|
-
wft,
|
|
520
|
-
activation,
|
|
521
|
-
buffered_resp,
|
|
522
|
-
have_seen_terminal_event,
|
|
523
|
-
most_recently_processed_event_number,
|
|
524
|
-
more_pending_work,
|
|
525
|
-
"trying_to_evict.is_some()",
|
|
526
|
-
last_action_acked
|
|
693
|
+
/// Returned when a cache miss happens and we need to fetch history from the beginning to
|
|
694
|
+
/// replay a run
|
|
695
|
+
#[derive(Debug, derive_more::Display)]
|
|
696
|
+
#[display(
|
|
697
|
+
fmt = "CacheMissFetchReq(run_id: {})",
|
|
698
|
+
"original_wft.work.execution.run_id"
|
|
527
699
|
)]
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
have_seen_terminal_event: bool,
|
|
540
|
-
/// The most recently processed event id this machine has seen. 0 means it has seen nothing.
|
|
541
|
-
most_recently_processed_event_number: usize,
|
|
542
|
-
/// Is set true when the machines indicate that there is additional known work to be processed
|
|
543
|
-
more_pending_work: bool,
|
|
544
|
-
/// Is set if an eviction has been requested for this run
|
|
545
|
-
trying_to_evict: Option<RequestEvictMsg>,
|
|
546
|
-
/// Set to true if the last action we tried to take to this run has been processed (ie: the
|
|
547
|
-
/// [RunUpdateResponse] for it has been seen.
|
|
548
|
-
last_action_acked: bool,
|
|
549
|
-
/// For sending work to the machines
|
|
550
|
-
run_actions_tx: UnboundedSender<RunAction>,
|
|
551
|
-
/// Handle to the task where the actual machines live
|
|
552
|
-
handle: JoinHandle<()>,
|
|
553
|
-
|
|
554
|
-
/// We track if we have recorded useful debugging values onto a certain span yet, to overcome
|
|
555
|
-
/// duplicating field values. Remove this once https://github.com/tokio-rs/tracing/issues/2334
|
|
556
|
-
/// is fixed.
|
|
557
|
-
recorded_span_ids: HashSet<tracing::Id>,
|
|
558
|
-
metrics: MetricsContext,
|
|
700
|
+
#[must_use]
|
|
701
|
+
struct CacheMissFetchReq {
|
|
702
|
+
original_wft: PermittedWFT,
|
|
703
|
+
}
|
|
704
|
+
/// Bubbled up from inside workflow state if we're trying to apply the next workflow task but it
|
|
705
|
+
/// isn't in memory
|
|
706
|
+
#[derive(Debug)]
|
|
707
|
+
#[must_use]
|
|
708
|
+
struct NextPageReq {
|
|
709
|
+
paginator: HistoryPaginator,
|
|
710
|
+
span: Span,
|
|
559
711
|
}
|
|
560
|
-
impl ManagedRunHandle {
|
|
561
|
-
fn new(
|
|
562
|
-
wfm: WorkflowManager,
|
|
563
|
-
activations_tx: UnboundedSender<RunUpdateResponse>,
|
|
564
|
-
local_activity_request_sink: LocalActivityRequestSink,
|
|
565
|
-
metrics: MetricsContext,
|
|
566
|
-
) -> Self {
|
|
567
|
-
let (run_actions_tx, run_actions_rx) = unbounded_channel();
|
|
568
|
-
let managed = ManagedRun::new(wfm, activations_tx, local_activity_request_sink);
|
|
569
|
-
let handle = tokio::task::spawn(managed.run(run_actions_rx));
|
|
570
|
-
Self {
|
|
571
|
-
wft: None,
|
|
572
|
-
activation: None,
|
|
573
|
-
buffered_resp: None,
|
|
574
|
-
have_seen_terminal_event: false,
|
|
575
|
-
most_recently_processed_event_number: 0,
|
|
576
|
-
more_pending_work: false,
|
|
577
|
-
trying_to_evict: None,
|
|
578
|
-
last_action_acked: true,
|
|
579
|
-
run_actions_tx,
|
|
580
|
-
handle,
|
|
581
|
-
recorded_span_ids: Default::default(),
|
|
582
|
-
metrics,
|
|
583
|
-
}
|
|
584
|
-
}
|
|
585
|
-
|
|
586
|
-
fn incoming_wft(&mut self, wft: NewIncomingWFT) {
|
|
587
|
-
if self.wft.is_some() {
|
|
588
|
-
error!("Trying to send a new WFT for a run which already has one!");
|
|
589
|
-
}
|
|
590
|
-
self.send_run_action(RunActions::NewIncomingWFT(wft));
|
|
591
|
-
}
|
|
592
|
-
fn check_more_activations(&mut self) {
|
|
593
|
-
// No point in checking for more activations if we have not acked the last update, or
|
|
594
|
-
// if there's already an outstanding activation.
|
|
595
|
-
if self.last_action_acked && self.activation.is_none() {
|
|
596
|
-
self.send_run_action(RunActions::CheckMoreWork {
|
|
597
|
-
want_to_evict: self.trying_to_evict.clone(),
|
|
598
|
-
has_pending_queries: self
|
|
599
|
-
.wft
|
|
600
|
-
.as_ref()
|
|
601
|
-
.map(|wft| !wft.pending_queries.is_empty())
|
|
602
|
-
.unwrap_or_default(),
|
|
603
|
-
has_wft: self.wft.is_some(),
|
|
604
|
-
});
|
|
605
|
-
}
|
|
606
|
-
}
|
|
607
|
-
fn send_completion(&mut self, c: RunActivationCompletion) {
|
|
608
|
-
self.send_run_action(RunActions::ActivationCompletion(c));
|
|
609
|
-
}
|
|
610
|
-
fn send_local_resolution(&mut self, r: LocalResolution) {
|
|
611
|
-
self.send_run_action(RunActions::LocalResolution(r));
|
|
612
|
-
}
|
|
613
|
-
|
|
614
|
-
fn insert_outstanding_activation(&mut self, act: &ActivationOrAuto) {
|
|
615
|
-
let act_type = match &act {
|
|
616
|
-
ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
|
|
617
|
-
if act.is_legacy_query() {
|
|
618
|
-
OutstandingActivation::LegacyQuery
|
|
619
|
-
} else {
|
|
620
|
-
OutstandingActivation::Normal {
|
|
621
|
-
contains_eviction: act.eviction_index().is_some(),
|
|
622
|
-
num_jobs: act.jobs.len(),
|
|
623
|
-
}
|
|
624
|
-
}
|
|
625
|
-
}
|
|
626
|
-
ActivationOrAuto::Autocomplete { .. } => OutstandingActivation::Autocomplete,
|
|
627
|
-
};
|
|
628
|
-
if let Some(old_act) = self.activation {
|
|
629
|
-
// This is a panic because we have screwed up core logic if this is violated. It must be
|
|
630
|
-
// upheld.
|
|
631
|
-
panic!(
|
|
632
|
-
"Attempted to insert a new outstanding activation {:?}, but there already was \
|
|
633
|
-
one outstanding: {:?}",
|
|
634
|
-
act, old_act
|
|
635
|
-
);
|
|
636
|
-
}
|
|
637
|
-
self.activation = Some(act_type);
|
|
638
|
-
}
|
|
639
|
-
|
|
640
|
-
fn send_run_action(&mut self, action: RunActions) {
|
|
641
|
-
self.last_action_acked = false;
|
|
642
|
-
self.run_actions_tx
|
|
643
|
-
.send(RunAction {
|
|
644
|
-
action,
|
|
645
|
-
trace_span: Span::current(),
|
|
646
|
-
})
|
|
647
|
-
.expect("Receive half of run actions not dropped");
|
|
648
|
-
}
|
|
649
|
-
|
|
650
|
-
/// Returns true if the managed run has any form of pending work
|
|
651
|
-
/// If `ignore_evicts` is true, pending evictions do not count as pending work.
|
|
652
|
-
/// If `ignore_buffered` is true, buffered workflow tasks do not count as pending work.
|
|
653
|
-
fn has_any_pending_work(&self, ignore_evicts: bool, ignore_buffered: bool) -> bool {
|
|
654
|
-
let evict_work = if ignore_evicts {
|
|
655
|
-
false
|
|
656
|
-
} else {
|
|
657
|
-
self.trying_to_evict.is_some()
|
|
658
|
-
};
|
|
659
|
-
let act_work = if ignore_evicts {
|
|
660
|
-
if let Some(ref act) = self.activation {
|
|
661
|
-
!act.has_only_eviction()
|
|
662
|
-
} else {
|
|
663
|
-
false
|
|
664
|
-
}
|
|
665
|
-
} else {
|
|
666
|
-
self.activation.is_some()
|
|
667
|
-
};
|
|
668
|
-
let buffered = if ignore_buffered {
|
|
669
|
-
false
|
|
670
|
-
} else {
|
|
671
|
-
self.buffered_resp.is_some()
|
|
672
|
-
};
|
|
673
|
-
self.wft.is_some()
|
|
674
|
-
|| buffered
|
|
675
|
-
|| !self.last_action_acked
|
|
676
|
-
|| self.more_pending_work
|
|
677
|
-
|| act_work
|
|
678
|
-
|| evict_work
|
|
679
|
-
}
|
|
680
712
|
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
matches!(self.activation, Some(OutstandingActivation::LegacyQuery))
|
|
686
|
-
|| self
|
|
687
|
-
.wft
|
|
688
|
-
.as_ref()
|
|
689
|
-
.map(|t| t.has_pending_legacy_query())
|
|
690
|
-
.unwrap_or_default()
|
|
691
|
-
}
|
|
713
|
+
#[derive(Debug)]
|
|
714
|
+
struct WFStreamOutput {
|
|
715
|
+
activations: VecDeque<ActivationOrAuto>,
|
|
716
|
+
fetch_histories: VecDeque<HistoryFetchReq>,
|
|
692
717
|
}
|
|
693
718
|
|
|
694
719
|
#[derive(Debug, derive_more::Display)]
|
|
@@ -697,9 +722,15 @@ enum ActivationOrAuto {
|
|
|
697
722
|
/// This type should only be filled with an empty activation which is ready to have queries
|
|
698
723
|
/// inserted into the joblist
|
|
699
724
|
ReadyForQueries(WorkflowActivation),
|
|
725
|
+
#[display(fmt = "Autocomplete(run_id={run_id})")]
|
|
700
726
|
Autocomplete {
|
|
701
727
|
run_id: String,
|
|
702
728
|
},
|
|
729
|
+
#[display(fmt = "AutoFail(run_id={run_id})")]
|
|
730
|
+
AutoFail {
|
|
731
|
+
run_id: String,
|
|
732
|
+
machines_err: WFMachinesError,
|
|
733
|
+
},
|
|
703
734
|
}
|
|
704
735
|
impl ActivationOrAuto {
|
|
705
736
|
pub fn run_id(&self) -> &str {
|
|
@@ -707,15 +738,53 @@ impl ActivationOrAuto {
|
|
|
707
738
|
ActivationOrAuto::LangActivation(act) => &act.run_id,
|
|
708
739
|
ActivationOrAuto::Autocomplete { run_id, .. } => run_id,
|
|
709
740
|
ActivationOrAuto::ReadyForQueries(act) => &act.run_id,
|
|
741
|
+
ActivationOrAuto::AutoFail { run_id, .. } => run_id,
|
|
710
742
|
}
|
|
711
743
|
}
|
|
712
744
|
}
|
|
713
745
|
|
|
746
|
+
/// A processed WFT which has been validated and had a history update extracted from it
|
|
714
747
|
#[derive(derive_more::DebugCustom)]
|
|
715
|
-
#[
|
|
748
|
+
#[cfg_attr(
|
|
749
|
+
feature = "save_wf_inputs",
|
|
750
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
751
|
+
)]
|
|
752
|
+
#[debug(fmt = "PermittedWft({work:?})")]
|
|
716
753
|
pub(crate) struct PermittedWFT {
|
|
717
|
-
|
|
718
|
-
|
|
754
|
+
work: PreparedWFT,
|
|
755
|
+
#[cfg_attr(
|
|
756
|
+
feature = "save_wf_inputs",
|
|
757
|
+
serde(skip, default = "UsedMeteredSemPermit::fake_deserialized")
|
|
758
|
+
)]
|
|
759
|
+
permit: UsedMeteredSemPermit,
|
|
760
|
+
#[cfg_attr(
|
|
761
|
+
feature = "save_wf_inputs",
|
|
762
|
+
serde(skip, default = "HistoryPaginator::fake_deserialized")
|
|
763
|
+
)]
|
|
764
|
+
paginator: HistoryPaginator,
|
|
765
|
+
}
|
|
766
|
+
#[derive(Debug)]
|
|
767
|
+
#[cfg_attr(
|
|
768
|
+
feature = "save_wf_inputs",
|
|
769
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
770
|
+
)]
|
|
771
|
+
struct PreparedWFT {
|
|
772
|
+
task_token: TaskToken,
|
|
773
|
+
attempt: u32,
|
|
774
|
+
execution: WorkflowExecution,
|
|
775
|
+
workflow_type: String,
|
|
776
|
+
legacy_query: Option<WorkflowQuery>,
|
|
777
|
+
query_requests: Vec<QueryWorkflow>,
|
|
778
|
+
update: HistoryUpdate,
|
|
779
|
+
}
|
|
780
|
+
impl PreparedWFT {
|
|
781
|
+
/// Returns true if the contained history update is incremental (IE: expects to hit a cached
|
|
782
|
+
/// workflow)
|
|
783
|
+
pub fn is_incremental(&self) -> bool {
|
|
784
|
+
let start_event_id = self.update.first_event_id();
|
|
785
|
+
let poll_resp_is_incremental = start_event_id.map(|eid| eid > 1).unwrap_or_default();
|
|
786
|
+
poll_resp_is_incremental || start_event_id.is_none()
|
|
787
|
+
}
|
|
719
788
|
}
|
|
720
789
|
|
|
721
790
|
#[derive(Debug)]
|
|
@@ -727,7 +796,7 @@ pub(crate) struct OutstandingTask {
|
|
|
727
796
|
pub start_time: Instant,
|
|
728
797
|
/// The WFT permit owned by this task, ensures we don't exceed max concurrent WFT, and makes
|
|
729
798
|
/// sure the permit is automatically freed when we delete the task.
|
|
730
|
-
pub permit:
|
|
799
|
+
pub permit: UsedMeteredSemPermit,
|
|
731
800
|
}
|
|
732
801
|
|
|
733
802
|
impl OutstandingTask {
|
|
@@ -806,47 +875,82 @@ pub(crate) enum ActivationAction {
|
|
|
806
875
|
commands: Vec<ProtoCommand>,
|
|
807
876
|
query_responses: Vec<QueryResult>,
|
|
808
877
|
force_new_wft: bool,
|
|
878
|
+
sdk_metadata: WorkflowTaskCompletedMetadata,
|
|
809
879
|
},
|
|
810
880
|
/// We should respond to a legacy query request
|
|
811
881
|
RespondLegacyQuery { result: Box<QueryResult> },
|
|
812
882
|
}
|
|
813
883
|
|
|
814
|
-
#[derive(Debug
|
|
815
|
-
|
|
816
|
-
EvictionRequested(Option<u32
|
|
884
|
+
#[derive(Debug)]
|
|
885
|
+
enum EvictionRequestResult {
|
|
886
|
+
EvictionRequested(Option<u32>, RunUpdateAct),
|
|
817
887
|
NotFound,
|
|
818
888
|
EvictionAlreadyRequested(Option<u32>),
|
|
819
889
|
}
|
|
890
|
+
impl EvictionRequestResult {
|
|
891
|
+
fn into_run_update_resp(self) -> RunUpdateAct {
|
|
892
|
+
match self {
|
|
893
|
+
EvictionRequestResult::EvictionRequested(_, resp) => resp,
|
|
894
|
+
EvictionRequestResult::NotFound
|
|
895
|
+
| EvictionRequestResult::EvictionAlreadyRequested(_) => None,
|
|
896
|
+
}
|
|
897
|
+
}
|
|
898
|
+
}
|
|
820
899
|
|
|
821
900
|
#[derive(Debug)]
|
|
822
901
|
#[allow(dead_code)] // Not always used in non-test
|
|
823
902
|
pub(crate) struct WorkflowStateInfo {
|
|
824
903
|
pub cached_workflows: usize,
|
|
825
904
|
pub outstanding_wft: usize,
|
|
826
|
-
pub available_wft_permits: usize,
|
|
827
905
|
}
|
|
828
906
|
|
|
829
907
|
#[derive(Debug)]
|
|
908
|
+
#[cfg_attr(
|
|
909
|
+
feature = "save_wf_inputs",
|
|
910
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
911
|
+
)]
|
|
830
912
|
struct WFActCompleteMsg {
|
|
831
913
|
completion: ValidatedCompletion,
|
|
832
|
-
|
|
914
|
+
#[cfg_attr(feature = "save_wf_inputs", serde(skip))]
|
|
915
|
+
response_tx: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
833
916
|
}
|
|
834
917
|
#[derive(Debug)]
|
|
918
|
+
#[cfg_attr(
|
|
919
|
+
feature = "save_wf_inputs",
|
|
920
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
921
|
+
)]
|
|
835
922
|
struct LocalResolutionMsg {
|
|
836
923
|
run_id: String,
|
|
837
924
|
res: LocalResolution,
|
|
838
925
|
}
|
|
839
926
|
#[derive(Debug)]
|
|
927
|
+
#[cfg_attr(
|
|
928
|
+
feature = "save_wf_inputs",
|
|
929
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
930
|
+
)]
|
|
840
931
|
struct PostActivationMsg {
|
|
841
932
|
run_id: String,
|
|
842
|
-
|
|
843
|
-
wft_from_complete: Option<
|
|
933
|
+
wft_report_status: WFTReportStatus,
|
|
934
|
+
wft_from_complete: Option<(PreparedWFT, HistoryPaginator)>,
|
|
844
935
|
}
|
|
845
936
|
#[derive(Debug, Clone)]
|
|
937
|
+
#[cfg_attr(
|
|
938
|
+
feature = "save_wf_inputs",
|
|
939
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
940
|
+
)]
|
|
846
941
|
struct RequestEvictMsg {
|
|
847
942
|
run_id: String,
|
|
848
943
|
message: String,
|
|
849
944
|
reason: EvictionReason,
|
|
945
|
+
/// If set, we requested eviction because something went wrong processing a brand new poll task,
|
|
946
|
+
/// which means we won't have stored the WFT and we need to track the task token separately so
|
|
947
|
+
/// we can reply with a failure to server after the evict goes through.
|
|
948
|
+
auto_reply_fail_tt: Option<TaskToken>,
|
|
949
|
+
}
|
|
950
|
+
#[derive(Debug)]
|
|
951
|
+
pub(crate) struct HeartbeatTimeoutMsg {
|
|
952
|
+
pub(crate) run_id: String,
|
|
953
|
+
pub(crate) span: Span,
|
|
850
954
|
}
|
|
851
955
|
#[derive(Debug)]
|
|
852
956
|
struct GetStateInfoMsg {
|
|
@@ -857,6 +961,7 @@ struct GetStateInfoMsg {
|
|
|
857
961
|
#[derive(Debug)]
|
|
858
962
|
struct ActivationCompleteResult {
|
|
859
963
|
most_recently_processed_event: usize,
|
|
964
|
+
replaying: bool,
|
|
860
965
|
outcome: ActivationCompleteOutcome,
|
|
861
966
|
}
|
|
862
967
|
/// What needs to be done after calling [Workflows::activation_completed]
|
|
@@ -869,16 +974,24 @@ enum ActivationCompleteOutcome {
|
|
|
869
974
|
ReportWFTFail(FailedActivationWFTReport),
|
|
870
975
|
/// There's nothing to do right now. EX: The workflow needs to keep replaying.
|
|
871
976
|
DoNothing,
|
|
977
|
+
/// The workflow task failed, but we shouldn't report it. EX: We have failed 2 or more attempts
|
|
978
|
+
/// in a row.
|
|
979
|
+
WFTFailedDontReport,
|
|
872
980
|
}
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
981
|
+
/// Did we report, or not, completion of a WFT to server?
|
|
982
|
+
#[derive(Debug, Copy, Clone)]
|
|
983
|
+
#[cfg_attr(
|
|
984
|
+
feature = "save_wf_inputs",
|
|
985
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
986
|
+
)]
|
|
987
|
+
enum WFTReportStatus {
|
|
988
|
+
Reported,
|
|
989
|
+
/// The WFT completion was not reported when finishing the activation, because there's still
|
|
990
|
+
/// work to be done. EX: Running LAs.
|
|
991
|
+
NotReported,
|
|
992
|
+
/// We didn't report, but we want to clear the outstanding workflow task anyway. See
|
|
993
|
+
/// [ActivationCompleteOutcome::WFTFailedDontReport]
|
|
994
|
+
DropWft,
|
|
882
995
|
}
|
|
883
996
|
|
|
884
997
|
fn validate_completion(
|
|
@@ -887,7 +1000,7 @@ fn validate_completion(
|
|
|
887
1000
|
match completion.status {
|
|
888
1001
|
Some(workflow_activation_completion::Status::Successful(success)) => {
|
|
889
1002
|
// Convert to wf commands
|
|
890
|
-
let commands = success
|
|
1003
|
+
let mut commands = success
|
|
891
1004
|
.commands
|
|
892
1005
|
.into_iter()
|
|
893
1006
|
.map(|c| c.try_into())
|
|
@@ -908,16 +1021,26 @@ fn validate_completion(
|
|
|
908
1021
|
reason: format!(
|
|
909
1022
|
"Workflow completion had a legacy query response along with other \
|
|
910
1023
|
commands. This is not allowed and constitutes an error in the \
|
|
911
|
-
lang SDK. Commands: {:?}"
|
|
912
|
-
commands
|
|
1024
|
+
lang SDK. Commands: {commands:?}"
|
|
913
1025
|
),
|
|
914
1026
|
run_id: completion.run_id,
|
|
915
1027
|
});
|
|
916
1028
|
}
|
|
917
1029
|
|
|
1030
|
+
// Any non-query-response commands after a terminal command should be ignored
|
|
1031
|
+
if let Some(term_cmd_pos) = commands.iter().position(|c| c.is_terminal()) {
|
|
1032
|
+
// Query responses are just fine, so keep them.
|
|
1033
|
+
let queries = commands
|
|
1034
|
+
.split_off(term_cmd_pos + 1)
|
|
1035
|
+
.into_iter()
|
|
1036
|
+
.filter(|c| matches!(c, WFCommand::QueryResponse(_)));
|
|
1037
|
+
commands.extend(queries);
|
|
1038
|
+
}
|
|
1039
|
+
|
|
918
1040
|
Ok(ValidatedCompletion::Success {
|
|
919
1041
|
run_id: completion.run_id,
|
|
920
1042
|
commands,
|
|
1043
|
+
used_flags: success.used_internal_flags,
|
|
921
1044
|
})
|
|
922
1045
|
}
|
|
923
1046
|
Some(workflow_activation_completion::Status::Failed(failure)) => {
|
|
@@ -934,11 +1057,16 @@ fn validate_completion(
|
|
|
934
1057
|
}
|
|
935
1058
|
|
|
936
1059
|
#[derive(Debug)]
|
|
1060
|
+
#[cfg_attr(
|
|
1061
|
+
feature = "save_wf_inputs",
|
|
1062
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
1063
|
+
)]
|
|
937
1064
|
#[allow(clippy::large_enum_variant)]
|
|
938
1065
|
enum ValidatedCompletion {
|
|
939
1066
|
Success {
|
|
940
1067
|
run_id: String,
|
|
941
1068
|
commands: Vec<WFCommand>,
|
|
1069
|
+
used_flags: Vec<u32>,
|
|
942
1070
|
},
|
|
943
1071
|
Fail {
|
|
944
1072
|
run_id: String,
|
|
@@ -955,112 +1083,6 @@ impl ValidatedCompletion {
|
|
|
955
1083
|
}
|
|
956
1084
|
}
|
|
957
1085
|
|
|
958
|
-
/// Input to run tasks, sent to [ManagedRun]s via [ManagedRunHandle]s
|
|
959
|
-
#[derive(Debug)]
|
|
960
|
-
struct RunAction {
|
|
961
|
-
action: RunActions,
|
|
962
|
-
trace_span: Span,
|
|
963
|
-
}
|
|
964
|
-
#[derive(Debug)]
|
|
965
|
-
#[allow(clippy::large_enum_variant)]
|
|
966
|
-
enum RunActions {
|
|
967
|
-
NewIncomingWFT(NewIncomingWFT),
|
|
968
|
-
ActivationCompletion(RunActivationCompletion),
|
|
969
|
-
CheckMoreWork {
|
|
970
|
-
want_to_evict: Option<RequestEvictMsg>,
|
|
971
|
-
has_pending_queries: bool,
|
|
972
|
-
has_wft: bool,
|
|
973
|
-
},
|
|
974
|
-
LocalResolution(LocalResolution),
|
|
975
|
-
HeartbeatTimeout,
|
|
976
|
-
}
|
|
977
|
-
#[derive(Debug)]
|
|
978
|
-
struct NewIncomingWFT {
|
|
979
|
-
/// This field is only populated if the machines already exist. Otherwise the machines
|
|
980
|
-
/// are instantiated with the workflow history.
|
|
981
|
-
history_update: Option<HistoryUpdate>,
|
|
982
|
-
/// Wft start time
|
|
983
|
-
start_time: Instant,
|
|
984
|
-
}
|
|
985
|
-
#[derive(Debug)]
|
|
986
|
-
struct RunActivationCompletion {
|
|
987
|
-
task_token: TaskToken,
|
|
988
|
-
start_time: Instant,
|
|
989
|
-
commands: Vec<WFCommand>,
|
|
990
|
-
activation_was_eviction: bool,
|
|
991
|
-
activation_was_only_eviction: bool,
|
|
992
|
-
has_pending_query: bool,
|
|
993
|
-
query_responses: Vec<QueryResult>,
|
|
994
|
-
/// Used to notify the worker when the completion is done processing and the completion can
|
|
995
|
-
/// unblock. Must always be `Some` when initialized.
|
|
996
|
-
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
997
|
-
}
|
|
998
|
-
|
|
999
|
-
/// A response from a [ManagedRun] held by a [ManagedRunHandle]
|
|
1000
|
-
#[derive(Debug)]
|
|
1001
|
-
struct RunUpdateResponse {
|
|
1002
|
-
kind: RunUpdateResponseKind,
|
|
1003
|
-
span: Span,
|
|
1004
|
-
}
|
|
1005
|
-
#[derive(Debug, derive_more::Display)]
|
|
1006
|
-
#[allow(clippy::large_enum_variant)]
|
|
1007
|
-
enum RunUpdateResponseKind {
|
|
1008
|
-
Good(GoodRunUpdate),
|
|
1009
|
-
Fail(FailRunUpdate),
|
|
1010
|
-
}
|
|
1011
|
-
impl RunUpdateResponseKind {
|
|
1012
|
-
pub(crate) fn run_id(&self) -> &str {
|
|
1013
|
-
match self {
|
|
1014
|
-
RunUpdateResponseKind::Good(g) => &g.run_id,
|
|
1015
|
-
RunUpdateResponseKind::Fail(f) => &f.run_id,
|
|
1016
|
-
}
|
|
1017
|
-
}
|
|
1018
|
-
}
|
|
1019
|
-
|
|
1020
|
-
#[derive(Debug)]
|
|
1021
|
-
struct GoodRunUpdate {
|
|
1022
|
-
run_id: String,
|
|
1023
|
-
outgoing_activation: Option<ActivationOrAuto>,
|
|
1024
|
-
fulfillable_complete: Option<FulfillableActivationComplete>,
|
|
1025
|
-
have_seen_terminal_event: bool,
|
|
1026
|
-
/// Is true if there are more jobs that need to be sent to lang
|
|
1027
|
-
more_pending_work: bool,
|
|
1028
|
-
most_recently_processed_event_number: usize,
|
|
1029
|
-
/// Is true if this update was in response to a new WFT
|
|
1030
|
-
in_response_to_wft: bool,
|
|
1031
|
-
}
|
|
1032
|
-
impl Display for GoodRunUpdate {
|
|
1033
|
-
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
1034
|
-
write!(
|
|
1035
|
-
f,
|
|
1036
|
-
"GoodRunUpdate(run_id: {}, outgoing_activation: {}, more_pending_work: {})",
|
|
1037
|
-
self.run_id,
|
|
1038
|
-
if let Some(og) = self.outgoing_activation.as_ref() {
|
|
1039
|
-
format!("{}", og)
|
|
1040
|
-
} else {
|
|
1041
|
-
"None".to_string()
|
|
1042
|
-
},
|
|
1043
|
-
self.more_pending_work
|
|
1044
|
-
)
|
|
1045
|
-
}
|
|
1046
|
-
}
|
|
1047
|
-
#[derive(Debug)]
|
|
1048
|
-
pub(crate) struct FailRunUpdate {
|
|
1049
|
-
run_id: String,
|
|
1050
|
-
err: WFMachinesError,
|
|
1051
|
-
/// This is populated if the run update failed while processing a completion - and thus we
|
|
1052
|
-
/// must respond down it when handling the failure.
|
|
1053
|
-
completion_resp: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
1054
|
-
}
|
|
1055
|
-
impl Display for FailRunUpdate {
|
|
1056
|
-
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
1057
|
-
write!(
|
|
1058
|
-
f,
|
|
1059
|
-
"FailRunUpdate(run_id: {}, error: {:?})",
|
|
1060
|
-
self.run_id, self.err
|
|
1061
|
-
)
|
|
1062
|
-
}
|
|
1063
|
-
}
|
|
1064
1086
|
#[derive(Debug)]
|
|
1065
1087
|
pub struct OutgoingServerCommands {
|
|
1066
1088
|
pub commands: Vec<ProtoCommand>,
|
|
@@ -1068,9 +1090,22 @@ pub struct OutgoingServerCommands {
|
|
|
1068
1090
|
}
|
|
1069
1091
|
|
|
1070
1092
|
#[derive(Debug)]
|
|
1093
|
+
#[cfg_attr(
|
|
1094
|
+
feature = "save_wf_inputs",
|
|
1095
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
1096
|
+
)]
|
|
1071
1097
|
pub(crate) enum LocalResolution {
|
|
1072
1098
|
LocalActivity(LocalActivityResolution),
|
|
1073
1099
|
}
|
|
1100
|
+
impl LocalResolution {
|
|
1101
|
+
pub fn is_la_cancel_confirmation(&self) -> bool {
|
|
1102
|
+
match self {
|
|
1103
|
+
LocalResolution::LocalActivity(lar) => {
|
|
1104
|
+
matches!(lar.result, LocalActivityExecutionResult::Cancelled(_))
|
|
1105
|
+
}
|
|
1106
|
+
}
|
|
1107
|
+
}
|
|
1108
|
+
}
|
|
1074
1109
|
|
|
1075
1110
|
#[derive(thiserror::Error, Debug, derive_more::From)]
|
|
1076
1111
|
#[error("Lang provided workflow command with empty variant")]
|
|
@@ -1079,6 +1114,10 @@ pub struct EmptyWorkflowCommandErr;
|
|
|
1079
1114
|
/// [DrivenWorkflow]s respond with these when called, to indicate what they want to do next.
|
|
1080
1115
|
/// EX: Create a new timer, complete the workflow, etc.
|
|
1081
1116
|
#[derive(Debug, derive_more::From, derive_more::Display)]
|
|
1117
|
+
#[cfg_attr(
|
|
1118
|
+
feature = "save_wf_inputs",
|
|
1119
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
1120
|
+
)]
|
|
1082
1121
|
#[allow(clippy::large_enum_variant)]
|
|
1083
1122
|
pub enum WFCommand {
|
|
1084
1123
|
/// Returned when we need to wait for the lang sdk to send us something
|
|
@@ -1150,6 +1189,23 @@ impl TryFrom<WorkflowCommand> for WFCommand {
|
|
|
1150
1189
|
}
|
|
1151
1190
|
}
|
|
1152
1191
|
|
|
1192
|
+
impl WFCommand {
|
|
1193
|
+
/// Returns true if the command is one which ends the workflow:
|
|
1194
|
+
/// * Completed
|
|
1195
|
+
/// * Failed
|
|
1196
|
+
/// * Cancelled
|
|
1197
|
+
/// * Continue-as-new
|
|
1198
|
+
pub fn is_terminal(&self) -> bool {
|
|
1199
|
+
matches!(
|
|
1200
|
+
self,
|
|
1201
|
+
WFCommand::CompleteWorkflow(_)
|
|
1202
|
+
| WFCommand::FailWorkflow(_)
|
|
1203
|
+
| WFCommand::CancelWorkflow(_)
|
|
1204
|
+
| WFCommand::ContinueAsNew(_)
|
|
1205
|
+
)
|
|
1206
|
+
}
|
|
1207
|
+
}
|
|
1208
|
+
|
|
1153
1209
|
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
|
|
1154
1210
|
enum CommandID {
|
|
1155
1211
|
Timer(u32),
|
|
@@ -1171,12 +1227,9 @@ pub struct WorkflowStartedInfo {
|
|
|
1171
1227
|
retry_policy: Option<RetryPolicy>,
|
|
1172
1228
|
}
|
|
1173
1229
|
|
|
1174
|
-
type LocalActivityRequestSink =
|
|
1175
|
-
Arc<dyn Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution> + Send + Sync>;
|
|
1176
|
-
|
|
1177
1230
|
/// Wraps outgoing activation job protos with some internal details core might care about
|
|
1178
1231
|
#[derive(Debug, derive_more::Display)]
|
|
1179
|
-
#[display(fmt = "{}"
|
|
1232
|
+
#[display(fmt = "{variant}")]
|
|
1180
1233
|
struct OutgoingJob {
|
|
1181
1234
|
variant: workflow_activation_job::Variant,
|
|
1182
1235
|
/// Since LA resolutions are not distinguished from non-LA resolutions as far as lang is
|
|
@@ -1198,3 +1251,158 @@ impl From<OutgoingJob> for WorkflowActivationJob {
|
|
|
1198
1251
|
}
|
|
1199
1252
|
}
|
|
1200
1253
|
}
|
|
1254
|
+
|
|
1255
|
+
/// Errors thrown inside of workflow machines
|
|
1256
|
+
#[derive(thiserror::Error, Debug)]
|
|
1257
|
+
pub(crate) enum WFMachinesError {
|
|
1258
|
+
#[error("Nondeterminism error: {0}")]
|
|
1259
|
+
Nondeterminism(String),
|
|
1260
|
+
#[error("Fatal error in workflow machines: {0}")]
|
|
1261
|
+
Fatal(String),
|
|
1262
|
+
}
|
|
1263
|
+
|
|
1264
|
+
impl WFMachinesError {
|
|
1265
|
+
pub fn evict_reason(&self) -> EvictionReason {
|
|
1266
|
+
match self {
|
|
1267
|
+
WFMachinesError::Nondeterminism(_) => EvictionReason::Nondeterminism,
|
|
1268
|
+
WFMachinesError::Fatal(_) => EvictionReason::Fatal,
|
|
1269
|
+
}
|
|
1270
|
+
}
|
|
1271
|
+
}
|
|
1272
|
+
|
|
1273
|
+
impl From<TimestampError> for WFMachinesError {
|
|
1274
|
+
fn from(_: TimestampError) -> Self {
|
|
1275
|
+
Self::Fatal("Could not decode timestamp".to_string())
|
|
1276
|
+
}
|
|
1277
|
+
}
|
|
1278
|
+
|
|
1279
|
+
impl From<anyhow::Error> for WFMachinesError {
|
|
1280
|
+
fn from(value: anyhow::Error) -> Self {
|
|
1281
|
+
WFMachinesError::Fatal(value.to_string())
|
|
1282
|
+
}
|
|
1283
|
+
}
|
|
1284
|
+
|
|
1285
|
+
fn auto_fail_to_complete_status(err: WFMachinesError) -> workflow_activation_completion::Status {
|
|
1286
|
+
workflow_activation_completion::Status::Failed(Failure {
|
|
1287
|
+
failure: Some(
|
|
1288
|
+
temporal_sdk_core_protos::temporal::api::failure::v1::Failure {
|
|
1289
|
+
message: "Error while processing workflow task".to_string(),
|
|
1290
|
+
source: err.to_string(),
|
|
1291
|
+
stack_trace: "".to_string(),
|
|
1292
|
+
encoded_attributes: None,
|
|
1293
|
+
cause: None,
|
|
1294
|
+
failure_info: None,
|
|
1295
|
+
},
|
|
1296
|
+
),
|
|
1297
|
+
force_cause: WorkflowTaskFailedCause::from(err.evict_reason()) as i32,
|
|
1298
|
+
})
|
|
1299
|
+
}
|
|
1300
|
+
|
|
1301
|
+
pub(crate) trait LocalActivityRequestSink: Send + Sync + 'static {
|
|
1302
|
+
fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution>;
|
|
1303
|
+
}
|
|
1304
|
+
|
|
1305
|
+
#[derive(derive_more::Constructor)]
|
|
1306
|
+
pub(super) struct LAReqSink {
|
|
1307
|
+
lam: Arc<LocalActivityManager>,
|
|
1308
|
+
/// If we're recording WF inputs, we also need to store immediate resolutions so they're
|
|
1309
|
+
/// available on replay.
|
|
1310
|
+
#[allow(dead_code)] // sometimes appears unused due to feature flagging
|
|
1311
|
+
recorder: Option<UnboundedSender<Vec<u8>>>,
|
|
1312
|
+
}
|
|
1313
|
+
|
|
1314
|
+
impl LocalActivityRequestSink for LAReqSink {
|
|
1315
|
+
fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution> {
|
|
1316
|
+
if reqs.is_empty() {
|
|
1317
|
+
return vec![];
|
|
1318
|
+
}
|
|
1319
|
+
|
|
1320
|
+
#[allow(clippy::let_and_return)] // When feature is off clippy doesn't like this
|
|
1321
|
+
let res = self.lam.enqueue(reqs);
|
|
1322
|
+
|
|
1323
|
+
// We always save when there are any reqs, even if the response might be empty, so that
|
|
1324
|
+
// calls/responses are 1:1
|
|
1325
|
+
#[cfg(feature = "save_wf_inputs")]
|
|
1326
|
+
self.write_req(&res);
|
|
1327
|
+
|
|
1328
|
+
res
|
|
1329
|
+
}
|
|
1330
|
+
}
|
|
1331
|
+
|
|
1332
|
+
/// Sorts jobs in an activation to be in the order lang expects:
|
|
1333
|
+
/// `patches -> signals -> other -> queries`
|
|
1334
|
+
fn sort_act_jobs(wfa: &mut WorkflowActivation) {
|
|
1335
|
+
wfa.jobs.sort_by(|j1, j2| {
|
|
1336
|
+
// Unwrapping is fine here since we'll never issue empty variants
|
|
1337
|
+
let j1v = j1.variant.as_ref().unwrap();
|
|
1338
|
+
let j2v = j2.variant.as_ref().unwrap();
|
|
1339
|
+
if discriminant(j1v) == discriminant(j2v) {
|
|
1340
|
+
return Ordering::Equal;
|
|
1341
|
+
}
|
|
1342
|
+
fn variant_ordinal(v: &workflow_activation_job::Variant) -> u8 {
|
|
1343
|
+
match v {
|
|
1344
|
+
workflow_activation_job::Variant::NotifyHasPatch(_) => 1,
|
|
1345
|
+
workflow_activation_job::Variant::SignalWorkflow(_) => 2,
|
|
1346
|
+
workflow_activation_job::Variant::QueryWorkflow(_) => 4,
|
|
1347
|
+
_ => 3,
|
|
1348
|
+
}
|
|
1349
|
+
}
|
|
1350
|
+
variant_ordinal(j1v).cmp(&variant_ordinal(j2v))
|
|
1351
|
+
})
|
|
1352
|
+
}
|
|
1353
|
+
|
|
1354
|
+
#[cfg(test)]
mod tests {
    use super::*;
    use itertools::Itertools;

    /// Jobs supplied out of order must come back as patch, signal, the two "other" jobs in
    /// their original relative order, then query.
    #[test]
    fn jobs_sort() {
        use workflow_activation_job::Variant;

        let scrambled = vec![
            Variant::SignalWorkflow(Default::default()),
            Variant::NotifyHasPatch(Default::default()),
            Variant::QueryWorkflow(Default::default()),
            Variant::FireTimer(Default::default()),
            Variant::ResolveActivity(Default::default()),
        ];
        let mut act = WorkflowActivation {
            jobs: scrambled
                .into_iter()
                .map(|variant| WorkflowActivationJob {
                    variant: Some(variant),
                })
                .collect(),
            ..Default::default()
        };

        sort_act_jobs(&mut act);

        let sorted = act
            .jobs
            .into_iter()
            .map(|job| job.variant.unwrap())
            .collect_vec();
        assert_matches!(
            sorted.as_slice(),
            &[
                Variant::NotifyHasPatch(_),
                Variant::SignalWorkflow(_),
                Variant::FireTimer(_),
                Variant::ResolveActivity(_),
                Variant::QueryWorkflow(_)
            ]
        )
    }
}
|