temporalio 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +25 -23
- data/bridge/Cargo.lock +168 -59
- data/bridge/Cargo.toml +4 -2
- data/bridge/sdk-core/README.md +19 -6
- data/bridge/sdk-core/client/src/lib.rs +215 -39
- data/bridge/sdk-core/client/src/metrics.rs +17 -8
- data/bridge/sdk-core/client/src/raw.rs +4 -4
- data/bridge/sdk-core/client/src/retry.rs +32 -20
- data/bridge/sdk-core/core/Cargo.toml +22 -9
- data/bridge/sdk-core/core/src/abstractions.rs +203 -14
- data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +76 -41
- data/bridge/sdk-core/core/src/core_tests/determinism.rs +165 -2
- data/bridge/sdk-core/core/src/core_tests/local_activities.rs +204 -83
- data/bridge/sdk-core/core/src/core_tests/queries.rs +3 -4
- data/bridge/sdk-core/core/src/core_tests/workers.rs +1 -3
- data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +397 -54
- data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +106 -12
- data/bridge/sdk-core/core/src/internal_flags.rs +136 -0
- data/bridge/sdk-core/core/src/lib.rs +16 -9
- data/bridge/sdk-core/core/src/telemetry/log_export.rs +1 -1
- data/bridge/sdk-core/core/src/telemetry/metrics.rs +69 -35
- data/bridge/sdk-core/core/src/telemetry/mod.rs +29 -13
- data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +17 -12
- data/bridge/sdk-core/core/src/test_help/mod.rs +62 -12
- data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +112 -156
- data/bridge/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
- data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +352 -122
- data/bridge/sdk-core/core/src/worker/activities.rs +233 -157
- data/bridge/sdk-core/core/src/worker/client/mocks.rs +22 -2
- data/bridge/sdk-core/core/src/worker/client.rs +18 -2
- data/bridge/sdk-core/core/src/worker/mod.rs +165 -58
- data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
- data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +3 -5
- data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +856 -277
- data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +100 -43
- data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +7 -7
- data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +5 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +87 -27
- data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +5 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +5 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +5 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +137 -62
- data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +25 -17
- data/bridge/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +7 -6
- data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +103 -152
- data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +7 -7
- data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +9 -9
- data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
- data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +14 -7
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +5 -16
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +201 -121
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +11 -14
- data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +30 -15
- data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +1026 -376
- data/bridge/sdk-core/core/src/worker/workflow/mod.rs +460 -384
- data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
- data/bridge/sdk-core/core/src/worker/workflow/wft_extraction.rs +125 -0
- data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +1 -4
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +448 -718
- data/bridge/sdk-core/core-api/Cargo.toml +2 -1
- data/bridge/sdk-core/core-api/src/errors.rs +1 -34
- data/bridge/sdk-core/core-api/src/lib.rs +6 -2
- data/bridge/sdk-core/core-api/src/telemetry.rs +0 -6
- data/bridge/sdk-core/core-api/src/worker.rs +14 -1
- data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +18 -15
- data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +8 -3
- data/bridge/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +5 -17
- data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +11 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +1 -6
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +6 -6
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +5 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +22 -6
- data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +48 -19
- data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +2 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +3 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/{enums/v1/interaction_type.proto → protocol/v1/message.proto} +29 -11
- data/bridge/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +111 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +59 -28
- data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +2 -2
- data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +7 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +6 -0
- data/bridge/sdk-core/sdk/Cargo.toml +3 -2
- data/bridge/sdk-core/sdk/src/lib.rs +87 -20
- data/bridge/sdk-core/sdk/src/workflow_future.rs +9 -8
- data/bridge/sdk-core/sdk-core-protos/Cargo.toml +5 -2
- data/bridge/sdk-core/sdk-core-protos/build.rs +36 -1
- data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +100 -87
- data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +5 -1
- data/bridge/sdk-core/sdk-core-protos/src/lib.rs +175 -57
- data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
- data/bridge/sdk-core/test-utils/Cargo.toml +3 -1
- data/bridge/sdk-core/test-utils/src/canned_histories.rs +106 -296
- data/bridge/sdk-core/test-utils/src/histfetch.rs +1 -1
- data/bridge/sdk-core/test-utils/src/lib.rs +82 -23
- data/bridge/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
- data/bridge/sdk-core/test-utils/src/workflows.rs +29 -0
- data/bridge/sdk-core/tests/fuzzy_workflow.rs +130 -0
- data/bridge/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
- data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
- data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +5 -3
- data/bridge/sdk-core/tests/integ_tests/metrics_tests.rs +218 -16
- data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +4 -47
- data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
- data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +93 -69
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +72 -191
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +7 -28
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +12 -7
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -4
- data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +10 -11
- data/bridge/sdk-core/tests/main.rs +3 -13
- data/bridge/sdk-core/tests/runner.rs +75 -36
- data/bridge/sdk-core/tests/wf_input_replay.rs +32 -0
- data/bridge/src/connection.rs +41 -25
- data/bridge/src/lib.rs +269 -14
- data/bridge/src/runtime.rs +1 -1
- data/bridge/src/test_server.rs +153 -0
- data/bridge/src/worker.rs +89 -16
- data/lib/gen/temporal/api/command/v1/message_pb.rb +4 -18
- data/lib/gen/temporal/api/common/v1/message_pb.rb +4 -0
- data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +1 -3
- data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +3 -3
- data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +2 -0
- data/lib/gen/temporal/api/enums/v1/update_pb.rb +6 -4
- data/lib/gen/temporal/api/history/v1/message_pb.rb +27 -19
- data/lib/gen/temporal/api/namespace/v1/message_pb.rb +1 -0
- data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +3 -0
- data/lib/gen/temporal/api/protocol/v1/message_pb.rb +30 -0
- data/lib/gen/temporal/api/sdk/v1/task_complete_metadata_pb.rb +23 -0
- data/lib/gen/temporal/api/testservice/v1/request_response_pb.rb +49 -0
- data/lib/gen/temporal/api/testservice/v1/service_pb.rb +21 -0
- data/lib/gen/temporal/api/update/v1/message_pb.rb +72 -0
- data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +26 -16
- data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +13 -9
- data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +10 -6
- data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +13 -9
- data/lib/gen/temporal/sdk/core/common/common_pb.rb +7 -3
- data/lib/gen/temporal/sdk/core/core_interface_pb.rb +9 -3
- data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +7 -3
- data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +27 -21
- data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +28 -24
- data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +12 -5
- data/lib/temporalio/activity/context.rb +13 -8
- data/lib/temporalio/activity/info.rb +1 -1
- data/lib/temporalio/bridge/connect_options.rb +15 -0
- data/lib/temporalio/bridge/retry_config.rb +24 -0
- data/lib/temporalio/bridge/tls_options.rb +19 -0
- data/lib/temporalio/client/implementation.rb +8 -8
- data/lib/temporalio/connection/retry_config.rb +44 -0
- data/lib/temporalio/connection/service.rb +20 -0
- data/lib/temporalio/connection/test_service.rb +92 -0
- data/lib/temporalio/connection/tls_options.rb +51 -0
- data/lib/temporalio/connection/workflow_service.rb +731 -0
- data/lib/temporalio/connection.rb +55 -720
- data/lib/temporalio/interceptor/activity_inbound.rb +22 -0
- data/lib/temporalio/interceptor/activity_outbound.rb +24 -0
- data/lib/temporalio/interceptor/chain.rb +5 -5
- data/lib/temporalio/interceptor/client.rb +8 -4
- data/lib/temporalio/interceptor.rb +22 -0
- data/lib/temporalio/retry_policy.rb +13 -3
- data/lib/temporalio/testing/time_skipping_handle.rb +32 -0
- data/lib/temporalio/testing/time_skipping_interceptor.rb +23 -0
- data/lib/temporalio/testing/workflow_environment.rb +112 -0
- data/lib/temporalio/testing.rb +175 -0
- data/lib/temporalio/version.rb +1 -1
- data/lib/temporalio/worker/activity_runner.rb +26 -4
- data/lib/temporalio/worker/activity_worker.rb +44 -18
- data/lib/temporalio/worker/sync_worker.rb +47 -11
- data/lib/temporalio/worker.rb +27 -21
- data/lib/temporalio/workflow/async.rb +46 -0
- data/lib/temporalio/workflow/future.rb +138 -0
- data/lib/temporalio/workflow/info.rb +76 -0
- data/temporalio.gemspec +4 -3
- metadata +67 -17
- data/bridge/sdk-core/Cargo.lock +0 -2606
- data/bridge/sdk-core/protos/api_upstream/temporal/api/interaction/v1/message.proto +0 -87
- data/lib/bridge.so +0 -0
- data/lib/gen/temporal/api/enums/v1/interaction_type_pb.rb +0 -25
- data/lib/gen/temporal/api/interaction/v1/message_pb.rb +0 -49
- data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +0 -222
|
@@ -8,40 +8,58 @@ mod history_update;
|
|
|
8
8
|
mod machines;
|
|
9
9
|
mod managed_run;
|
|
10
10
|
mod run_cache;
|
|
11
|
+
mod wft_extraction;
|
|
11
12
|
pub(crate) mod wft_poller;
|
|
12
13
|
mod workflow_stream;
|
|
13
14
|
|
|
15
|
+
#[cfg(feature = "save_wf_inputs")]
|
|
16
|
+
pub use workflow_stream::replay_wf_state_inputs;
|
|
17
|
+
|
|
14
18
|
pub(crate) use bridge::WorkflowBridge;
|
|
15
19
|
pub(crate) use driven_workflow::{DrivenWorkflow, WorkflowFetcher};
|
|
16
|
-
pub(crate) use history_update::
|
|
17
|
-
pub(crate) use machines::WFMachinesError;
|
|
20
|
+
pub(crate) use history_update::HistoryUpdate;
|
|
18
21
|
#[cfg(test)]
|
|
19
22
|
pub(crate) use managed_run::ManagedWFFunc;
|
|
20
23
|
|
|
24
|
+
use crate::worker::activities::TrackedPermittedTqResp;
|
|
21
25
|
use crate::{
|
|
22
|
-
abstractions::
|
|
23
|
-
|
|
24
|
-
|
|
26
|
+
abstractions::{
|
|
27
|
+
stream_when_allowed, MeteredSemaphore, TrackedOwnedMeteredSemPermit, UsedMeteredSemPermit,
|
|
28
|
+
},
|
|
29
|
+
internal_flags::InternalFlags,
|
|
30
|
+
protosext::{legacy_query_failure, ValidPollWFTQResponse},
|
|
31
|
+
telemetry::{
|
|
32
|
+
metrics::workflow_worker_type, set_trace_subscriber_for_current_thread, TelemetryInstance,
|
|
33
|
+
VecDisplayer,
|
|
34
|
+
},
|
|
25
35
|
worker::{
|
|
26
|
-
activities::{ActivitiesFromWFTsHandle,
|
|
36
|
+
activities::{ActivitiesFromWFTsHandle, LocalActivityManager},
|
|
27
37
|
client::{WorkerClient, WorkflowTaskCompletion},
|
|
28
38
|
workflow::{
|
|
29
|
-
|
|
39
|
+
history_update::HistoryPaginator,
|
|
40
|
+
managed_run::RunUpdateAct,
|
|
41
|
+
wft_extraction::{HistoryFetchReq, WFTExtractor},
|
|
30
42
|
wft_poller::validate_wft,
|
|
31
43
|
workflow_stream::{LocalInput, LocalInputs, WFStream},
|
|
32
44
|
},
|
|
33
|
-
LocalActRequest, LocalActivityResolution,
|
|
45
|
+
LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
|
|
34
46
|
},
|
|
35
47
|
MetricsContext,
|
|
36
48
|
};
|
|
49
|
+
use anyhow::anyhow;
|
|
37
50
|
use futures::{stream::BoxStream, Stream, StreamExt};
|
|
51
|
+
use futures_util::{future::abortable, stream};
|
|
52
|
+
use prost_types::TimestampError;
|
|
38
53
|
use std::{
|
|
39
|
-
|
|
40
|
-
|
|
54
|
+
cell::RefCell,
|
|
55
|
+
collections::VecDeque,
|
|
56
|
+
fmt::Debug,
|
|
41
57
|
future::Future,
|
|
42
58
|
ops::DerefMut,
|
|
59
|
+
rc::Rc,
|
|
43
60
|
result,
|
|
44
61
|
sync::Arc,
|
|
62
|
+
thread,
|
|
45
63
|
time::{Duration, Instant},
|
|
46
64
|
};
|
|
47
65
|
use temporal_sdk_core_api::errors::{CompleteWfError, PollWfError};
|
|
@@ -59,36 +77,41 @@ use temporal_sdk_core_protos::{
|
|
|
59
77
|
},
|
|
60
78
|
temporal::api::{
|
|
61
79
|
command::v1::{command::Attributes, Command as ProtoCommand, Command},
|
|
62
|
-
common::v1::{Memo, RetryPolicy, SearchAttributes},
|
|
80
|
+
common::v1::{Memo, MeteringMetadata, RetryPolicy, SearchAttributes, WorkflowExecution},
|
|
63
81
|
enums::v1::WorkflowTaskFailedCause,
|
|
82
|
+
query::v1::WorkflowQuery,
|
|
83
|
+
sdk::v1::WorkflowTaskCompletedMetadata,
|
|
64
84
|
taskqueue::v1::StickyExecutionAttributes,
|
|
65
|
-
workflowservice::v1::PollActivityTaskQueueResponse,
|
|
85
|
+
workflowservice::v1::{get_system_info_response, PollActivityTaskQueueResponse},
|
|
66
86
|
},
|
|
67
87
|
TaskToken,
|
|
68
88
|
};
|
|
69
89
|
use tokio::{
|
|
70
90
|
sync::{
|
|
71
|
-
mpsc::{unbounded_channel, UnboundedSender},
|
|
91
|
+
mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender},
|
|
72
92
|
oneshot,
|
|
73
93
|
},
|
|
74
|
-
task,
|
|
75
|
-
task::{JoinError, JoinHandle},
|
|
94
|
+
task::{spawn_blocking, LocalSet},
|
|
76
95
|
};
|
|
77
96
|
use tokio_stream::wrappers::UnboundedReceiverStream;
|
|
78
97
|
use tokio_util::sync::CancellationToken;
|
|
79
98
|
use tracing::Span;
|
|
80
99
|
|
|
81
100
|
pub(crate) const LEGACY_QUERY_ID: &str = "legacy_query";
|
|
101
|
+
/// What percentage of a WFT timeout we are willing to wait before sending a WFT heartbeat when
|
|
102
|
+
/// necessary.
|
|
103
|
+
const WFT_HEARTBEAT_TIMEOUT_FRACTION: f32 = 0.8;
|
|
82
104
|
const MAX_EAGER_ACTIVITY_RESERVATIONS_PER_WORKFLOW_TASK: usize = 3;
|
|
83
105
|
|
|
84
106
|
type Result<T, E = WFMachinesError> = result::Result<T, E>;
|
|
85
107
|
type BoxedActivationStream = BoxStream<'static, Result<ActivationOrAuto, PollWfError>>;
|
|
108
|
+
type InternalFlagsRef = Rc<RefCell<InternalFlags>>;
|
|
86
109
|
|
|
87
110
|
/// Centralizes all state related to workflows and workflow tasks
|
|
88
111
|
pub(crate) struct Workflows {
|
|
89
112
|
task_queue: String,
|
|
90
113
|
local_tx: UnboundedSender<LocalInput>,
|
|
91
|
-
processing_task: tokio::sync::Mutex<Option<JoinHandle<()>>>,
|
|
114
|
+
processing_task: tokio::sync::Mutex<Option<thread::JoinHandle<()>>>,
|
|
92
115
|
activation_stream: tokio::sync::Mutex<(
|
|
93
116
|
BoxedActivationStream,
|
|
94
117
|
// Used to indicate polling may begin
|
|
@@ -100,9 +123,12 @@ pub(crate) struct Workflows {
|
|
|
100
123
|
sticky_attrs: Option<StickyExecutionAttributes>,
|
|
101
124
|
/// If set, can be used to reserve activity task slots for eager-return of new activity tasks.
|
|
102
125
|
activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
|
|
126
|
+
/// Ensures we stay at or below this worker's maximum concurrent workflow task limit
|
|
127
|
+
wft_semaphore: MeteredSemaphore,
|
|
128
|
+
local_act_mgr: Arc<LocalActivityManager>,
|
|
103
129
|
}
|
|
104
130
|
|
|
105
|
-
pub(
|
|
131
|
+
pub(crate) struct WorkflowBasics {
|
|
106
132
|
pub max_cached_workflows: usize,
|
|
107
133
|
pub max_outstanding_wfts: usize,
|
|
108
134
|
pub shutdown_token: CancellationToken,
|
|
@@ -110,53 +136,115 @@ pub(super) struct WorkflowBasics {
|
|
|
110
136
|
pub namespace: String,
|
|
111
137
|
pub task_queue: String,
|
|
112
138
|
pub ignore_evicts_on_shutdown: bool,
|
|
139
|
+
pub fetching_concurrency: usize,
|
|
140
|
+
pub server_capabilities: get_system_info_response::Capabilities,
|
|
141
|
+
#[cfg(feature = "save_wf_inputs")]
|
|
142
|
+
pub wf_state_inputs: Option<UnboundedSender<Vec<u8>>>,
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
pub(crate) struct RunBasics<'a> {
|
|
146
|
+
pub namespace: String,
|
|
147
|
+
pub workflow_id: String,
|
|
148
|
+
pub workflow_type: String,
|
|
149
|
+
pub run_id: String,
|
|
150
|
+
pub history: HistoryUpdate,
|
|
151
|
+
pub metrics: MetricsContext,
|
|
152
|
+
pub capabilities: &'a get_system_info_response::Capabilities,
|
|
113
153
|
}
|
|
114
154
|
|
|
115
155
|
impl Workflows {
|
|
156
|
+
#[allow(clippy::too_many_arguments)] // Not much worth combining here
|
|
116
157
|
pub(super) fn new(
|
|
117
158
|
basics: WorkflowBasics,
|
|
118
159
|
sticky_attrs: Option<StickyExecutionAttributes>,
|
|
119
160
|
client: Arc<dyn WorkerClient>,
|
|
120
161
|
wft_stream: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
|
|
121
|
-
local_activity_request_sink: impl
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
+ 'static,
|
|
162
|
+
local_activity_request_sink: impl LocalActivityRequestSink,
|
|
163
|
+
local_act_mgr: Arc<LocalActivityManager>,
|
|
164
|
+
heartbeat_timeout_rx: UnboundedReceiver<HeartbeatTimeoutMsg>,
|
|
125
165
|
activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
|
|
166
|
+
telem_instance: Option<&TelemetryInstance>,
|
|
126
167
|
) -> Self {
|
|
127
168
|
let (local_tx, local_rx) = unbounded_channel();
|
|
169
|
+
let (fetch_tx, fetch_rx) = unbounded_channel();
|
|
128
170
|
let shutdown_tok = basics.shutdown_token.clone();
|
|
129
171
|
let task_queue = basics.task_queue.clone();
|
|
130
|
-
let
|
|
131
|
-
basics,
|
|
172
|
+
let wft_semaphore = MeteredSemaphore::new(
|
|
173
|
+
basics.max_outstanding_wfts,
|
|
174
|
+
basics.metrics.with_new_attrs([workflow_worker_type()]),
|
|
175
|
+
MetricsContext::available_task_slots,
|
|
176
|
+
);
|
|
177
|
+
// Only allow polling of the new WFT stream if there are available task slots
|
|
178
|
+
let proceeder = stream::unfold(wft_semaphore.clone(), |sem| async move {
|
|
179
|
+
Some((sem.acquire_owned().await.unwrap(), sem))
|
|
180
|
+
});
|
|
181
|
+
let wft_stream = stream_when_allowed(wft_stream, proceeder);
|
|
182
|
+
let extracted_wft_stream = WFTExtractor::build(
|
|
183
|
+
client.clone(),
|
|
184
|
+
basics.fetching_concurrency,
|
|
132
185
|
wft_stream,
|
|
186
|
+
UnboundedReceiverStream::new(fetch_rx),
|
|
187
|
+
);
|
|
188
|
+
let locals_stream = stream::select(
|
|
133
189
|
UnboundedReceiverStream::new(local_rx),
|
|
134
|
-
|
|
135
|
-
local_activity_request_sink,
|
|
190
|
+
UnboundedReceiverStream::new(heartbeat_timeout_rx).map(Into::into),
|
|
136
191
|
);
|
|
137
192
|
let (activation_tx, activation_rx) = unbounded_channel();
|
|
138
193
|
let (start_polling_tx, start_polling_rx) = oneshot::channel();
|
|
139
194
|
// We must spawn a task to constantly poll the activation stream, because otherwise
|
|
140
195
|
// activation completions would not cause anything to happen until the next poll.
|
|
141
|
-
let
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
196
|
+
let tracing_sub = telem_instance.map(|ti| ti.trace_subscriber());
|
|
197
|
+
let processing_task = thread::spawn(move || {
|
|
198
|
+
if let Some(ts) = tracing_sub {
|
|
199
|
+
set_trace_subscriber_for_current_thread(ts);
|
|
200
|
+
}
|
|
201
|
+
let rt = tokio::runtime::Builder::new_current_thread()
|
|
202
|
+
.enable_all()
|
|
203
|
+
.thread_name("workflow-processing")
|
|
204
|
+
.build()
|
|
205
|
+
.unwrap();
|
|
206
|
+
let local = LocalSet::new();
|
|
207
|
+
local.block_on(&rt, async move {
|
|
208
|
+
let mut stream = WFStream::build(
|
|
209
|
+
basics,
|
|
210
|
+
extracted_wft_stream,
|
|
211
|
+
locals_stream,
|
|
212
|
+
local_activity_request_sink,
|
|
213
|
+
);
|
|
214
|
+
|
|
215
|
+
// However, we want to avoid plowing ahead until we've been asked to poll at least
|
|
216
|
+
// once. This supports activity-only workers.
|
|
217
|
+
let do_poll = tokio::select! {
|
|
218
|
+
sp = start_polling_rx => {
|
|
219
|
+
sp.is_ok()
|
|
220
|
+
}
|
|
221
|
+
_ = shutdown_tok.cancelled() => {
|
|
222
|
+
false
|
|
223
|
+
}
|
|
224
|
+
};
|
|
225
|
+
if !do_poll {
|
|
226
|
+
return;
|
|
147
227
|
}
|
|
148
|
-
|
|
149
|
-
|
|
228
|
+
while let Some(output) = stream.next().await {
|
|
229
|
+
match output {
|
|
230
|
+
Ok(o) => {
|
|
231
|
+
for fetchreq in o.fetch_histories {
|
|
232
|
+
fetch_tx
|
|
233
|
+
.send(fetchreq)
|
|
234
|
+
.expect("Fetch channel must not be dropped");
|
|
235
|
+
}
|
|
236
|
+
for act in o.activations {
|
|
237
|
+
activation_tx
|
|
238
|
+
.send(Ok(act))
|
|
239
|
+
.expect("Activation processor channel not dropped");
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
Err(e) => activation_tx
|
|
243
|
+
.send(Err(e))
|
|
244
|
+
.expect("Activation processor channel not dropped"),
|
|
245
|
+
}
|
|
150
246
|
}
|
|
151
|
-
};
|
|
152
|
-
if !do_poll {
|
|
153
|
-
return;
|
|
154
|
-
}
|
|
155
|
-
while let Some(act) = stream.next().await {
|
|
156
|
-
activation_tx
|
|
157
|
-
.send(act)
|
|
158
|
-
.expect("Activation processor channel not dropped");
|
|
159
|
-
}
|
|
247
|
+
});
|
|
160
248
|
});
|
|
161
249
|
Self {
|
|
162
250
|
task_queue,
|
|
@@ -169,12 +257,14 @@ impl Workflows {
|
|
|
169
257
|
client,
|
|
170
258
|
sticky_attrs,
|
|
171
259
|
activity_tasks_handle,
|
|
260
|
+
wft_semaphore,
|
|
261
|
+
local_act_mgr,
|
|
172
262
|
}
|
|
173
263
|
}
|
|
174
264
|
|
|
175
|
-
pub async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
|
|
265
|
+
pub(super) async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
|
|
176
266
|
loop {
|
|
177
|
-
let
|
|
267
|
+
let al = {
|
|
178
268
|
let mut lock = self.activation_stream.lock().await;
|
|
179
269
|
let (ref mut stream, ref mut beginner) = lock.deref_mut();
|
|
180
270
|
if let Some(beginner) = beginner.take() {
|
|
@@ -182,17 +272,37 @@ impl Workflows {
|
|
|
182
272
|
}
|
|
183
273
|
stream.next().await.unwrap_or(Err(PollWfError::ShutDown))?
|
|
184
274
|
};
|
|
185
|
-
Span::current().record("run_id",
|
|
186
|
-
match
|
|
275
|
+
Span::current().record("run_id", al.run_id());
|
|
276
|
+
match al {
|
|
187
277
|
ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
|
|
188
278
|
debug!(activation=%act, "Sending activation to lang");
|
|
189
279
|
break Ok(act);
|
|
190
280
|
}
|
|
191
281
|
ActivationOrAuto::Autocomplete { run_id } => {
|
|
192
|
-
self.activation_completed(
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
282
|
+
self.activation_completed(
|
|
283
|
+
WorkflowActivationCompletion {
|
|
284
|
+
run_id,
|
|
285
|
+
status: Some(
|
|
286
|
+
workflow_completion::Success::from_variants(vec![]).into(),
|
|
287
|
+
),
|
|
288
|
+
},
|
|
289
|
+
// We need to say a type, but the type is irrelevant, so imagine some
|
|
290
|
+
// boxed function we'll never call.
|
|
291
|
+
Option::<Box<dyn Fn(&str, usize) + Send>>::None,
|
|
292
|
+
)
|
|
293
|
+
.await?;
|
|
294
|
+
}
|
|
295
|
+
ActivationOrAuto::AutoFail {
|
|
296
|
+
run_id,
|
|
297
|
+
machines_err,
|
|
298
|
+
} => {
|
|
299
|
+
self.activation_completed(
|
|
300
|
+
WorkflowActivationCompletion {
|
|
301
|
+
run_id,
|
|
302
|
+
status: Some(auto_fail_to_complete_status(machines_err)),
|
|
303
|
+
},
|
|
304
|
+
Option::<Box<dyn Fn(&str, usize) + Send>>::None,
|
|
305
|
+
)
|
|
196
306
|
.await?;
|
|
197
307
|
}
|
|
198
308
|
}
|
|
@@ -202,10 +312,11 @@ impl Workflows {
|
|
|
202
312
|
/// Queue an activation completion for processing, returning a future that will resolve with
|
|
203
313
|
/// the outcome of that completion. See [ActivationCompletedOutcome].
|
|
204
314
|
///
|
|
205
|
-
/// Returns the most-recently-processed event number for the run
|
|
206
|
-
pub async fn activation_completed(
|
|
315
|
+
/// Returns the most-recently-processed event number for the run.
|
|
316
|
+
pub(super) async fn activation_completed(
|
|
207
317
|
&self,
|
|
208
318
|
completion: WorkflowActivationCompletion,
|
|
319
|
+
post_activate_hook: Option<impl Fn(&str, usize)>,
|
|
209
320
|
) -> Result<usize, CompleteWfError> {
|
|
210
321
|
let is_empty_completion = completion.is_empty();
|
|
211
322
|
let completion = validate_completion(completion)?;
|
|
@@ -213,7 +324,7 @@ impl Workflows {
|
|
|
213
324
|
let (tx, rx) = oneshot::channel();
|
|
214
325
|
let was_sent = self.send_local(WFActCompleteMsg {
|
|
215
326
|
completion,
|
|
216
|
-
response_tx: tx,
|
|
327
|
+
response_tx: Some(tx),
|
|
217
328
|
});
|
|
218
329
|
if !was_sent {
|
|
219
330
|
if is_empty_completion {
|
|
@@ -230,7 +341,7 @@ impl Workflows {
|
|
|
230
341
|
.await
|
|
231
342
|
.expect("Send half of activation complete response not dropped");
|
|
232
343
|
let mut wft_from_complete = None;
|
|
233
|
-
let
|
|
344
|
+
let wft_report_status = match completion_outcome.outcome {
|
|
234
345
|
ActivationCompleteOutcome::ReportWFTSuccess(report) => match report {
|
|
235
346
|
ServerCommandsWithWorkflowInfo {
|
|
236
347
|
task_token,
|
|
@@ -239,6 +350,7 @@ impl Workflows {
|
|
|
239
350
|
mut commands,
|
|
240
351
|
query_responses,
|
|
241
352
|
force_new_wft,
|
|
353
|
+
sdk_metadata,
|
|
242
354
|
},
|
|
243
355
|
} => {
|
|
244
356
|
let reserved_act_permits =
|
|
@@ -252,6 +364,13 @@ impl Workflows {
|
|
|
252
364
|
sticky_attributes: None,
|
|
253
365
|
return_new_workflow_task: true,
|
|
254
366
|
force_create_new_workflow_task: force_new_wft,
|
|
367
|
+
sdk_metadata,
|
|
368
|
+
metering_metadata: MeteringMetadata {
|
|
369
|
+
nonfirst_local_activity_execution_attempts: self
|
|
370
|
+
.local_act_mgr
|
|
371
|
+
.get_nonfirst_attempt_count(&run_id)
|
|
372
|
+
as u32,
|
|
373
|
+
},
|
|
255
374
|
};
|
|
256
375
|
let sticky_attrs = self.sticky_attrs.clone();
|
|
257
376
|
// Do not return new WFT if we would not cache, because returned new WFTs are
|
|
@@ -273,14 +392,14 @@ impl Workflows {
|
|
|
273
392
|
Ok(())
|
|
274
393
|
})
|
|
275
394
|
.await;
|
|
276
|
-
|
|
395
|
+
WFTReportStatus::Reported
|
|
277
396
|
}
|
|
278
397
|
ServerCommandsWithWorkflowInfo {
|
|
279
398
|
task_token,
|
|
280
399
|
action: ActivationAction::RespondLegacyQuery { result },
|
|
281
400
|
} => {
|
|
282
401
|
self.respond_legacy_query(task_token, *result).await;
|
|
283
|
-
|
|
402
|
+
WFTReportStatus::Reported
|
|
284
403
|
}
|
|
285
404
|
},
|
|
286
405
|
ActivationCompleteOutcome::ReportWFTFail(outcome) => match outcome {
|
|
@@ -292,29 +411,54 @@ impl Workflows {
|
|
|
292
411
|
.await
|
|
293
412
|
})
|
|
294
413
|
.await;
|
|
295
|
-
|
|
414
|
+
WFTReportStatus::Reported
|
|
296
415
|
}
|
|
297
416
|
FailedActivationWFTReport::ReportLegacyQueryFailure(task_token, failure) => {
|
|
298
417
|
warn!(run_id=%run_id, failure=?failure, "Failing legacy query request");
|
|
299
418
|
self.respond_legacy_query(task_token, legacy_query_failure(failure))
|
|
300
419
|
.await;
|
|
301
|
-
|
|
420
|
+
WFTReportStatus::Reported
|
|
302
421
|
}
|
|
303
422
|
},
|
|
304
|
-
ActivationCompleteOutcome::
|
|
423
|
+
ActivationCompleteOutcome::WFTFailedDontReport => WFTReportStatus::DropWft,
|
|
424
|
+
ActivationCompleteOutcome::DoNothing => WFTReportStatus::NotReported,
|
|
425
|
+
};
|
|
426
|
+
|
|
427
|
+
let maybe_pwft = if let Some(wft) = wft_from_complete {
|
|
428
|
+
match HistoryPaginator::from_poll(wft, self.client.clone()).await {
|
|
429
|
+
Ok((paginator, pwft)) => Some((pwft, paginator)),
|
|
430
|
+
Err(e) => {
|
|
431
|
+
self.request_eviction(
|
|
432
|
+
&run_id,
|
|
433
|
+
format!("Failed to paginate workflow task from completion: {e:?}"),
|
|
434
|
+
EvictionReason::Fatal,
|
|
435
|
+
);
|
|
436
|
+
None
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
} else {
|
|
440
|
+
None
|
|
305
441
|
};
|
|
306
442
|
|
|
443
|
+
if let Some(h) = post_activate_hook {
|
|
444
|
+
h(&run_id, completion_outcome.most_recently_processed_event);
|
|
445
|
+
}
|
|
446
|
+
|
|
307
447
|
self.post_activation(PostActivationMsg {
|
|
308
448
|
run_id,
|
|
309
|
-
|
|
310
|
-
wft_from_complete,
|
|
449
|
+
wft_report_status,
|
|
450
|
+
wft_from_complete: maybe_pwft,
|
|
311
451
|
});
|
|
312
452
|
|
|
313
453
|
Ok(completion_outcome.most_recently_processed_event)
|
|
314
454
|
}
|
|
315
455
|
|
|
316
456
|
/// Tell workflow that a local activity has finished with the provided result
|
|
317
|
-
pub fn notify_of_local_result(
|
|
457
|
+
pub(super) fn notify_of_local_result(
|
|
458
|
+
&self,
|
|
459
|
+
run_id: impl Into<String>,
|
|
460
|
+
resolved: LocalResolution,
|
|
461
|
+
) {
|
|
318
462
|
self.send_local(LocalResolutionMsg {
|
|
319
463
|
run_id: run_id.into(),
|
|
320
464
|
res: resolved,
|
|
@@ -322,7 +466,7 @@ impl Workflows {
|
|
|
322
466
|
}
|
|
323
467
|
|
|
324
468
|
/// Request eviction of a workflow
|
|
325
|
-
pub fn request_eviction(
|
|
469
|
+
pub(super) fn request_eviction(
|
|
326
470
|
&self,
|
|
327
471
|
run_id: impl Into<String>,
|
|
328
472
|
message: impl Into<String>,
|
|
@@ -336,22 +480,39 @@ impl Workflows {
|
|
|
336
480
|
}
|
|
337
481
|
|
|
338
482
|
/// Query the state of workflow management. Can return `None` if workflow state is shut down.
|
|
339
|
-
pub fn get_state_info(&self) -> impl Future<Output = Option<WorkflowStateInfo>> {
|
|
483
|
+
pub(super) fn get_state_info(&self) -> impl Future<Output = Option<WorkflowStateInfo>> {
|
|
340
484
|
let (tx, rx) = oneshot::channel();
|
|
341
485
|
self.send_local(GetStateInfoMsg { response_tx: tx });
|
|
342
486
|
async move { rx.await.ok() }
|
|
343
487
|
}
|
|
344
488
|
|
|
345
|
-
pub
|
|
489
|
+
pub(super) fn available_wft_permits(&self) -> usize {
|
|
490
|
+
self.wft_semaphore.available_permits()
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
pub(super) async fn shutdown(&self) -> Result<(), anyhow::Error> {
|
|
346
494
|
let maybe_jh = self.processing_task.lock().await.take();
|
|
347
495
|
if let Some(jh) = maybe_jh {
|
|
348
|
-
// This
|
|
349
|
-
//
|
|
350
|
-
let
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
496
|
+
// This serves to drive the stream if it is still alive and wouldn't otherwise receive
|
|
497
|
+
// another message. It allows it to shut itself down.
|
|
498
|
+
let (waker, stop_waker) = abortable(async {
|
|
499
|
+
let mut interval = tokio::time::interval(Duration::from_millis(10));
|
|
500
|
+
loop {
|
|
501
|
+
interval.tick().await;
|
|
502
|
+
let _ = self.get_state_info().await;
|
|
503
|
+
}
|
|
504
|
+
});
|
|
505
|
+
let (_, jh_res) = tokio::join!(
|
|
506
|
+
waker,
|
|
507
|
+
spawn_blocking(move || {
|
|
508
|
+
let r = jh.join();
|
|
509
|
+
stop_waker.abort();
|
|
510
|
+
r
|
|
511
|
+
})
|
|
512
|
+
);
|
|
513
|
+
jh_res?.map_err(|e| anyhow!("Error joining workflow processing thread: {e:?}"))?;
|
|
354
514
|
}
|
|
515
|
+
Ok(())
|
|
355
516
|
}
|
|
356
517
|
|
|
357
518
|
/// Must be called after every activation completion has finished
|
|
@@ -393,7 +554,11 @@ impl Workflows {
|
|
|
393
554
|
/// successfully.
|
|
394
555
|
fn send_local(&self, msg: impl Into<LocalInputs>) -> bool {
|
|
395
556
|
let msg = msg.into();
|
|
396
|
-
let print_err =
|
|
557
|
+
let print_err = match &msg {
|
|
558
|
+
LocalInputs::GetStateInfo(_) => false,
|
|
559
|
+
LocalInputs::LocalResolution(lr) if lr.res.is_la_cancel_confirmation() => false,
|
|
560
|
+
_ => true,
|
|
561
|
+
};
|
|
397
562
|
if let Err(e) = self.local_tx.send(LocalInput {
|
|
398
563
|
input: msg,
|
|
399
564
|
span: Span::current(),
|
|
@@ -414,7 +579,7 @@ impl Workflows {
|
|
|
414
579
|
/// Process eagerly returned activities from WFT completion
|
|
415
580
|
fn handle_eager_activities(
|
|
416
581
|
&self,
|
|
417
|
-
reserved_act_permits: Vec<
|
|
582
|
+
reserved_act_permits: Vec<TrackedOwnedMeteredSemPermit>,
|
|
418
583
|
eager_acts: Vec<PollActivityTaskQueueResponse>,
|
|
419
584
|
) {
|
|
420
585
|
if let Some(at_handle) = self.activity_tasks_handle.as_ref() {
|
|
@@ -435,7 +600,7 @@ impl Workflows {
|
|
|
435
600
|
let with_permits = reserved_act_permits
|
|
436
601
|
.into_iter()
|
|
437
602
|
.zip(eager_acts.into_iter())
|
|
438
|
-
.map(|(permit, resp)|
|
|
603
|
+
.map(|(permit, resp)| TrackedPermittedTqResp { permit, resp });
|
|
439
604
|
if with_permits.len() > 0 {
|
|
440
605
|
debug!(
|
|
441
606
|
"Adding {} activity tasks received from WFT complete",
|
|
@@ -458,7 +623,7 @@ impl Workflows {
|
|
|
458
623
|
fn reserve_activity_slots_for_outgoing_commands(
|
|
459
624
|
&self,
|
|
460
625
|
commands: &mut [Command],
|
|
461
|
-
) -> Vec<
|
|
626
|
+
) -> Vec<TrackedOwnedMeteredSemPermit> {
|
|
462
627
|
let mut reserved = vec![];
|
|
463
628
|
for cmd in commands {
|
|
464
629
|
if let Some(Attributes::ScheduleActivityTaskCommandAttributes(attrs)) =
|
|
@@ -509,186 +674,30 @@ impl Workflows {
|
|
|
509
674
|
}
|
|
510
675
|
}
|
|
511
676
|
|
|
512
|
-
///
|
|
513
|
-
///
|
|
514
|
-
#[derive(derive_more::
|
|
515
|
-
#[
|
|
516
|
-
fmt = "
|
|
517
|
-
|
|
518
|
-
trying_to_evict: {}, last_action_acked: {} }}",
|
|
519
|
-
wft,
|
|
520
|
-
activation,
|
|
521
|
-
buffered_resp,
|
|
522
|
-
have_seen_terminal_event,
|
|
523
|
-
most_recently_processed_event_number,
|
|
524
|
-
more_pending_work,
|
|
525
|
-
"trying_to_evict.is_some()",
|
|
526
|
-
last_action_acked
|
|
677
|
+
/// Returned when a cache miss happens and we need to fetch history from the beginning to
|
|
678
|
+
/// replay a run
|
|
679
|
+
#[derive(Debug, derive_more::Display)]
|
|
680
|
+
#[display(
|
|
681
|
+
fmt = "CacheMissFetchReq(run_id: {})",
|
|
682
|
+
"original_wft.work.execution.run_id"
|
|
527
683
|
)]
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
have_seen_terminal_event: bool,
|
|
540
|
-
/// The most recently processed event id this machine has seen. 0 means it has seen nothing.
|
|
541
|
-
most_recently_processed_event_number: usize,
|
|
542
|
-
/// Is set true when the machines indicate that there is additional known work to be processed
|
|
543
|
-
more_pending_work: bool,
|
|
544
|
-
/// Is set if an eviction has been requested for this run
|
|
545
|
-
trying_to_evict: Option<RequestEvictMsg>,
|
|
546
|
-
/// Set to true if the last action we tried to take to this run has been processed (ie: the
|
|
547
|
-
/// [RunUpdateResponse] for it has been seen.
|
|
548
|
-
last_action_acked: bool,
|
|
549
|
-
/// For sending work to the machines
|
|
550
|
-
run_actions_tx: UnboundedSender<RunAction>,
|
|
551
|
-
/// Handle to the task where the actual machines live
|
|
552
|
-
handle: JoinHandle<()>,
|
|
553
|
-
|
|
554
|
-
/// We track if we have recorded useful debugging values onto a certain span yet, to overcome
|
|
555
|
-
/// duplicating field values. Remove this once https://github.com/tokio-rs/tracing/issues/2334
|
|
556
|
-
/// is fixed.
|
|
557
|
-
recorded_span_ids: HashSet<tracing::Id>,
|
|
558
|
-
metrics: MetricsContext,
|
|
684
|
+
#[must_use]
|
|
685
|
+
struct CacheMissFetchReq {
|
|
686
|
+
original_wft: PermittedWFT,
|
|
687
|
+
}
|
|
688
|
+
/// Bubbled up from inside workflow state if we're trying to apply the next workflow task but it
|
|
689
|
+
/// isn't in memory
|
|
690
|
+
#[derive(Debug)]
|
|
691
|
+
#[must_use]
|
|
692
|
+
struct NextPageReq {
|
|
693
|
+
paginator: HistoryPaginator,
|
|
694
|
+
span: Span,
|
|
559
695
|
}
|
|
560
|
-
impl ManagedRunHandle {
|
|
561
|
-
fn new(
|
|
562
|
-
wfm: WorkflowManager,
|
|
563
|
-
activations_tx: UnboundedSender<RunUpdateResponse>,
|
|
564
|
-
local_activity_request_sink: LocalActivityRequestSink,
|
|
565
|
-
metrics: MetricsContext,
|
|
566
|
-
) -> Self {
|
|
567
|
-
let (run_actions_tx, run_actions_rx) = unbounded_channel();
|
|
568
|
-
let managed = ManagedRun::new(wfm, activations_tx, local_activity_request_sink);
|
|
569
|
-
let handle = tokio::task::spawn(managed.run(run_actions_rx));
|
|
570
|
-
Self {
|
|
571
|
-
wft: None,
|
|
572
|
-
activation: None,
|
|
573
|
-
buffered_resp: None,
|
|
574
|
-
have_seen_terminal_event: false,
|
|
575
|
-
most_recently_processed_event_number: 0,
|
|
576
|
-
more_pending_work: false,
|
|
577
|
-
trying_to_evict: None,
|
|
578
|
-
last_action_acked: true,
|
|
579
|
-
run_actions_tx,
|
|
580
|
-
handle,
|
|
581
|
-
recorded_span_ids: Default::default(),
|
|
582
|
-
metrics,
|
|
583
|
-
}
|
|
584
|
-
}
|
|
585
|
-
|
|
586
|
-
fn incoming_wft(&mut self, wft: NewIncomingWFT) {
|
|
587
|
-
if self.wft.is_some() {
|
|
588
|
-
error!("Trying to send a new WFT for a run which already has one!");
|
|
589
|
-
}
|
|
590
|
-
self.send_run_action(RunActions::NewIncomingWFT(wft));
|
|
591
|
-
}
|
|
592
|
-
fn check_more_activations(&mut self) {
|
|
593
|
-
// No point in checking for more activations if we have not acked the last update, or
|
|
594
|
-
// if there's already an outstanding activation.
|
|
595
|
-
if self.last_action_acked && self.activation.is_none() {
|
|
596
|
-
self.send_run_action(RunActions::CheckMoreWork {
|
|
597
|
-
want_to_evict: self.trying_to_evict.clone(),
|
|
598
|
-
has_pending_queries: self
|
|
599
|
-
.wft
|
|
600
|
-
.as_ref()
|
|
601
|
-
.map(|wft| !wft.pending_queries.is_empty())
|
|
602
|
-
.unwrap_or_default(),
|
|
603
|
-
has_wft: self.wft.is_some(),
|
|
604
|
-
});
|
|
605
|
-
}
|
|
606
|
-
}
|
|
607
|
-
fn send_completion(&mut self, c: RunActivationCompletion) {
|
|
608
|
-
self.send_run_action(RunActions::ActivationCompletion(c));
|
|
609
|
-
}
|
|
610
|
-
fn send_local_resolution(&mut self, r: LocalResolution) {
|
|
611
|
-
self.send_run_action(RunActions::LocalResolution(r));
|
|
612
|
-
}
|
|
613
|
-
|
|
614
|
-
fn insert_outstanding_activation(&mut self, act: &ActivationOrAuto) {
|
|
615
|
-
let act_type = match &act {
|
|
616
|
-
ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
|
|
617
|
-
if act.is_legacy_query() {
|
|
618
|
-
OutstandingActivation::LegacyQuery
|
|
619
|
-
} else {
|
|
620
|
-
OutstandingActivation::Normal {
|
|
621
|
-
contains_eviction: act.eviction_index().is_some(),
|
|
622
|
-
num_jobs: act.jobs.len(),
|
|
623
|
-
}
|
|
624
|
-
}
|
|
625
|
-
}
|
|
626
|
-
ActivationOrAuto::Autocomplete { .. } => OutstandingActivation::Autocomplete,
|
|
627
|
-
};
|
|
628
|
-
if let Some(old_act) = self.activation {
|
|
629
|
-
// This is a panic because we have screwed up core logic if this is violated. It must be
|
|
630
|
-
// upheld.
|
|
631
|
-
panic!(
|
|
632
|
-
"Attempted to insert a new outstanding activation {:?}, but there already was \
|
|
633
|
-
one outstanding: {:?}",
|
|
634
|
-
act, old_act
|
|
635
|
-
);
|
|
636
|
-
}
|
|
637
|
-
self.activation = Some(act_type);
|
|
638
|
-
}
|
|
639
|
-
|
|
640
|
-
fn send_run_action(&mut self, action: RunActions) {
|
|
641
|
-
self.last_action_acked = false;
|
|
642
|
-
self.run_actions_tx
|
|
643
|
-
.send(RunAction {
|
|
644
|
-
action,
|
|
645
|
-
trace_span: Span::current(),
|
|
646
|
-
})
|
|
647
|
-
.expect("Receive half of run actions not dropped");
|
|
648
|
-
}
|
|
649
|
-
|
|
650
|
-
/// Returns true if the managed run has any form of pending work
|
|
651
|
-
/// If `ignore_evicts` is true, pending evictions do not count as pending work.
|
|
652
|
-
/// If `ignore_buffered` is true, buffered workflow tasks do not count as pending work.
|
|
653
|
-
fn has_any_pending_work(&self, ignore_evicts: bool, ignore_buffered: bool) -> bool {
|
|
654
|
-
let evict_work = if ignore_evicts {
|
|
655
|
-
false
|
|
656
|
-
} else {
|
|
657
|
-
self.trying_to_evict.is_some()
|
|
658
|
-
};
|
|
659
|
-
let act_work = if ignore_evicts {
|
|
660
|
-
if let Some(ref act) = self.activation {
|
|
661
|
-
!act.has_only_eviction()
|
|
662
|
-
} else {
|
|
663
|
-
false
|
|
664
|
-
}
|
|
665
|
-
} else {
|
|
666
|
-
self.activation.is_some()
|
|
667
|
-
};
|
|
668
|
-
let buffered = if ignore_buffered {
|
|
669
|
-
false
|
|
670
|
-
} else {
|
|
671
|
-
self.buffered_resp.is_some()
|
|
672
|
-
};
|
|
673
|
-
self.wft.is_some()
|
|
674
|
-
|| buffered
|
|
675
|
-
|| !self.last_action_acked
|
|
676
|
-
|| self.more_pending_work
|
|
677
|
-
|| act_work
|
|
678
|
-
|| evict_work
|
|
679
|
-
}
|
|
680
696
|
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
matches!(self.activation, Some(OutstandingActivation::LegacyQuery))
|
|
686
|
-
|| self
|
|
687
|
-
.wft
|
|
688
|
-
.as_ref()
|
|
689
|
-
.map(|t| t.has_pending_legacy_query())
|
|
690
|
-
.unwrap_or_default()
|
|
691
|
-
}
|
|
697
|
+
#[derive(Debug)]
|
|
698
|
+
struct WFStreamOutput {
|
|
699
|
+
activations: VecDeque<ActivationOrAuto>,
|
|
700
|
+
fetch_histories: VecDeque<HistoryFetchReq>,
|
|
692
701
|
}
|
|
693
702
|
|
|
694
703
|
#[derive(Debug, derive_more::Display)]
|
|
@@ -697,9 +706,15 @@ enum ActivationOrAuto {
|
|
|
697
706
|
/// This type should only be filled with an empty activation which is ready to have queries
|
|
698
707
|
/// inserted into the joblist
|
|
699
708
|
ReadyForQueries(WorkflowActivation),
|
|
709
|
+
#[display(fmt = "Autocomplete(run_id={run_id})")]
|
|
700
710
|
Autocomplete {
|
|
701
711
|
run_id: String,
|
|
702
712
|
},
|
|
713
|
+
#[display(fmt = "AutoFail(run_id={run_id})")]
|
|
714
|
+
AutoFail {
|
|
715
|
+
run_id: String,
|
|
716
|
+
machines_err: WFMachinesError,
|
|
717
|
+
},
|
|
703
718
|
}
|
|
704
719
|
impl ActivationOrAuto {
|
|
705
720
|
pub fn run_id(&self) -> &str {
|
|
@@ -707,15 +722,53 @@ impl ActivationOrAuto {
|
|
|
707
722
|
ActivationOrAuto::LangActivation(act) => &act.run_id,
|
|
708
723
|
ActivationOrAuto::Autocomplete { run_id, .. } => run_id,
|
|
709
724
|
ActivationOrAuto::ReadyForQueries(act) => &act.run_id,
|
|
725
|
+
ActivationOrAuto::AutoFail { run_id, .. } => run_id,
|
|
710
726
|
}
|
|
711
727
|
}
|
|
712
728
|
}
|
|
713
729
|
|
|
730
|
+
/// A processed WFT which has been validated and had a history update extracted from it
|
|
714
731
|
#[derive(derive_more::DebugCustom)]
|
|
715
|
-
#[
|
|
732
|
+
#[cfg_attr(
|
|
733
|
+
feature = "save_wf_inputs",
|
|
734
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
735
|
+
)]
|
|
736
|
+
#[debug(fmt = "PermittedWft({work:?})")]
|
|
716
737
|
pub(crate) struct PermittedWFT {
|
|
717
|
-
|
|
718
|
-
|
|
738
|
+
work: PreparedWFT,
|
|
739
|
+
#[cfg_attr(
|
|
740
|
+
feature = "save_wf_inputs",
|
|
741
|
+
serde(skip, default = "UsedMeteredSemPermit::fake_deserialized")
|
|
742
|
+
)]
|
|
743
|
+
permit: UsedMeteredSemPermit,
|
|
744
|
+
#[cfg_attr(
|
|
745
|
+
feature = "save_wf_inputs",
|
|
746
|
+
serde(skip, default = "HistoryPaginator::fake_deserialized")
|
|
747
|
+
)]
|
|
748
|
+
paginator: HistoryPaginator,
|
|
749
|
+
}
|
|
750
|
+
#[derive(Debug)]
|
|
751
|
+
#[cfg_attr(
|
|
752
|
+
feature = "save_wf_inputs",
|
|
753
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
754
|
+
)]
|
|
755
|
+
struct PreparedWFT {
|
|
756
|
+
task_token: TaskToken,
|
|
757
|
+
attempt: u32,
|
|
758
|
+
execution: WorkflowExecution,
|
|
759
|
+
workflow_type: String,
|
|
760
|
+
legacy_query: Option<WorkflowQuery>,
|
|
761
|
+
query_requests: Vec<QueryWorkflow>,
|
|
762
|
+
update: HistoryUpdate,
|
|
763
|
+
}
|
|
764
|
+
impl PreparedWFT {
|
|
765
|
+
/// Returns true if the contained history update is incremental (IE: expects to hit a cached
|
|
766
|
+
/// workflow)
|
|
767
|
+
pub fn is_incremental(&self) -> bool {
|
|
768
|
+
let start_event_id = self.update.first_event_id();
|
|
769
|
+
let poll_resp_is_incremental = start_event_id.map(|eid| eid > 1).unwrap_or_default();
|
|
770
|
+
poll_resp_is_incremental || start_event_id.is_none()
|
|
771
|
+
}
|
|
719
772
|
}
|
|
720
773
|
|
|
721
774
|
#[derive(Debug)]
|
|
@@ -727,7 +780,7 @@ pub(crate) struct OutstandingTask {
|
|
|
727
780
|
pub start_time: Instant,
|
|
728
781
|
/// The WFT permit owned by this task, ensures we don't exceed max concurrent WFT, and makes
|
|
729
782
|
/// sure the permit is automatically freed when we delete the task.
|
|
730
|
-
pub permit:
|
|
783
|
+
pub permit: UsedMeteredSemPermit,
|
|
731
784
|
}
|
|
732
785
|
|
|
733
786
|
impl OutstandingTask {
|
|
@@ -806,49 +859,80 @@ pub(crate) enum ActivationAction {
|
|
|
806
859
|
commands: Vec<ProtoCommand>,
|
|
807
860
|
query_responses: Vec<QueryResult>,
|
|
808
861
|
force_new_wft: bool,
|
|
862
|
+
sdk_metadata: WorkflowTaskCompletedMetadata,
|
|
809
863
|
},
|
|
810
864
|
/// We should respond to a legacy query request
|
|
811
865
|
RespondLegacyQuery { result: Box<QueryResult> },
|
|
812
866
|
}
|
|
813
867
|
|
|
814
|
-
#[derive(Debug
|
|
815
|
-
|
|
816
|
-
EvictionRequested(Option<u32
|
|
868
|
+
#[derive(Debug)]
|
|
869
|
+
enum EvictionRequestResult {
|
|
870
|
+
EvictionRequested(Option<u32>, RunUpdateAct),
|
|
817
871
|
NotFound,
|
|
818
872
|
EvictionAlreadyRequested(Option<u32>),
|
|
819
873
|
}
|
|
874
|
+
impl EvictionRequestResult {
|
|
875
|
+
fn into_run_update_resp(self) -> RunUpdateAct {
|
|
876
|
+
match self {
|
|
877
|
+
EvictionRequestResult::EvictionRequested(_, resp) => resp,
|
|
878
|
+
EvictionRequestResult::NotFound
|
|
879
|
+
| EvictionRequestResult::EvictionAlreadyRequested(_) => None,
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
}
|
|
820
883
|
|
|
821
884
|
#[derive(Debug)]
|
|
822
885
|
#[allow(dead_code)] // Not always used in non-test
|
|
823
886
|
pub(crate) struct WorkflowStateInfo {
|
|
824
887
|
pub cached_workflows: usize,
|
|
825
888
|
pub outstanding_wft: usize,
|
|
826
|
-
pub available_wft_permits: usize,
|
|
827
889
|
}
|
|
828
890
|
|
|
829
891
|
#[derive(Debug)]
|
|
892
|
+
#[cfg_attr(
|
|
893
|
+
feature = "save_wf_inputs",
|
|
894
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
895
|
+
)]
|
|
830
896
|
struct WFActCompleteMsg {
|
|
831
897
|
completion: ValidatedCompletion,
|
|
832
|
-
|
|
898
|
+
#[cfg_attr(feature = "save_wf_inputs", serde(skip))]
|
|
899
|
+
response_tx: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
833
900
|
}
|
|
834
901
|
#[derive(Debug)]
|
|
902
|
+
#[cfg_attr(
|
|
903
|
+
feature = "save_wf_inputs",
|
|
904
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
905
|
+
)]
|
|
835
906
|
struct LocalResolutionMsg {
|
|
836
907
|
run_id: String,
|
|
837
908
|
res: LocalResolution,
|
|
838
909
|
}
|
|
839
910
|
#[derive(Debug)]
|
|
911
|
+
#[cfg_attr(
|
|
912
|
+
feature = "save_wf_inputs",
|
|
913
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
914
|
+
)]
|
|
840
915
|
struct PostActivationMsg {
|
|
841
916
|
run_id: String,
|
|
842
|
-
|
|
843
|
-
wft_from_complete: Option<
|
|
917
|
+
wft_report_status: WFTReportStatus,
|
|
918
|
+
wft_from_complete: Option<(PreparedWFT, HistoryPaginator)>,
|
|
844
919
|
}
|
|
845
920
|
#[derive(Debug, Clone)]
|
|
921
|
+
#[cfg_attr(
|
|
922
|
+
feature = "save_wf_inputs",
|
|
923
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
924
|
+
)]
|
|
846
925
|
struct RequestEvictMsg {
|
|
847
926
|
run_id: String,
|
|
848
927
|
message: String,
|
|
849
928
|
reason: EvictionReason,
|
|
850
929
|
}
|
|
851
930
|
#[derive(Debug)]
|
|
931
|
+
pub(crate) struct HeartbeatTimeoutMsg {
|
|
932
|
+
pub(crate) run_id: String,
|
|
933
|
+
pub(crate) span: Span,
|
|
934
|
+
}
|
|
935
|
+
#[derive(Debug)]
|
|
852
936
|
struct GetStateInfoMsg {
|
|
853
937
|
response_tx: oneshot::Sender<WorkflowStateInfo>,
|
|
854
938
|
}
|
|
@@ -869,16 +953,24 @@ enum ActivationCompleteOutcome {
|
|
|
869
953
|
ReportWFTFail(FailedActivationWFTReport),
|
|
870
954
|
/// There's nothing to do right now. EX: The workflow needs to keep replaying.
|
|
871
955
|
DoNothing,
|
|
956
|
+
/// The workflow task failed, but we shouldn't report it. EX: We have failed 2 or more attempts
|
|
957
|
+
/// in a row.
|
|
958
|
+
WFTFailedDontReport,
|
|
872
959
|
}
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
960
|
+
/// Did we report, or not, completion of a WFT to server?
|
|
961
|
+
#[derive(Debug, Copy, Clone)]
|
|
962
|
+
#[cfg_attr(
|
|
963
|
+
feature = "save_wf_inputs",
|
|
964
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
965
|
+
)]
|
|
966
|
+
enum WFTReportStatus {
|
|
967
|
+
Reported,
|
|
968
|
+
/// The WFT completion was not reported when finishing the activation, because there's still
|
|
969
|
+
/// work to be done. EX: Running LAs.
|
|
970
|
+
NotReported,
|
|
971
|
+
/// We didn't report, but we want to clear the outstanding workflow task anyway. See
|
|
972
|
+
/// [ActivationCompleteOutcome::WFTFailedDontReport]
|
|
973
|
+
DropWft,
|
|
882
974
|
}
|
|
883
975
|
|
|
884
976
|
fn validate_completion(
|
|
@@ -908,8 +1000,7 @@ fn validate_completion(
|
|
|
908
1000
|
reason: format!(
|
|
909
1001
|
"Workflow completion had a legacy query response along with other \
|
|
910
1002
|
commands. This is not allowed and constitutes an error in the \
|
|
911
|
-
lang SDK. Commands: {:?}"
|
|
912
|
-
commands
|
|
1003
|
+
lang SDK. Commands: {commands:?}"
|
|
913
1004
|
),
|
|
914
1005
|
run_id: completion.run_id,
|
|
915
1006
|
});
|
|
@@ -918,6 +1009,7 @@ fn validate_completion(
|
|
|
918
1009
|
Ok(ValidatedCompletion::Success {
|
|
919
1010
|
run_id: completion.run_id,
|
|
920
1011
|
commands,
|
|
1012
|
+
used_flags: success.used_internal_flags,
|
|
921
1013
|
})
|
|
922
1014
|
}
|
|
923
1015
|
Some(workflow_activation_completion::Status::Failed(failure)) => {
|
|
@@ -934,11 +1026,16 @@ fn validate_completion(
|
|
|
934
1026
|
}
|
|
935
1027
|
|
|
936
1028
|
#[derive(Debug)]
|
|
1029
|
+
#[cfg_attr(
|
|
1030
|
+
feature = "save_wf_inputs",
|
|
1031
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
1032
|
+
)]
|
|
937
1033
|
#[allow(clippy::large_enum_variant)]
|
|
938
1034
|
enum ValidatedCompletion {
|
|
939
1035
|
Success {
|
|
940
1036
|
run_id: String,
|
|
941
1037
|
commands: Vec<WFCommand>,
|
|
1038
|
+
used_flags: Vec<u32>,
|
|
942
1039
|
},
|
|
943
1040
|
Fail {
|
|
944
1041
|
run_id: String,
|
|
@@ -955,112 +1052,6 @@ impl ValidatedCompletion {
|
|
|
955
1052
|
}
|
|
956
1053
|
}
|
|
957
1054
|
|
|
958
|
-
/// Input to run tasks, sent to [ManagedRun]s via [ManagedRunHandle]s
|
|
959
|
-
#[derive(Debug)]
|
|
960
|
-
struct RunAction {
|
|
961
|
-
action: RunActions,
|
|
962
|
-
trace_span: Span,
|
|
963
|
-
}
|
|
964
|
-
#[derive(Debug)]
|
|
965
|
-
#[allow(clippy::large_enum_variant)]
|
|
966
|
-
enum RunActions {
|
|
967
|
-
NewIncomingWFT(NewIncomingWFT),
|
|
968
|
-
ActivationCompletion(RunActivationCompletion),
|
|
969
|
-
CheckMoreWork {
|
|
970
|
-
want_to_evict: Option<RequestEvictMsg>,
|
|
971
|
-
has_pending_queries: bool,
|
|
972
|
-
has_wft: bool,
|
|
973
|
-
},
|
|
974
|
-
LocalResolution(LocalResolution),
|
|
975
|
-
HeartbeatTimeout,
|
|
976
|
-
}
|
|
977
|
-
#[derive(Debug)]
|
|
978
|
-
struct NewIncomingWFT {
|
|
979
|
-
/// This field is only populated if the machines already exist. Otherwise the machines
|
|
980
|
-
/// are instantiated with the workflow history.
|
|
981
|
-
history_update: Option<HistoryUpdate>,
|
|
982
|
-
/// Wft start time
|
|
983
|
-
start_time: Instant,
|
|
984
|
-
}
|
|
985
|
-
#[derive(Debug)]
|
|
986
|
-
struct RunActivationCompletion {
|
|
987
|
-
task_token: TaskToken,
|
|
988
|
-
start_time: Instant,
|
|
989
|
-
commands: Vec<WFCommand>,
|
|
990
|
-
activation_was_eviction: bool,
|
|
991
|
-
activation_was_only_eviction: bool,
|
|
992
|
-
has_pending_query: bool,
|
|
993
|
-
query_responses: Vec<QueryResult>,
|
|
994
|
-
/// Used to notify the worker when the completion is done processing and the completion can
|
|
995
|
-
/// unblock. Must always be `Some` when initialized.
|
|
996
|
-
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
997
|
-
}
|
|
998
|
-
|
|
999
|
-
/// A response from a [ManagedRun] held by a [ManagedRunHandle]
|
|
1000
|
-
#[derive(Debug)]
|
|
1001
|
-
struct RunUpdateResponse {
|
|
1002
|
-
kind: RunUpdateResponseKind,
|
|
1003
|
-
span: Span,
|
|
1004
|
-
}
|
|
1005
|
-
#[derive(Debug, derive_more::Display)]
|
|
1006
|
-
#[allow(clippy::large_enum_variant)]
|
|
1007
|
-
enum RunUpdateResponseKind {
|
|
1008
|
-
Good(GoodRunUpdate),
|
|
1009
|
-
Fail(FailRunUpdate),
|
|
1010
|
-
}
|
|
1011
|
-
impl RunUpdateResponseKind {
|
|
1012
|
-
pub(crate) fn run_id(&self) -> &str {
|
|
1013
|
-
match self {
|
|
1014
|
-
RunUpdateResponseKind::Good(g) => &g.run_id,
|
|
1015
|
-
RunUpdateResponseKind::Fail(f) => &f.run_id,
|
|
1016
|
-
}
|
|
1017
|
-
}
|
|
1018
|
-
}
|
|
1019
|
-
|
|
1020
|
-
#[derive(Debug)]
|
|
1021
|
-
struct GoodRunUpdate {
|
|
1022
|
-
run_id: String,
|
|
1023
|
-
outgoing_activation: Option<ActivationOrAuto>,
|
|
1024
|
-
fulfillable_complete: Option<FulfillableActivationComplete>,
|
|
1025
|
-
have_seen_terminal_event: bool,
|
|
1026
|
-
/// Is true if there are more jobs that need to be sent to lang
|
|
1027
|
-
more_pending_work: bool,
|
|
1028
|
-
most_recently_processed_event_number: usize,
|
|
1029
|
-
/// Is true if this update was in response to a new WFT
|
|
1030
|
-
in_response_to_wft: bool,
|
|
1031
|
-
}
|
|
1032
|
-
impl Display for GoodRunUpdate {
|
|
1033
|
-
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
1034
|
-
write!(
|
|
1035
|
-
f,
|
|
1036
|
-
"GoodRunUpdate(run_id: {}, outgoing_activation: {}, more_pending_work: {})",
|
|
1037
|
-
self.run_id,
|
|
1038
|
-
if let Some(og) = self.outgoing_activation.as_ref() {
|
|
1039
|
-
format!("{}", og)
|
|
1040
|
-
} else {
|
|
1041
|
-
"None".to_string()
|
|
1042
|
-
},
|
|
1043
|
-
self.more_pending_work
|
|
1044
|
-
)
|
|
1045
|
-
}
|
|
1046
|
-
}
|
|
1047
|
-
#[derive(Debug)]
|
|
1048
|
-
pub(crate) struct FailRunUpdate {
|
|
1049
|
-
run_id: String,
|
|
1050
|
-
err: WFMachinesError,
|
|
1051
|
-
/// This is populated if the run update failed while processing a completion - and thus we
|
|
1052
|
-
/// must respond down it when handling the failure.
|
|
1053
|
-
completion_resp: Option<oneshot::Sender<ActivationCompleteResult>>,
|
|
1054
|
-
}
|
|
1055
|
-
impl Display for FailRunUpdate {
|
|
1056
|
-
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
1057
|
-
write!(
|
|
1058
|
-
f,
|
|
1059
|
-
"FailRunUpdate(run_id: {}, error: {:?})",
|
|
1060
|
-
self.run_id, self.err
|
|
1061
|
-
)
|
|
1062
|
-
}
|
|
1063
|
-
}
|
|
1064
1055
|
#[derive(Debug)]
|
|
1065
1056
|
pub struct OutgoingServerCommands {
|
|
1066
1057
|
pub commands: Vec<ProtoCommand>,
|
|
@@ -1068,9 +1059,22 @@ pub struct OutgoingServerCommands {
|
|
|
1068
1059
|
}
|
|
1069
1060
|
|
|
1070
1061
|
#[derive(Debug)]
|
|
1062
|
+
#[cfg_attr(
|
|
1063
|
+
feature = "save_wf_inputs",
|
|
1064
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
1065
|
+
)]
|
|
1071
1066
|
pub(crate) enum LocalResolution {
|
|
1072
1067
|
LocalActivity(LocalActivityResolution),
|
|
1073
1068
|
}
|
|
1069
|
+
impl LocalResolution {
|
|
1070
|
+
pub fn is_la_cancel_confirmation(&self) -> bool {
|
|
1071
|
+
match self {
|
|
1072
|
+
LocalResolution::LocalActivity(lar) => {
|
|
1073
|
+
matches!(lar.result, LocalActivityExecutionResult::Cancelled(_))
|
|
1074
|
+
}
|
|
1075
|
+
}
|
|
1076
|
+
}
|
|
1077
|
+
}
|
|
1074
1078
|
|
|
1075
1079
|
#[derive(thiserror::Error, Debug, derive_more::From)]
|
|
1076
1080
|
#[error("Lang provided workflow command with empty variant")]
|
|
@@ -1079,6 +1083,10 @@ pub struct EmptyWorkflowCommandErr;
|
|
|
1079
1083
|
/// [DrivenWorkflow]s respond with these when called, to indicate what they want to do next.
|
|
1080
1084
|
/// EX: Create a new timer, complete the workflow, etc.
|
|
1081
1085
|
#[derive(Debug, derive_more::From, derive_more::Display)]
|
|
1086
|
+
#[cfg_attr(
|
|
1087
|
+
feature = "save_wf_inputs",
|
|
1088
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
1089
|
+
)]
|
|
1082
1090
|
#[allow(clippy::large_enum_variant)]
|
|
1083
1091
|
pub enum WFCommand {
|
|
1084
1092
|
/// Returned when we need to wait for the lang sdk to send us something
|
|
@@ -1171,12 +1179,9 @@ pub struct WorkflowStartedInfo {
|
|
|
1171
1179
|
retry_policy: Option<RetryPolicy>,
|
|
1172
1180
|
}
|
|
1173
1181
|
|
|
1174
|
-
type LocalActivityRequestSink =
|
|
1175
|
-
Arc<dyn Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution> + Send + Sync>;
|
|
1176
|
-
|
|
1177
1182
|
/// Wraps outgoing activation job protos with some internal details core might care about
|
|
1178
1183
|
#[derive(Debug, derive_more::Display)]
|
|
1179
|
-
#[display(fmt = "{}"
|
|
1184
|
+
#[display(fmt = "{variant}")]
|
|
1180
1185
|
struct OutgoingJob {
|
|
1181
1186
|
variant: workflow_activation_job::Variant,
|
|
1182
1187
|
/// Since LA resolutions are not distinguished from non-LA resolutions as far as lang is
|
|
@@ -1198,3 +1203,74 @@ impl From<OutgoingJob> for WorkflowActivationJob {
|
|
|
1198
1203
|
}
|
|
1199
1204
|
}
|
|
1200
1205
|
}
|
|
1206
|
+
|
|
1207
|
+
/// Errors thrown inside of workflow machines
|
|
1208
|
+
#[derive(thiserror::Error, Debug)]
|
|
1209
|
+
pub(crate) enum WFMachinesError {
|
|
1210
|
+
#[error("Nondeterminism error: {0}")]
|
|
1211
|
+
Nondeterminism(String),
|
|
1212
|
+
#[error("Fatal error in workflow machines: {0}")]
|
|
1213
|
+
Fatal(String),
|
|
1214
|
+
}
|
|
1215
|
+
|
|
1216
|
+
impl WFMachinesError {
|
|
1217
|
+
pub fn evict_reason(&self) -> EvictionReason {
|
|
1218
|
+
match self {
|
|
1219
|
+
WFMachinesError::Nondeterminism(_) => EvictionReason::Nondeterminism,
|
|
1220
|
+
WFMachinesError::Fatal(_) => EvictionReason::Fatal,
|
|
1221
|
+
}
|
|
1222
|
+
}
|
|
1223
|
+
}
|
|
1224
|
+
|
|
1225
|
+
impl From<TimestampError> for WFMachinesError {
|
|
1226
|
+
fn from(_: TimestampError) -> Self {
|
|
1227
|
+
Self::Fatal("Could not decode timestamp".to_string())
|
|
1228
|
+
}
|
|
1229
|
+
}
|
|
1230
|
+
|
|
1231
|
+
fn auto_fail_to_complete_status(err: WFMachinesError) -> workflow_activation_completion::Status {
|
|
1232
|
+
workflow_activation_completion::Status::Failed(Failure {
|
|
1233
|
+
failure: Some(
|
|
1234
|
+
temporal_sdk_core_protos::temporal::api::failure::v1::Failure {
|
|
1235
|
+
message: "Error while processing workflow task".to_string(),
|
|
1236
|
+
source: err.to_string(),
|
|
1237
|
+
stack_trace: "".to_string(),
|
|
1238
|
+
encoded_attributes: None,
|
|
1239
|
+
cause: None,
|
|
1240
|
+
failure_info: None,
|
|
1241
|
+
},
|
|
1242
|
+
),
|
|
1243
|
+
force_cause: WorkflowTaskFailedCause::from(err.evict_reason()) as i32,
|
|
1244
|
+
})
|
|
1245
|
+
}
|
|
1246
|
+
|
|
1247
|
+
pub(crate) trait LocalActivityRequestSink: Send + Sync + 'static {
|
|
1248
|
+
fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution>;
|
|
1249
|
+
}
|
|
1250
|
+
|
|
1251
|
+
#[derive(derive_more::Constructor)]
|
|
1252
|
+
pub(super) struct LAReqSink {
|
|
1253
|
+
lam: Arc<LocalActivityManager>,
|
|
1254
|
+
/// If we're recording WF inputs, we also need to store immediate resolutions so they're
|
|
1255
|
+
/// available on replay.
|
|
1256
|
+
#[allow(dead_code)] // sometimes appears unused due to feature flagging
|
|
1257
|
+
recorder: Option<UnboundedSender<Vec<u8>>>,
|
|
1258
|
+
}
|
|
1259
|
+
|
|
1260
|
+
impl LocalActivityRequestSink for LAReqSink {
|
|
1261
|
+
fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution> {
|
|
1262
|
+
if reqs.is_empty() {
|
|
1263
|
+
return vec![];
|
|
1264
|
+
}
|
|
1265
|
+
|
|
1266
|
+
#[allow(clippy::let_and_return)] // When feature is off clippy doesn't like this
|
|
1267
|
+
let res = self.lam.enqueue(reqs);
|
|
1268
|
+
|
|
1269
|
+
// We always save when there are any reqs, even if the response might be empty, so that
|
|
1270
|
+
// calls/responses are 1:1
|
|
1271
|
+
#[cfg(feature = "save_wf_inputs")]
|
|
1272
|
+
self.write_req(&res);
|
|
1273
|
+
|
|
1274
|
+
res
|
|
1275
|
+
}
|
|
1276
|
+
}
|