temporalio 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +25 -23
- data/bridge/Cargo.lock +168 -59
- data/bridge/Cargo.toml +4 -2
- data/bridge/sdk-core/README.md +19 -6
- data/bridge/sdk-core/client/src/lib.rs +215 -39
- data/bridge/sdk-core/client/src/metrics.rs +17 -8
- data/bridge/sdk-core/client/src/raw.rs +4 -4
- data/bridge/sdk-core/client/src/retry.rs +32 -20
- data/bridge/sdk-core/core/Cargo.toml +22 -9
- data/bridge/sdk-core/core/src/abstractions.rs +203 -14
- data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +76 -41
- data/bridge/sdk-core/core/src/core_tests/determinism.rs +165 -2
- data/bridge/sdk-core/core/src/core_tests/local_activities.rs +204 -83
- data/bridge/sdk-core/core/src/core_tests/queries.rs +3 -4
- data/bridge/sdk-core/core/src/core_tests/workers.rs +1 -3
- data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +397 -54
- data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +106 -12
- data/bridge/sdk-core/core/src/internal_flags.rs +136 -0
- data/bridge/sdk-core/core/src/lib.rs +16 -9
- data/bridge/sdk-core/core/src/telemetry/log_export.rs +1 -1
- data/bridge/sdk-core/core/src/telemetry/metrics.rs +69 -35
- data/bridge/sdk-core/core/src/telemetry/mod.rs +29 -13
- data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +17 -12
- data/bridge/sdk-core/core/src/test_help/mod.rs +62 -12
- data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +112 -156
- data/bridge/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
- data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +352 -122
- data/bridge/sdk-core/core/src/worker/activities.rs +233 -157
- data/bridge/sdk-core/core/src/worker/client/mocks.rs +22 -2
- data/bridge/sdk-core/core/src/worker/client.rs +18 -2
- data/bridge/sdk-core/core/src/worker/mod.rs +165 -58
- data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
- data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +3 -5
- data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +856 -277
- data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +100 -43
- data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +7 -7
- data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +5 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +87 -27
- data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +5 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +5 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +5 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +137 -62
- data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +25 -17
- data/bridge/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +7 -6
- data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +103 -152
- data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +7 -7
- data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +9 -9
- data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
- data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +14 -7
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +5 -16
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +201 -121
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +11 -14
- data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +30 -15
- data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +1026 -376
- data/bridge/sdk-core/core/src/worker/workflow/mod.rs +460 -384
- data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
- data/bridge/sdk-core/core/src/worker/workflow/wft_extraction.rs +125 -0
- data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +1 -4
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +448 -718
- data/bridge/sdk-core/core-api/Cargo.toml +2 -1
- data/bridge/sdk-core/core-api/src/errors.rs +1 -34
- data/bridge/sdk-core/core-api/src/lib.rs +6 -2
- data/bridge/sdk-core/core-api/src/telemetry.rs +0 -6
- data/bridge/sdk-core/core-api/src/worker.rs +14 -1
- data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +18 -15
- data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +8 -3
- data/bridge/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +5 -17
- data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +11 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +1 -6
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +6 -6
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +5 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +22 -6
- data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +48 -19
- data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +2 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +3 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/{enums/v1/interaction_type.proto → protocol/v1/message.proto} +29 -11
- data/bridge/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +111 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +59 -28
- data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +2 -2
- data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +7 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +6 -0
- data/bridge/sdk-core/sdk/Cargo.toml +3 -2
- data/bridge/sdk-core/sdk/src/lib.rs +87 -20
- data/bridge/sdk-core/sdk/src/workflow_future.rs +9 -8
- data/bridge/sdk-core/sdk-core-protos/Cargo.toml +5 -2
- data/bridge/sdk-core/sdk-core-protos/build.rs +36 -1
- data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +100 -87
- data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +5 -1
- data/bridge/sdk-core/sdk-core-protos/src/lib.rs +175 -57
- data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
- data/bridge/sdk-core/test-utils/Cargo.toml +3 -1
- data/bridge/sdk-core/test-utils/src/canned_histories.rs +106 -296
- data/bridge/sdk-core/test-utils/src/histfetch.rs +1 -1
- data/bridge/sdk-core/test-utils/src/lib.rs +82 -23
- data/bridge/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
- data/bridge/sdk-core/test-utils/src/workflows.rs +29 -0
- data/bridge/sdk-core/tests/fuzzy_workflow.rs +130 -0
- data/bridge/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
- data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
- data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +5 -3
- data/bridge/sdk-core/tests/integ_tests/metrics_tests.rs +218 -16
- data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +4 -47
- data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
- data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +93 -69
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +72 -191
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +7 -28
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +12 -7
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -4
- data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +10 -11
- data/bridge/sdk-core/tests/main.rs +3 -13
- data/bridge/sdk-core/tests/runner.rs +75 -36
- data/bridge/sdk-core/tests/wf_input_replay.rs +32 -0
- data/bridge/src/connection.rs +41 -25
- data/bridge/src/lib.rs +269 -14
- data/bridge/src/runtime.rs +1 -1
- data/bridge/src/test_server.rs +153 -0
- data/bridge/src/worker.rs +89 -16
- data/lib/gen/temporal/api/command/v1/message_pb.rb +4 -18
- data/lib/gen/temporal/api/common/v1/message_pb.rb +4 -0
- data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +1 -3
- data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +3 -3
- data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +2 -0
- data/lib/gen/temporal/api/enums/v1/update_pb.rb +6 -4
- data/lib/gen/temporal/api/history/v1/message_pb.rb +27 -19
- data/lib/gen/temporal/api/namespace/v1/message_pb.rb +1 -0
- data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +3 -0
- data/lib/gen/temporal/api/protocol/v1/message_pb.rb +30 -0
- data/lib/gen/temporal/api/sdk/v1/task_complete_metadata_pb.rb +23 -0
- data/lib/gen/temporal/api/testservice/v1/request_response_pb.rb +49 -0
- data/lib/gen/temporal/api/testservice/v1/service_pb.rb +21 -0
- data/lib/gen/temporal/api/update/v1/message_pb.rb +72 -0
- data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +26 -16
- data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +13 -9
- data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +10 -6
- data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +13 -9
- data/lib/gen/temporal/sdk/core/common/common_pb.rb +7 -3
- data/lib/gen/temporal/sdk/core/core_interface_pb.rb +9 -3
- data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +7 -3
- data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +27 -21
- data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +28 -24
- data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +12 -5
- data/lib/temporalio/activity/context.rb +13 -8
- data/lib/temporalio/activity/info.rb +1 -1
- data/lib/temporalio/bridge/connect_options.rb +15 -0
- data/lib/temporalio/bridge/retry_config.rb +24 -0
- data/lib/temporalio/bridge/tls_options.rb +19 -0
- data/lib/temporalio/client/implementation.rb +8 -8
- data/lib/temporalio/connection/retry_config.rb +44 -0
- data/lib/temporalio/connection/service.rb +20 -0
- data/lib/temporalio/connection/test_service.rb +92 -0
- data/lib/temporalio/connection/tls_options.rb +51 -0
- data/lib/temporalio/connection/workflow_service.rb +731 -0
- data/lib/temporalio/connection.rb +55 -720
- data/lib/temporalio/interceptor/activity_inbound.rb +22 -0
- data/lib/temporalio/interceptor/activity_outbound.rb +24 -0
- data/lib/temporalio/interceptor/chain.rb +5 -5
- data/lib/temporalio/interceptor/client.rb +8 -4
- data/lib/temporalio/interceptor.rb +22 -0
- data/lib/temporalio/retry_policy.rb +13 -3
- data/lib/temporalio/testing/time_skipping_handle.rb +32 -0
- data/lib/temporalio/testing/time_skipping_interceptor.rb +23 -0
- data/lib/temporalio/testing/workflow_environment.rb +112 -0
- data/lib/temporalio/testing.rb +175 -0
- data/lib/temporalio/version.rb +1 -1
- data/lib/temporalio/worker/activity_runner.rb +26 -4
- data/lib/temporalio/worker/activity_worker.rb +44 -18
- data/lib/temporalio/worker/sync_worker.rb +47 -11
- data/lib/temporalio/worker.rb +27 -21
- data/lib/temporalio/workflow/async.rb +46 -0
- data/lib/temporalio/workflow/future.rb +138 -0
- data/lib/temporalio/workflow/info.rb +76 -0
- data/temporalio.gemspec +4 -3
- metadata +67 -17
- data/bridge/sdk-core/Cargo.lock +0 -2606
- data/bridge/sdk-core/protos/api_upstream/temporal/api/interaction/v1/message.proto +0 -87
- data/lib/bridge.so +0 -0
- data/lib/gen/temporal/api/enums/v1/interaction_type_pb.rb +0 -25
- data/lib/gen/temporal/api/interaction/v1/message_pb.rb +0 -49
- data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +0 -222
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
use crate::{
|
|
2
|
-
abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
|
|
2
|
+
abstractions::{dbg_panic, MeteredSemaphore, OwnedMeteredSemPermit, UsedMeteredSemPermit},
|
|
3
3
|
protosext::ValidScheduleLA,
|
|
4
4
|
retry_logic::RetryPolicyExt,
|
|
5
|
+
worker::workflow::HeartbeatTimeoutMsg,
|
|
5
6
|
MetricsContext, TaskToken,
|
|
6
7
|
};
|
|
7
|
-
use
|
|
8
|
+
use futures::{stream::BoxStream, Stream};
|
|
9
|
+
use futures_util::{future, future::AbortRegistration, stream, StreamExt};
|
|
10
|
+
use parking_lot::{Mutex, MutexGuard};
|
|
8
11
|
use std::{
|
|
9
|
-
collections::HashMap,
|
|
12
|
+
collections::{hash_map::Entry, HashMap},
|
|
10
13
|
fmt::{Debug, Formatter},
|
|
14
|
+
pin::Pin,
|
|
15
|
+
task::{Context, Poll},
|
|
11
16
|
time::{Duration, Instant, SystemTime},
|
|
12
17
|
};
|
|
13
18
|
use temporal_sdk_core_protos::{
|
|
@@ -25,6 +30,7 @@ use tokio::{
|
|
|
25
30
|
task::JoinHandle,
|
|
26
31
|
time::sleep,
|
|
27
32
|
};
|
|
33
|
+
use tokio_stream::wrappers::UnboundedReceiverStream;
|
|
28
34
|
use tokio_util::sync::CancellationToken;
|
|
29
35
|
|
|
30
36
|
#[allow(clippy::large_enum_variant)] // Timeouts are relatively rare
|
|
@@ -45,10 +51,14 @@ pub(crate) struct LocalInFlightActInfo {
|
|
|
45
51
|
pub la_info: NewLocalAct,
|
|
46
52
|
pub dispatch_time: Instant,
|
|
47
53
|
pub attempt: u32,
|
|
48
|
-
_permit:
|
|
54
|
+
_permit: UsedMeteredSemPermit,
|
|
49
55
|
}
|
|
50
56
|
|
|
51
57
|
#[derive(Debug, Clone)]
|
|
58
|
+
#[cfg_attr(
|
|
59
|
+
feature = "save_wf_inputs",
|
|
60
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
61
|
+
)]
|
|
52
62
|
pub(crate) enum LocalActivityExecutionResult {
|
|
53
63
|
Completed(Success),
|
|
54
64
|
Failed(ActFail),
|
|
@@ -65,6 +75,10 @@ impl LocalActivityExecutionResult {
|
|
|
65
75
|
}
|
|
66
76
|
|
|
67
77
|
#[derive(Debug, Clone)]
|
|
78
|
+
#[cfg_attr(
|
|
79
|
+
feature = "save_wf_inputs",
|
|
80
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
81
|
+
)]
|
|
68
82
|
pub(crate) struct LocalActivityResolution {
|
|
69
83
|
pub seq: u32,
|
|
70
84
|
pub result: LocalActivityExecutionResult,
|
|
@@ -96,6 +110,17 @@ impl Debug for NewLocalAct {
|
|
|
96
110
|
pub(crate) enum LocalActRequest {
|
|
97
111
|
New(NewLocalAct),
|
|
98
112
|
Cancel(ExecutingLAId),
|
|
113
|
+
#[from(ignore)]
|
|
114
|
+
CancelAllInRun(String),
|
|
115
|
+
StartHeartbeatTimeout {
|
|
116
|
+
send_on_elapse: HeartbeatTimeoutMsg,
|
|
117
|
+
deadline: Instant,
|
|
118
|
+
abort_reg: AbortRegistration,
|
|
119
|
+
},
|
|
120
|
+
/// Tell the LA manager that a workflow task was responded to (completed or failed) for a
|
|
121
|
+
/// certain run id
|
|
122
|
+
#[from(ignore)]
|
|
123
|
+
IndicateWorkflowTaskCompleted(String),
|
|
99
124
|
}
|
|
100
125
|
|
|
101
126
|
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
|
|
@@ -107,28 +132,43 @@ pub(crate) struct ExecutingLAId {
|
|
|
107
132
|
pub(crate) struct LocalActivityManager {
|
|
108
133
|
/// Just so we can provide activity tasks the same namespace as the worker
|
|
109
134
|
namespace: String,
|
|
110
|
-
/// Constrains number of currently executing local activities
|
|
111
|
-
semaphore: MeteredSemaphore,
|
|
112
135
|
/// Sink for new activity execution requests
|
|
113
136
|
act_req_tx: UnboundedSender<NewOrRetry>,
|
|
114
137
|
/// Cancels need a different queue since they should be taken first, and don't take a permit
|
|
115
138
|
cancels_req_tx: UnboundedSender<CancelOrTimeout>,
|
|
139
|
+
/// For the emission of heartbeat timeouts, back into the workflow machines. This channel
|
|
140
|
+
/// needs to come in from above us, because we cannot rely on callers getting the next
|
|
141
|
+
/// activation as a way to deliver heartbeats.
|
|
142
|
+
heartbeat_timeout_tx: UnboundedSender<HeartbeatTimeoutMsg>,
|
|
116
143
|
/// Wakes every time a complete is processed
|
|
117
144
|
complete_notify: Notify,
|
|
145
|
+
/// Set once workflows have finished shutting down, and thus we know we will no longer receive
|
|
146
|
+
/// any requests to spawn new LAs
|
|
147
|
+
workflows_have_shut_down: CancellationToken,
|
|
118
148
|
|
|
119
149
|
rcvs: tokio::sync::Mutex<RcvChans>,
|
|
120
150
|
shutdown_complete_tok: CancellationToken,
|
|
121
151
|
dat: Mutex<LAMData>,
|
|
122
152
|
}
|
|
123
153
|
|
|
154
|
+
struct LocalActivityInfo {
|
|
155
|
+
task_token: TaskToken,
|
|
156
|
+
/// Tasks for the current backoff until the next retry, if any.
|
|
157
|
+
backing_off_task: Option<JoinHandle<()>>,
|
|
158
|
+
/// Tasks / info about timeouts associated with this LA. May be empty for very brief periods
|
|
159
|
+
/// while the LA id has been generated, but it has not yet been scheduled.
|
|
160
|
+
timeout_bag: Option<TimeoutBag>,
|
|
161
|
+
/// True once the first workflow task this LA started in has elapsed
|
|
162
|
+
first_wft_has_ended: bool,
|
|
163
|
+
/// Attempts at executing this LA during the current WFT
|
|
164
|
+
attempts_in_wft: usize,
|
|
165
|
+
}
|
|
166
|
+
|
|
124
167
|
struct LAMData {
|
|
168
|
+
/// Maps local activity identifiers to information about them
|
|
169
|
+
la_info: HashMap<ExecutingLAId, LocalActivityInfo>,
|
|
125
170
|
/// Activities that have been issued to lang but not yet completed
|
|
126
171
|
outstanding_activity_tasks: HashMap<TaskToken, LocalInFlightActInfo>,
|
|
127
|
-
id_to_tt: HashMap<ExecutingLAId, TaskToken>,
|
|
128
|
-
/// Tasks for activities which are currently backing off. May be used to cancel retrying them.
|
|
129
|
-
backing_off_tasks: HashMap<ExecutingLAId, JoinHandle<()>>,
|
|
130
|
-
/// Tasks for timing out activities which are currently in the queue or dispatched.
|
|
131
|
-
timeout_tasks: HashMap<ExecutingLAId, TimeoutBag>,
|
|
132
172
|
next_tt_num: u32,
|
|
133
173
|
}
|
|
134
174
|
|
|
@@ -143,42 +183,46 @@ impl LocalActivityManager {
|
|
|
143
183
|
pub(crate) fn new(
|
|
144
184
|
max_concurrent: usize,
|
|
145
185
|
namespace: String,
|
|
186
|
+
heartbeat_timeout_tx: UnboundedSender<HeartbeatTimeoutMsg>,
|
|
146
187
|
metrics_context: MetricsContext,
|
|
147
188
|
) -> Self {
|
|
148
189
|
let (act_req_tx, act_req_rx) = unbounded_channel();
|
|
149
190
|
let (cancels_req_tx, cancels_req_rx) = unbounded_channel();
|
|
150
191
|
let shutdown_complete_tok = CancellationToken::new();
|
|
192
|
+
let semaphore = MeteredSemaphore::new(
|
|
193
|
+
max_concurrent,
|
|
194
|
+
metrics_context,
|
|
195
|
+
MetricsContext::available_task_slots,
|
|
196
|
+
);
|
|
151
197
|
Self {
|
|
152
198
|
namespace,
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
199
|
+
rcvs: tokio::sync::Mutex::new(RcvChans::new(
|
|
200
|
+
act_req_rx,
|
|
201
|
+
semaphore,
|
|
202
|
+
cancels_req_rx,
|
|
203
|
+
shutdown_complete_tok.clone(),
|
|
204
|
+
)),
|
|
158
205
|
act_req_tx,
|
|
159
206
|
cancels_req_tx,
|
|
207
|
+
heartbeat_timeout_tx,
|
|
160
208
|
complete_notify: Notify::new(),
|
|
161
|
-
rcvs: tokio::sync::Mutex::new(RcvChans {
|
|
162
|
-
act_req_rx,
|
|
163
|
-
cancels_req_rx,
|
|
164
|
-
shutdown: shutdown_complete_tok.clone(),
|
|
165
|
-
}),
|
|
166
209
|
shutdown_complete_tok,
|
|
167
210
|
dat: Mutex::new(LAMData {
|
|
168
211
|
outstanding_activity_tasks: Default::default(),
|
|
169
|
-
|
|
170
|
-
backing_off_tasks: Default::default(),
|
|
171
|
-
timeout_tasks: Default::default(),
|
|
212
|
+
la_info: Default::default(),
|
|
172
213
|
next_tt_num: 0,
|
|
173
214
|
}),
|
|
215
|
+
workflows_have_shut_down: Default::default(),
|
|
174
216
|
}
|
|
175
217
|
}
|
|
176
218
|
|
|
177
219
|
#[cfg(test)]
|
|
178
220
|
fn test(max_concurrent: usize) -> Self {
|
|
221
|
+
let (hb_tx, _hb_rx) = unbounded_channel();
|
|
179
222
|
Self::new(
|
|
180
223
|
max_concurrent,
|
|
181
224
|
"fake_ns".to_string(),
|
|
225
|
+
hb_tx,
|
|
182
226
|
MetricsContext::no_op(),
|
|
183
227
|
)
|
|
184
228
|
}
|
|
@@ -190,76 +234,116 @@ impl LocalActivityManager {
|
|
|
190
234
|
|
|
191
235
|
#[cfg(test)]
|
|
192
236
|
fn num_in_backoff(&self) -> usize {
|
|
193
|
-
self.dat
|
|
237
|
+
self.dat
|
|
238
|
+
.lock()
|
|
239
|
+
.la_info
|
|
240
|
+
.values()
|
|
241
|
+
.filter(|lai| lai.backing_off_task.is_some())
|
|
242
|
+
.count()
|
|
194
243
|
}
|
|
195
244
|
|
|
196
245
|
pub(crate) fn enqueue(
|
|
197
246
|
&self,
|
|
198
247
|
reqs: impl IntoIterator<Item = LocalActRequest>,
|
|
199
248
|
) -> Vec<LocalActivityResolution> {
|
|
249
|
+
if self.workflows_have_shut_down.is_cancelled() {
|
|
250
|
+
dbg_panic!("Tried to enqueue local activity after workflows were shut down");
|
|
251
|
+
return vec![];
|
|
252
|
+
}
|
|
200
253
|
let mut immediate_resolutions = vec![];
|
|
201
254
|
for req in reqs {
|
|
202
|
-
debug!(local_activity = ?req, "Queuing local activity");
|
|
203
255
|
match req {
|
|
204
256
|
LocalActRequest::New(act) => {
|
|
257
|
+
debug!(local_activity=?act, "Queuing local activity");
|
|
205
258
|
let id = ExecutingLAId {
|
|
206
259
|
run_id: act.workflow_exec_info.run_id.clone(),
|
|
207
260
|
seq_num: act.schedule_cmd.seq,
|
|
208
261
|
};
|
|
209
262
|
let mut dlock = self.dat.lock();
|
|
210
|
-
if dlock.id_to_tt.contains_key(&id) {
|
|
211
|
-
// Do not queue local activities which are in fact already executing.
|
|
212
|
-
// This can happen during evictions.
|
|
213
|
-
debug!("Tried to queue already-executing local activity {:?}", &id);
|
|
214
|
-
continue;
|
|
215
|
-
}
|
|
216
|
-
// Pre-generate and insert the task token now, before we may or may not dispatch
|
|
217
|
-
// the activity, so we can enforce idempotency. Prevents two identical LAs
|
|
218
|
-
// ending up in the queue at once.
|
|
219
263
|
let tt = dlock.gen_next_token();
|
|
220
|
-
dlock.
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
264
|
+
match dlock.la_info.entry(id) {
|
|
265
|
+
Entry::Occupied(o) => {
|
|
266
|
+
// Do not queue local activities which are in fact already executing.
|
|
267
|
+
// This can happen during evictions.
|
|
268
|
+
debug!(
|
|
269
|
+
"Tried to queue already-executing local activity {:?}",
|
|
270
|
+
o.key()
|
|
271
|
+
);
|
|
272
|
+
continue;
|
|
273
|
+
}
|
|
274
|
+
Entry::Vacant(ve) => {
|
|
275
|
+
// Insert the task token now, before we may or may not dispatch the
|
|
276
|
+
// activity, so we can enforce idempotency. Prevents two identical LAs
|
|
277
|
+
// ending up in the queue at once.
|
|
278
|
+
let lai = ve.insert(LocalActivityInfo {
|
|
279
|
+
task_token: tt,
|
|
280
|
+
backing_off_task: None,
|
|
281
|
+
timeout_bag: None,
|
|
282
|
+
first_wft_has_ended: false,
|
|
283
|
+
attempts_in_wft: 0,
|
|
284
|
+
});
|
|
285
|
+
|
|
286
|
+
// Set up timeouts for the new activity
|
|
287
|
+
match TimeoutBag::new(&act, self.cancels_req_tx.clone()) {
|
|
288
|
+
Ok(tb) => {
|
|
289
|
+
lai.timeout_bag = Some(tb);
|
|
290
|
+
|
|
291
|
+
self.act_req_tx.send(NewOrRetry::New(act)).expect(
|
|
292
|
+
"Receive half of LA request channel cannot be dropped",
|
|
293
|
+
);
|
|
294
|
+
}
|
|
295
|
+
Err(res) => immediate_resolutions.push(res),
|
|
296
|
+
}
|
|
230
297
|
}
|
|
231
|
-
Err(res) => immediate_resolutions.push(res),
|
|
232
298
|
}
|
|
233
299
|
}
|
|
300
|
+
LocalActRequest::StartHeartbeatTimeout {
|
|
301
|
+
send_on_elapse,
|
|
302
|
+
deadline,
|
|
303
|
+
abort_reg,
|
|
304
|
+
} => {
|
|
305
|
+
let chan = self.heartbeat_timeout_tx.clone();
|
|
306
|
+
tokio::spawn(future::Abortable::new(
|
|
307
|
+
async move {
|
|
308
|
+
tokio::time::sleep_until(deadline.into()).await;
|
|
309
|
+
let _ = chan.send(send_on_elapse);
|
|
310
|
+
},
|
|
311
|
+
abort_reg,
|
|
312
|
+
));
|
|
313
|
+
}
|
|
234
314
|
LocalActRequest::Cancel(id) => {
|
|
315
|
+
debug!(id=?id, "Cancelling local activity");
|
|
235
316
|
let mut dlock = self.dat.lock();
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
t.abort();
|
|
241
|
-
immediate_resolutions.push(LocalActivityResolution {
|
|
242
|
-
seq: id.seq_num,
|
|
243
|
-
result: LocalActivityExecutionResult::Cancelled(
|
|
244
|
-
Cancellation::from_details(None),
|
|
245
|
-
),
|
|
246
|
-
runtime: Duration::from_secs(0),
|
|
247
|
-
attempt: 0,
|
|
248
|
-
backoff: None,
|
|
249
|
-
original_schedule_time: None,
|
|
250
|
-
});
|
|
251
|
-
continue;
|
|
317
|
+
if let Some(lai) = dlock.la_info.get_mut(&id) {
|
|
318
|
+
if let Some(immediate_res) = self.cancel_one_la(id.seq_num, lai) {
|
|
319
|
+
immediate_resolutions.push(immediate_res);
|
|
320
|
+
}
|
|
252
321
|
}
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
322
|
+
}
|
|
323
|
+
LocalActRequest::CancelAllInRun(run_id) => {
|
|
324
|
+
debug!(run_id=%run_id, "Cancelling all local activities for run");
|
|
325
|
+
let mut dlock = self.dat.lock();
|
|
326
|
+
// Even if we've got 100k+ LAs this should only take a ms or two. Not worth
|
|
327
|
+
// adding another map to keep in sync.
|
|
328
|
+
let las_for_run = dlock
|
|
329
|
+
.la_info
|
|
330
|
+
.iter_mut()
|
|
331
|
+
.filter(|(id, _)| id.run_id == run_id);
|
|
332
|
+
for (laid, lainf) in las_for_run {
|
|
333
|
+
if let Some(immediate_res) = self.cancel_one_la(laid.seq_num, lainf) {
|
|
334
|
+
immediate_resolutions.push(immediate_res);
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
LocalActRequest::IndicateWorkflowTaskCompleted(run_id) => {
|
|
339
|
+
let mut dlock = self.dat.lock();
|
|
340
|
+
let las_for_run = dlock
|
|
341
|
+
.la_info
|
|
342
|
+
.iter_mut()
|
|
343
|
+
.filter(|(id, _)| id.run_id == run_id);
|
|
344
|
+
for (_, lainf) in las_for_run {
|
|
345
|
+
lainf.first_wft_has_ended = true;
|
|
346
|
+
lainf.attempts_in_wft = 0;
|
|
263
347
|
}
|
|
264
348
|
}
|
|
265
349
|
}
|
|
@@ -270,7 +354,7 @@ impl LocalActivityManager {
|
|
|
270
354
|
/// Returns the next pending local-activity related action, or None if shutdown has initiated
|
|
271
355
|
/// and there are no more remaining actions to take.
|
|
272
356
|
pub(crate) async fn next_pending(&self) -> Option<DispatchOrTimeoutLA> {
|
|
273
|
-
let (new_or_retry, permit) = match self.rcvs.lock().await.next(
|
|
357
|
+
let (new_or_retry, permit) = match self.rcvs.lock().await.next().await? {
|
|
274
358
|
NewOrCancel::Cancel(c) => {
|
|
275
359
|
return match c {
|
|
276
360
|
CancelOrTimeout::Cancel(c) => Some(DispatchOrTimeoutLA::Dispatch(c)),
|
|
@@ -283,12 +367,13 @@ impl LocalActivityManager {
|
|
|
283
367
|
let tt = self
|
|
284
368
|
.dat
|
|
285
369
|
.lock()
|
|
286
|
-
.
|
|
370
|
+
.la_info
|
|
287
371
|
.get(&ExecutingLAId {
|
|
288
372
|
run_id: run_id.clone(),
|
|
289
373
|
seq_num: resolution.seq,
|
|
290
374
|
})
|
|
291
|
-
.
|
|
375
|
+
.as_ref()
|
|
376
|
+
.map(|lai| lai.task_token.clone());
|
|
292
377
|
if let Some(task_token) = tt {
|
|
293
378
|
self.complete(&task_token, &resolution.result);
|
|
294
379
|
Some(ActivityTask {
|
|
@@ -335,7 +420,9 @@ impl LocalActivityManager {
|
|
|
335
420
|
// If this request originated from a local backoff task, clear the entry for it. We
|
|
336
421
|
// don't await the handle because we know it must already be done, and there's no
|
|
337
422
|
// meaningful value.
|
|
338
|
-
dat.
|
|
423
|
+
dat.la_info
|
|
424
|
+
.get_mut(&id)
|
|
425
|
+
.map(|lai| lai.backing_off_task.take());
|
|
339
426
|
|
|
340
427
|
// If this task sat in the queue for too long, return a timeout for it instead
|
|
341
428
|
if let Some(s2s) = sa.schedule_to_start_timeout.as_ref() {
|
|
@@ -356,23 +443,20 @@ impl LocalActivityManager {
|
|
|
356
443
|
}
|
|
357
444
|
}
|
|
358
445
|
|
|
359
|
-
let
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
.
|
|
363
|
-
|
|
446
|
+
let la_info = dat.la_info.get_mut(&id).expect("Activity must exist");
|
|
447
|
+
let tt = la_info.task_token.clone();
|
|
448
|
+
if let Some(to) = la_info.timeout_bag.as_mut() {
|
|
449
|
+
to.mark_started();
|
|
450
|
+
}
|
|
364
451
|
dat.outstanding_activity_tasks.insert(
|
|
365
452
|
tt.clone(),
|
|
366
453
|
LocalInFlightActInfo {
|
|
367
454
|
la_info: la_info_for_in_flight_map,
|
|
368
455
|
dispatch_time: Instant::now(),
|
|
369
456
|
attempt,
|
|
370
|
-
_permit: permit,
|
|
457
|
+
_permit: permit.into_used(),
|
|
371
458
|
},
|
|
372
459
|
);
|
|
373
|
-
if let Some(to) = dat.timeout_tasks.get_mut(&id) {
|
|
374
|
-
to.mark_started();
|
|
375
|
-
}
|
|
376
460
|
|
|
377
461
|
let (schedule_to_close, start_to_close) = sa.close_timeouts.into_sched_and_start();
|
|
378
462
|
Some(DispatchOrTimeoutLA::Dispatch(ActivityTask {
|
|
@@ -407,11 +491,23 @@ impl LocalActivityManager {
|
|
|
407
491
|
) -> LACompleteAction {
|
|
408
492
|
let mut dlock = self.dat.lock();
|
|
409
493
|
if let Some(info) = dlock.outstanding_activity_tasks.remove(task_token) {
|
|
494
|
+
if self.workflows_have_shut_down.is_cancelled() {
|
|
495
|
+
// If workflows are already shut down, the results of all this don't matter.
|
|
496
|
+
// Just say we're done if there's nothing outstanding any more.
|
|
497
|
+
self.set_shutdown_complete_if_ready(&mut dlock);
|
|
498
|
+
}
|
|
499
|
+
|
|
410
500
|
let exec_id = ExecutingLAId {
|
|
411
501
|
run_id: info.la_info.workflow_exec_info.run_id.clone(),
|
|
412
502
|
seq_num: info.la_info.schedule_cmd.seq,
|
|
413
503
|
};
|
|
414
|
-
dlock.
|
|
504
|
+
let maybe_old_lai = dlock.la_info.remove(&exec_id);
|
|
505
|
+
if let Some(ref oldlai) = maybe_old_lai {
|
|
506
|
+
if let Some(ref bot) = oldlai.backing_off_task {
|
|
507
|
+
dbg_panic!("Just-resolved LA should not have backoff task");
|
|
508
|
+
bot.abort();
|
|
509
|
+
}
|
|
510
|
+
}
|
|
415
511
|
|
|
416
512
|
match status {
|
|
417
513
|
LocalActivityExecutionResult::Completed(_)
|
|
@@ -447,8 +543,6 @@ impl LocalActivityManager {
|
|
|
447
543
|
}
|
|
448
544
|
// Immediately create a new task token for the to-be-retried LA
|
|
449
545
|
let tt = dlock.gen_next_token();
|
|
450
|
-
dlock.id_to_tt.insert(exec_id.clone(), tt);
|
|
451
|
-
|
|
452
546
|
// Send the retry request after waiting the backoff duration
|
|
453
547
|
let send_chan = self.act_req_tx.clone();
|
|
454
548
|
let jh = tokio::spawn(async move {
|
|
@@ -461,7 +555,22 @@ impl LocalActivityManager {
|
|
|
461
555
|
})
|
|
462
556
|
.expect("Receive half of LA request channel cannot be dropped");
|
|
463
557
|
});
|
|
464
|
-
dlock.
|
|
558
|
+
dlock.la_info.insert(
|
|
559
|
+
exec_id,
|
|
560
|
+
LocalActivityInfo {
|
|
561
|
+
task_token: tt,
|
|
562
|
+
backing_off_task: Some(jh),
|
|
563
|
+
first_wft_has_ended: maybe_old_lai
|
|
564
|
+
.as_ref()
|
|
565
|
+
.map(|old| old.first_wft_has_ended)
|
|
566
|
+
.unwrap_or_default(),
|
|
567
|
+
attempts_in_wft: maybe_old_lai
|
|
568
|
+
.as_ref()
|
|
569
|
+
.map(|old| old.attempts_in_wft + 1)
|
|
570
|
+
.unwrap_or(1),
|
|
571
|
+
timeout_bag: maybe_old_lai.and_then(|old| old.timeout_bag),
|
|
572
|
+
},
|
|
573
|
+
);
|
|
465
574
|
|
|
466
575
|
LACompleteAction::WillBeRetried
|
|
467
576
|
} else {
|
|
@@ -474,11 +583,70 @@ impl LocalActivityManager {
|
|
|
474
583
|
}
|
|
475
584
|
}
|
|
476
585
|
|
|
477
|
-
pub(crate)
|
|
478
|
-
|
|
586
|
+
pub(crate) fn workflows_have_shutdown(&self) {
|
|
587
|
+
self.workflows_have_shut_down.cancel();
|
|
588
|
+
self.set_shutdown_complete_if_ready(&mut self.dat.lock());
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
pub(crate) async fn wait_all_outstanding_tasks_finished(&self) {
|
|
592
|
+
while !self.set_shutdown_complete_if_ready(&mut self.dat.lock()) {
|
|
479
593
|
self.complete_notify.notified().await;
|
|
480
594
|
}
|
|
481
|
-
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
/// Try to close the activity stream as soon as worker shutdown is initiated.
|
|
598
|
+
/// This is required for activity-only workers where since workflows are not polled and the activity poller might
|
|
599
|
+
/// get "stuck".
|
|
600
|
+
pub(crate) fn shutdown_initiated(&self) {
|
|
601
|
+
self.set_shutdown_complete_if_ready(&mut self.dat.lock());
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
pub(crate) fn get_nonfirst_attempt_count(&self, for_run_id: &str) -> usize {
|
|
605
|
+
let dlock = self.dat.lock();
|
|
606
|
+
dlock
|
|
607
|
+
.la_info
|
|
608
|
+
.iter()
|
|
609
|
+
.filter(|(id, info)| id.run_id == for_run_id && info.first_wft_has_ended)
|
|
610
|
+
.map(|(_, info)| info.attempts_in_wft)
|
|
611
|
+
.sum()
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
fn set_shutdown_complete_if_ready(&self, dlock: &mut MutexGuard<LAMData>) -> bool {
|
|
615
|
+
let nothing_outstanding = dlock.outstanding_activity_tasks.is_empty();
|
|
616
|
+
if nothing_outstanding {
|
|
617
|
+
self.shutdown_complete_tok.cancel();
|
|
618
|
+
}
|
|
619
|
+
nothing_outstanding
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
fn cancel_one_la(
|
|
623
|
+
&self,
|
|
624
|
+
seq: u32,
|
|
625
|
+
lai: &mut LocalActivityInfo,
|
|
626
|
+
) -> Option<LocalActivityResolution> {
|
|
627
|
+
// First check if this ID is currently backing off, if so abort the backoff
|
|
628
|
+
// task
|
|
629
|
+
if let Some(t) = lai.backing_off_task.take() {
|
|
630
|
+
t.abort();
|
|
631
|
+
return Some(LocalActivityResolution {
|
|
632
|
+
seq,
|
|
633
|
+
result: LocalActivityExecutionResult::Cancelled(Cancellation::from_details(None)),
|
|
634
|
+
runtime: Duration::from_secs(0),
|
|
635
|
+
attempt: 0,
|
|
636
|
+
backoff: None,
|
|
637
|
+
original_schedule_time: None,
|
|
638
|
+
});
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
self.cancels_req_tx
|
|
642
|
+
.send(CancelOrTimeout::Cancel(ActivityTask {
|
|
643
|
+
task_token: lai.task_token.0.clone(),
|
|
644
|
+
variant: Some(activity_task::Variant::Cancel(Cancel {
|
|
645
|
+
reason: ActivityCancelReason::Cancelled as i32,
|
|
646
|
+
})),
|
|
647
|
+
}))
|
|
648
|
+
.expect("Receive half of LA cancel channel cannot be dropped");
|
|
649
|
+
None
|
|
482
650
|
}
|
|
483
651
|
}
|
|
484
652
|
|
|
@@ -522,32 +690,45 @@ enum NewOrCancel {
|
|
|
522
690
|
Cancel(CancelOrTimeout),
|
|
523
691
|
}
|
|
524
692
|
|
|
693
|
+
#[pin_project::pin_project]
|
|
525
694
|
struct RcvChans {
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
/// Cancels to send to lang or apply internally
|
|
529
|
-
cancels_req_rx: UnboundedReceiver<CancelOrTimeout>,
|
|
530
|
-
shutdown: CancellationToken,
|
|
695
|
+
#[pin]
|
|
696
|
+
inner: BoxStream<'static, NewOrCancel>,
|
|
531
697
|
}
|
|
532
698
|
|
|
533
699
|
impl RcvChans {
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
700
|
+
fn new(
|
|
701
|
+
new_reqs: UnboundedReceiver<NewOrRetry>,
|
|
702
|
+
new_sem: MeteredSemaphore,
|
|
703
|
+
cancels: UnboundedReceiver<CancelOrTimeout>,
|
|
704
|
+
shutdown_completed: CancellationToken,
|
|
705
|
+
) -> Self {
|
|
706
|
+
let cancel_stream = UnboundedReceiverStream::new(cancels).map(NewOrCancel::Cancel);
|
|
707
|
+
let new_stream = UnboundedReceiverStream::new(new_reqs)
|
|
708
|
+
// Get a permit for each new activity request
|
|
709
|
+
.zip(stream::unfold(new_sem, |new_sem| async move {
|
|
710
|
+
let permit = new_sem
|
|
711
|
+
.acquire_owned()
|
|
712
|
+
.await
|
|
713
|
+
.expect("Local activity semaphore is never closed");
|
|
714
|
+
Some((permit, new_sem))
|
|
715
|
+
}))
|
|
716
|
+
.map(|(req, permit)| NewOrCancel::New(req, permit));
|
|
717
|
+
Self {
|
|
718
|
+
inner: tokio_stream::StreamExt::merge(cancel_stream, new_stream)
|
|
719
|
+
.take_until(async move { shutdown_completed.cancelled().await })
|
|
720
|
+
.boxed(),
|
|
548
721
|
}
|
|
549
722
|
}
|
|
550
723
|
}
|
|
724
|
+
impl Stream for RcvChans {
|
|
725
|
+
type Item = NewOrCancel;
|
|
726
|
+
|
|
727
|
+
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
|
728
|
+
let this = self.project();
|
|
729
|
+
this.inner.poll_next(cx)
|
|
730
|
+
}
|
|
731
|
+
}
|
|
551
732
|
|
|
552
733
|
struct TimeoutBag {
|
|
553
734
|
sched_to_close_handle: JoinHandle<()>,
|
|
@@ -645,18 +826,19 @@ impl Drop for TimeoutBag {
|
|
|
645
826
|
mod tests {
|
|
646
827
|
use super::*;
|
|
647
828
|
use crate::{prost_dur, protosext::LACloseTimeouts};
|
|
829
|
+
use futures_util::FutureExt;
|
|
648
830
|
use temporal_sdk_core_protos::temporal::api::{
|
|
649
831
|
common::v1::RetryPolicy,
|
|
650
832
|
failure::v1::{failure::FailureInfo, ApplicationFailureInfo, Failure},
|
|
651
833
|
};
|
|
652
|
-
use tokio::
|
|
834
|
+
use tokio::task::yield_now;
|
|
653
835
|
|
|
654
836
|
impl DispatchOrTimeoutLA {
|
|
655
837
|
fn unwrap(self) -> ActivityTask {
|
|
656
838
|
match self {
|
|
657
839
|
DispatchOrTimeoutLA::Dispatch(t) => t,
|
|
658
|
-
|
|
659
|
-
panic!("
|
|
840
|
+
_ => {
|
|
841
|
+
panic!("Non-dispatched action returned")
|
|
660
842
|
}
|
|
661
843
|
}
|
|
662
844
|
}
|
|
@@ -1031,18 +1213,66 @@ mod tests {
|
|
|
1031
1213
|
lam.next_pending().await.unwrap().unwrap();
|
|
1032
1214
|
assert_eq!(lam.num_outstanding(), 1);
|
|
1033
1215
|
// There should be nothing else in the queue
|
|
1034
|
-
|
|
1035
|
-
lam.rcvs.lock().await.act_req_rx.try_recv().unwrap_err(),
|
|
1036
|
-
TryRecvError::Empty
|
|
1037
|
-
);
|
|
1216
|
+
assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
|
|
1038
1217
|
|
|
1039
1218
|
// Verify that if we now enqueue the same act again, after the task is outstanding, we still
|
|
1040
1219
|
// don't add it.
|
|
1041
1220
|
lam.enqueue([new_la.into()]);
|
|
1042
1221
|
assert_eq!(lam.num_outstanding(), 1);
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1222
|
+
assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
|
|
1223
|
+
}
|
|
1224
|
+
|
|
1225
|
+
#[tokio::test]
|
|
1226
|
+
async fn nonfirst_la_attempt_count_is_accurate() {
|
|
1227
|
+
let run_id = "run_id";
|
|
1228
|
+
let lam = LocalActivityManager::test(10);
|
|
1229
|
+
let new_la = NewLocalAct {
|
|
1230
|
+
schedule_cmd: ValidScheduleLA {
|
|
1231
|
+
seq: 1,
|
|
1232
|
+
activity_id: 1.to_string(),
|
|
1233
|
+
retry_policy: RetryPolicy {
|
|
1234
|
+
initial_interval: Some(prost_dur!(from_millis(1))),
|
|
1235
|
+
backoff_coefficient: 1.0,
|
|
1236
|
+
..Default::default()
|
|
1237
|
+
},
|
|
1238
|
+
local_retry_threshold: Duration::from_secs(500),
|
|
1239
|
+
..Default::default()
|
|
1240
|
+
},
|
|
1241
|
+
workflow_type: "".to_string(),
|
|
1242
|
+
workflow_exec_info: WorkflowExecution {
|
|
1243
|
+
workflow_id: "".to_string(),
|
|
1244
|
+
run_id: run_id.to_string(),
|
|
1245
|
+
},
|
|
1246
|
+
schedule_time: SystemTime::now(),
|
|
1247
|
+
};
|
|
1248
|
+
lam.enqueue([new_la.clone().into()]);
|
|
1249
|
+
let spinfail = || async {
|
|
1250
|
+
for _ in 1..=10 {
|
|
1251
|
+
let next = lam.next_pending().await.unwrap().unwrap();
|
|
1252
|
+
let tt = TaskToken(next.task_token);
|
|
1253
|
+
lam.complete(
|
|
1254
|
+
&tt,
|
|
1255
|
+
&LocalActivityExecutionResult::Failed(Default::default()),
|
|
1256
|
+
);
|
|
1257
|
+
}
|
|
1258
|
+
};
|
|
1259
|
+
|
|
1260
|
+
// Fail a bunch of times
|
|
1261
|
+
spinfail().await;
|
|
1262
|
+
// Nonfirst attempt count should still be zero
|
|
1263
|
+
let count = lam.get_nonfirst_attempt_count(run_id);
|
|
1264
|
+
assert_eq!(count, 0);
|
|
1265
|
+
|
|
1266
|
+
for _ in 1..=2 {
|
|
1267
|
+
// This should work over multiple WFTs
|
|
1268
|
+
// say the first wft was completed
|
|
1269
|
+
lam.enqueue([LocalActRequest::IndicateWorkflowTaskCompleted(
|
|
1270
|
+
run_id.to_string(),
|
|
1271
|
+
)]);
|
|
1272
|
+
// Do some more attempts
|
|
1273
|
+
spinfail().await;
|
|
1274
|
+
let count = lam.get_nonfirst_attempt_count(run_id);
|
|
1275
|
+
assert_eq!(count, 10);
|
|
1276
|
+
}
|
|
1047
1277
|
}
|
|
1048
1278
|
}
|