@temporalio/core-bridge 1.5.2 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +304 -112
- package/lib/index.d.ts +8 -6
- package/lib/index.js.map +1 -1
- package/package.json +9 -4
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/docker/Dockerfile +2 -2
- package/sdk-core/.buildkite/docker/docker-compose.yaml +1 -1
- package/sdk-core/.buildkite/pipeline.yml +2 -4
- package/sdk-core/.cargo/config.toml +5 -2
- package/sdk-core/.github/workflows/heavy.yml +29 -0
- package/sdk-core/Cargo.toml +1 -1
- package/sdk-core/README.md +20 -10
- package/sdk-core/client/src/lib.rs +215 -39
- package/sdk-core/client/src/metrics.rs +17 -8
- package/sdk-core/client/src/raw.rs +4 -4
- package/sdk-core/client/src/retry.rs +32 -20
- package/sdk-core/core/Cargo.toml +25 -12
- package/sdk-core/core/src/abstractions/take_cell.rs +28 -0
- package/sdk-core/core/src/abstractions.rs +204 -14
- package/sdk-core/core/src/core_tests/activity_tasks.rs +143 -50
- package/sdk-core/core/src/core_tests/child_workflows.rs +6 -5
- package/sdk-core/core/src/core_tests/determinism.rs +165 -2
- package/sdk-core/core/src/core_tests/local_activities.rs +431 -43
- package/sdk-core/core/src/core_tests/queries.rs +34 -16
- package/sdk-core/core/src/core_tests/workers.rs +8 -5
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +588 -55
- package/sdk-core/core/src/ephemeral_server/mod.rs +113 -12
- package/sdk-core/core/src/internal_flags.rs +155 -0
- package/sdk-core/core/src/lib.rs +16 -9
- package/sdk-core/core/src/protosext/mod.rs +1 -1
- package/sdk-core/core/src/replay/mod.rs +16 -27
- package/sdk-core/core/src/telemetry/log_export.rs +1 -1
- package/sdk-core/core/src/telemetry/metrics.rs +69 -35
- package/sdk-core/core/src/telemetry/mod.rs +60 -21
- package/sdk-core/core/src/telemetry/prometheus_server.rs +19 -13
- package/sdk-core/core/src/test_help/mod.rs +73 -14
- package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +119 -160
- package/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
- package/sdk-core/core/src/worker/activities/local_activities.rs +379 -129
- package/sdk-core/core/src/worker/activities.rs +350 -175
- package/sdk-core/core/src/worker/client/mocks.rs +22 -2
- package/sdk-core/core/src/worker/client.rs +18 -2
- package/sdk-core/core/src/worker/mod.rs +183 -64
- package/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
- package/sdk-core/core/src/worker/workflow/driven_workflow.rs +3 -5
- package/sdk-core/core/src/worker/workflow/history_update.rs +916 -277
- package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +216 -183
- package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +9 -12
- package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +7 -9
- package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +160 -87
- package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +13 -14
- package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +7 -9
- package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +14 -17
- package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +242 -110
- package/sdk-core/core/src/worker/workflow/machines/mod.rs +27 -19
- package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +9 -11
- package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +321 -206
- package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +13 -18
- package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +20 -29
- package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
- package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +257 -51
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +310 -150
- package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +17 -20
- package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +31 -15
- package/sdk-core/core/src/worker/workflow/managed_run.rs +1052 -380
- package/sdk-core/core/src/worker/workflow/mod.rs +598 -390
- package/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
- package/sdk-core/core/src/worker/workflow/wft_extraction.rs +137 -0
- package/sdk-core/core/src/worker/workflow/wft_poller.rs +1 -4
- package/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
- package/sdk-core/core/src/worker/workflow/workflow_stream.rs +469 -718
- package/sdk-core/core-api/Cargo.toml +2 -1
- package/sdk-core/core-api/src/errors.rs +1 -34
- package/sdk-core/core-api/src/lib.rs +19 -9
- package/sdk-core/core-api/src/telemetry.rs +4 -6
- package/sdk-core/core-api/src/worker.rs +19 -1
- package/sdk-core/etc/deps.svg +115 -140
- package/sdk-core/etc/regen-depgraph.sh +5 -0
- package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +86 -61
- package/sdk-core/fsm/rustfsm_trait/src/lib.rs +29 -71
- package/sdk-core/histories/ends_empty_wft_complete.bin +0 -0
- package/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
- package/sdk-core/histories/old_change_marker_format.bin +0 -0
- package/sdk-core/protos/api_upstream/.github/CODEOWNERS +2 -1
- package/sdk-core/protos/api_upstream/Makefile +6 -6
- package/sdk-core/protos/api_upstream/build/go.mod +7 -0
- package/sdk-core/protos/api_upstream/build/go.sum +5 -0
- package/sdk-core/protos/api_upstream/build/tools.go +29 -0
- package/sdk-core/protos/api_upstream/go.mod +6 -0
- package/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +9 -2
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +7 -26
- package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +13 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +3 -7
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +8 -8
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +25 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +24 -19
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +49 -26
- package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +4 -2
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +5 -2
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/protocol/v1/message.proto +57 -0
- package/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
- package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +71 -6
- package/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +64 -28
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +4 -4
- package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +7 -8
- package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +10 -7
- package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +19 -30
- package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
- package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
- package/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +8 -0
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +67 -60
- package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +85 -84
- package/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +9 -3
- package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
- package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
- package/sdk-core/sdk/Cargo.toml +5 -4
- package/sdk-core/sdk/src/lib.rs +108 -26
- package/sdk-core/sdk/src/workflow_context/options.rs +7 -1
- package/sdk-core/sdk/src/workflow_context.rs +24 -17
- package/sdk-core/sdk/src/workflow_future.rs +16 -15
- package/sdk-core/sdk-core-protos/Cargo.toml +5 -2
- package/sdk-core/sdk-core-protos/build.rs +36 -2
- package/sdk-core/sdk-core-protos/src/history_builder.rs +138 -106
- package/sdk-core/sdk-core-protos/src/history_info.rs +10 -1
- package/sdk-core/sdk-core-protos/src/lib.rs +272 -87
- package/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
- package/sdk-core/test-utils/Cargo.toml +3 -1
- package/sdk-core/test-utils/src/canned_histories.rs +106 -296
- package/sdk-core/test-utils/src/histfetch.rs +1 -1
- package/sdk-core/test-utils/src/lib.rs +82 -23
- package/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
- package/sdk-core/test-utils/src/workflows.rs +29 -0
- package/sdk-core/tests/fuzzy_workflow.rs +130 -0
- package/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
- package/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
- package/sdk-core/tests/integ_tests/heartbeat_tests.rs +10 -5
- package/sdk-core/tests/integ_tests/metrics_tests.rs +218 -16
- package/sdk-core/tests/integ_tests/polling_tests.rs +4 -47
- package/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
- package/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
- package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +161 -72
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
- package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
- package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +80 -3
- package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
- package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +94 -200
- package/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +2 -4
- package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +34 -28
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +76 -7
- package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
- package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
- package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
- package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
- package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +7 -8
- package/sdk-core/tests/integ_tests/workflow_tests.rs +13 -14
- package/sdk-core/tests/main.rs +3 -13
- package/sdk-core/tests/runner.rs +75 -36
- package/sdk-core/tests/wf_input_replay.rs +32 -0
- package/src/conversions.rs +14 -8
- package/src/runtime.rs +9 -8
- package/ts/index.ts +8 -6
- package/sdk-core/bridge-ffi/Cargo.toml +0 -24
- package/sdk-core/bridge-ffi/LICENSE.txt +0 -23
- package/sdk-core/bridge-ffi/build.rs +0 -25
- package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -224
- package/sdk-core/bridge-ffi/src/lib.rs +0 -746
- package/sdk-core/bridge-ffi/src/wrappers.rs +0 -221
- package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
- package/sdk-core/sdk/src/conversions.rs +0 -8
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
use crate::{
|
|
2
|
-
abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
|
|
2
|
+
abstractions::{dbg_panic, MeteredSemaphore, OwnedMeteredSemPermit, UsedMeteredSemPermit},
|
|
3
3
|
protosext::ValidScheduleLA,
|
|
4
4
|
retry_logic::RetryPolicyExt,
|
|
5
|
+
worker::workflow::HeartbeatTimeoutMsg,
|
|
5
6
|
MetricsContext, TaskToken,
|
|
6
7
|
};
|
|
7
|
-
use
|
|
8
|
+
use futures::{stream::BoxStream, Stream};
|
|
9
|
+
use futures_util::{future, future::AbortRegistration, stream, StreamExt};
|
|
10
|
+
use parking_lot::{Mutex, MutexGuard};
|
|
8
11
|
use std::{
|
|
9
|
-
collections::HashMap,
|
|
12
|
+
collections::{hash_map::Entry, HashMap},
|
|
10
13
|
fmt::{Debug, Formatter},
|
|
14
|
+
pin::Pin,
|
|
15
|
+
task::{Context, Poll},
|
|
11
16
|
time::{Duration, Instant, SystemTime},
|
|
12
17
|
};
|
|
13
18
|
use temporal_sdk_core_protos::{
|
|
@@ -15,7 +20,11 @@ use temporal_sdk_core_protos::{
|
|
|
15
20
|
activity_result::{Cancellation, Failure as ActFail, Success},
|
|
16
21
|
activity_task::{activity_task, ActivityCancelReason, ActivityTask, Cancel, Start},
|
|
17
22
|
},
|
|
18
|
-
temporal::api::{
|
|
23
|
+
temporal::api::{
|
|
24
|
+
common::v1::WorkflowExecution,
|
|
25
|
+
enums::v1::TimeoutType,
|
|
26
|
+
failure::v1::{failure, Failure as APIFailure, TimeoutFailureInfo},
|
|
27
|
+
},
|
|
19
28
|
};
|
|
20
29
|
use tokio::{
|
|
21
30
|
sync::{
|
|
@@ -25,6 +34,7 @@ use tokio::{
|
|
|
25
34
|
task::JoinHandle,
|
|
26
35
|
time::sleep,
|
|
27
36
|
};
|
|
37
|
+
use tokio_stream::wrappers::UnboundedReceiverStream;
|
|
28
38
|
use tokio_util::sync::CancellationToken;
|
|
29
39
|
|
|
30
40
|
#[allow(clippy::large_enum_variant)] // Timeouts are relatively rare
|
|
@@ -45,10 +55,14 @@ pub(crate) struct LocalInFlightActInfo {
|
|
|
45
55
|
pub la_info: NewLocalAct,
|
|
46
56
|
pub dispatch_time: Instant,
|
|
47
57
|
pub attempt: u32,
|
|
48
|
-
_permit:
|
|
58
|
+
_permit: UsedMeteredSemPermit,
|
|
49
59
|
}
|
|
50
60
|
|
|
51
61
|
#[derive(Debug, Clone)]
|
|
62
|
+
#[cfg_attr(
|
|
63
|
+
feature = "save_wf_inputs",
|
|
64
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
65
|
+
)]
|
|
52
66
|
pub(crate) enum LocalActivityExecutionResult {
|
|
53
67
|
Completed(Success),
|
|
54
68
|
Failed(ActFail),
|
|
@@ -60,11 +74,26 @@ impl LocalActivityExecutionResult {
|
|
|
60
74
|
Self::Cancelled(Cancellation::from_details(None))
|
|
61
75
|
}
|
|
62
76
|
pub(crate) fn timeout(tt: TimeoutType) -> Self {
|
|
63
|
-
Self::TimedOut(ActFail
|
|
77
|
+
Self::TimedOut(ActFail {
|
|
78
|
+
failure: Some(APIFailure {
|
|
79
|
+
message: "Activity timed out".to_string(),
|
|
80
|
+
failure_info: Some(failure::FailureInfo::TimeoutFailureInfo(
|
|
81
|
+
TimeoutFailureInfo {
|
|
82
|
+
timeout_type: tt as i32,
|
|
83
|
+
last_heartbeat_details: None,
|
|
84
|
+
},
|
|
85
|
+
)),
|
|
86
|
+
..Default::default()
|
|
87
|
+
}),
|
|
88
|
+
})
|
|
64
89
|
}
|
|
65
90
|
}
|
|
66
91
|
|
|
67
92
|
#[derive(Debug, Clone)]
|
|
93
|
+
#[cfg_attr(
|
|
94
|
+
feature = "save_wf_inputs",
|
|
95
|
+
derive(serde::Serialize, serde::Deserialize)
|
|
96
|
+
)]
|
|
68
97
|
pub(crate) struct LocalActivityResolution {
|
|
69
98
|
pub seq: u32,
|
|
70
99
|
pub result: LocalActivityExecutionResult,
|
|
@@ -96,6 +125,17 @@ impl Debug for NewLocalAct {
|
|
|
96
125
|
pub(crate) enum LocalActRequest {
|
|
97
126
|
New(NewLocalAct),
|
|
98
127
|
Cancel(ExecutingLAId),
|
|
128
|
+
#[from(ignore)]
|
|
129
|
+
CancelAllInRun(String),
|
|
130
|
+
StartHeartbeatTimeout {
|
|
131
|
+
send_on_elapse: HeartbeatTimeoutMsg,
|
|
132
|
+
deadline: Instant,
|
|
133
|
+
abort_reg: AbortRegistration,
|
|
134
|
+
},
|
|
135
|
+
/// Tell the LA manager that a workflow task was responded to (completed or failed) for a
|
|
136
|
+
/// certain run id
|
|
137
|
+
#[from(ignore)]
|
|
138
|
+
IndicateWorkflowTaskCompleted(String),
|
|
99
139
|
}
|
|
100
140
|
|
|
101
141
|
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
|
|
@@ -107,28 +147,43 @@ pub(crate) struct ExecutingLAId {
|
|
|
107
147
|
pub(crate) struct LocalActivityManager {
|
|
108
148
|
/// Just so we can provide activity tasks the same namespace as the worker
|
|
109
149
|
namespace: String,
|
|
110
|
-
/// Constrains number of currently executing local activities
|
|
111
|
-
semaphore: MeteredSemaphore,
|
|
112
150
|
/// Sink for new activity execution requests
|
|
113
151
|
act_req_tx: UnboundedSender<NewOrRetry>,
|
|
114
152
|
/// Cancels need a different queue since they should be taken first, and don't take a permit
|
|
115
153
|
cancels_req_tx: UnboundedSender<CancelOrTimeout>,
|
|
154
|
+
/// For the emission of heartbeat timeouts, back into the workflow machines. This channel
|
|
155
|
+
/// needs to come in from above us, because we cannot rely on callers getting the next
|
|
156
|
+
/// activation as a way to deliver heartbeats.
|
|
157
|
+
heartbeat_timeout_tx: UnboundedSender<HeartbeatTimeoutMsg>,
|
|
116
158
|
/// Wakes every time a complete is processed
|
|
117
159
|
complete_notify: Notify,
|
|
160
|
+
/// Set once workflows have finished shutting down, and thus we know we will no longer receive
|
|
161
|
+
/// any requests to spawn new LAs
|
|
162
|
+
workflows_have_shut_down: CancellationToken,
|
|
118
163
|
|
|
119
164
|
rcvs: tokio::sync::Mutex<RcvChans>,
|
|
120
165
|
shutdown_complete_tok: CancellationToken,
|
|
121
166
|
dat: Mutex<LAMData>,
|
|
122
167
|
}
|
|
123
168
|
|
|
169
|
+
struct LocalActivityInfo {
|
|
170
|
+
task_token: TaskToken,
|
|
171
|
+
/// Tasks for the current backoff until the next retry, if any.
|
|
172
|
+
backing_off_task: Option<JoinHandle<()>>,
|
|
173
|
+
/// Tasks / info about timeouts associated with this LA. May be empty for very brief periods
|
|
174
|
+
/// while the LA id has been generated, but it has not yet been scheduled.
|
|
175
|
+
timeout_bag: Option<TimeoutBag>,
|
|
176
|
+
/// True once the first workflow task this LA started in has elapsed
|
|
177
|
+
first_wft_has_ended: bool,
|
|
178
|
+
/// Attempts at executing this LA during the current WFT
|
|
179
|
+
attempts_in_wft: usize,
|
|
180
|
+
}
|
|
181
|
+
|
|
124
182
|
struct LAMData {
|
|
183
|
+
/// Maps local activity identifiers to information about them
|
|
184
|
+
la_info: HashMap<ExecutingLAId, LocalActivityInfo>,
|
|
125
185
|
/// Activities that have been issued to lang but not yet completed
|
|
126
186
|
outstanding_activity_tasks: HashMap<TaskToken, LocalInFlightActInfo>,
|
|
127
|
-
id_to_tt: HashMap<ExecutingLAId, TaskToken>,
|
|
128
|
-
/// Tasks for activities which are currently backing off. May be used to cancel retrying them.
|
|
129
|
-
backing_off_tasks: HashMap<ExecutingLAId, JoinHandle<()>>,
|
|
130
|
-
/// Tasks for timing out activities which are currently in the queue or dispatched.
|
|
131
|
-
timeout_tasks: HashMap<ExecutingLAId, TimeoutBag>,
|
|
132
187
|
next_tt_num: u32,
|
|
133
188
|
}
|
|
134
189
|
|
|
@@ -143,42 +198,46 @@ impl LocalActivityManager {
|
|
|
143
198
|
pub(crate) fn new(
|
|
144
199
|
max_concurrent: usize,
|
|
145
200
|
namespace: String,
|
|
201
|
+
heartbeat_timeout_tx: UnboundedSender<HeartbeatTimeoutMsg>,
|
|
146
202
|
metrics_context: MetricsContext,
|
|
147
203
|
) -> Self {
|
|
148
204
|
let (act_req_tx, act_req_rx) = unbounded_channel();
|
|
149
205
|
let (cancels_req_tx, cancels_req_rx) = unbounded_channel();
|
|
150
206
|
let shutdown_complete_tok = CancellationToken::new();
|
|
207
|
+
let semaphore = MeteredSemaphore::new(
|
|
208
|
+
max_concurrent,
|
|
209
|
+
metrics_context,
|
|
210
|
+
MetricsContext::available_task_slots,
|
|
211
|
+
);
|
|
151
212
|
Self {
|
|
152
213
|
namespace,
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
214
|
+
rcvs: tokio::sync::Mutex::new(RcvChans::new(
|
|
215
|
+
act_req_rx,
|
|
216
|
+
semaphore,
|
|
217
|
+
cancels_req_rx,
|
|
218
|
+
shutdown_complete_tok.clone(),
|
|
219
|
+
)),
|
|
158
220
|
act_req_tx,
|
|
159
221
|
cancels_req_tx,
|
|
222
|
+
heartbeat_timeout_tx,
|
|
160
223
|
complete_notify: Notify::new(),
|
|
161
|
-
rcvs: tokio::sync::Mutex::new(RcvChans {
|
|
162
|
-
act_req_rx,
|
|
163
|
-
cancels_req_rx,
|
|
164
|
-
shutdown: shutdown_complete_tok.clone(),
|
|
165
|
-
}),
|
|
166
224
|
shutdown_complete_tok,
|
|
167
225
|
dat: Mutex::new(LAMData {
|
|
168
226
|
outstanding_activity_tasks: Default::default(),
|
|
169
|
-
|
|
170
|
-
backing_off_tasks: Default::default(),
|
|
171
|
-
timeout_tasks: Default::default(),
|
|
227
|
+
la_info: Default::default(),
|
|
172
228
|
next_tt_num: 0,
|
|
173
229
|
}),
|
|
230
|
+
workflows_have_shut_down: Default::default(),
|
|
174
231
|
}
|
|
175
232
|
}
|
|
176
233
|
|
|
177
234
|
#[cfg(test)]
|
|
178
235
|
fn test(max_concurrent: usize) -> Self {
|
|
236
|
+
let (hb_tx, _hb_rx) = unbounded_channel();
|
|
179
237
|
Self::new(
|
|
180
238
|
max_concurrent,
|
|
181
239
|
"fake_ns".to_string(),
|
|
240
|
+
hb_tx,
|
|
182
241
|
MetricsContext::no_op(),
|
|
183
242
|
)
|
|
184
243
|
}
|
|
@@ -190,76 +249,116 @@ impl LocalActivityManager {
|
|
|
190
249
|
|
|
191
250
|
#[cfg(test)]
|
|
192
251
|
fn num_in_backoff(&self) -> usize {
|
|
193
|
-
self.dat
|
|
252
|
+
self.dat
|
|
253
|
+
.lock()
|
|
254
|
+
.la_info
|
|
255
|
+
.values()
|
|
256
|
+
.filter(|lai| lai.backing_off_task.is_some())
|
|
257
|
+
.count()
|
|
194
258
|
}
|
|
195
259
|
|
|
196
260
|
pub(crate) fn enqueue(
|
|
197
261
|
&self,
|
|
198
262
|
reqs: impl IntoIterator<Item = LocalActRequest>,
|
|
199
263
|
) -> Vec<LocalActivityResolution> {
|
|
264
|
+
if self.workflows_have_shut_down.is_cancelled() {
|
|
265
|
+
dbg_panic!("Tried to enqueue local activity after workflows were shut down");
|
|
266
|
+
return vec![];
|
|
267
|
+
}
|
|
200
268
|
let mut immediate_resolutions = vec![];
|
|
201
269
|
for req in reqs {
|
|
202
|
-
debug!(local_activity = ?req, "Queuing local activity");
|
|
203
270
|
match req {
|
|
204
271
|
LocalActRequest::New(act) => {
|
|
272
|
+
debug!(local_activity=?act, "Queuing local activity");
|
|
205
273
|
let id = ExecutingLAId {
|
|
206
274
|
run_id: act.workflow_exec_info.run_id.clone(),
|
|
207
275
|
seq_num: act.schedule_cmd.seq,
|
|
208
276
|
};
|
|
209
277
|
let mut dlock = self.dat.lock();
|
|
210
|
-
if dlock.id_to_tt.contains_key(&id) {
|
|
211
|
-
// Do not queue local activities which are in fact already executing.
|
|
212
|
-
// This can happen during evictions.
|
|
213
|
-
debug!("Tried to queue already-executing local activity {:?}", &id);
|
|
214
|
-
continue;
|
|
215
|
-
}
|
|
216
|
-
// Pre-generate and insert the task token now, before we may or may not dispatch
|
|
217
|
-
// the activity, so we can enforce idempotency. Prevents two identical LAs
|
|
218
|
-
// ending up in the queue at once.
|
|
219
278
|
let tt = dlock.gen_next_token();
|
|
220
|
-
dlock.
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
279
|
+
match dlock.la_info.entry(id) {
|
|
280
|
+
Entry::Occupied(o) => {
|
|
281
|
+
// Do not queue local activities which are in fact already executing.
|
|
282
|
+
// This can happen during evictions.
|
|
283
|
+
debug!(
|
|
284
|
+
"Tried to queue already-executing local activity {:?}",
|
|
285
|
+
o.key()
|
|
286
|
+
);
|
|
287
|
+
continue;
|
|
288
|
+
}
|
|
289
|
+
Entry::Vacant(ve) => {
|
|
290
|
+
// Insert the task token now, before we may or may not dispatch the
|
|
291
|
+
// activity, so we can enforce idempotency. Prevents two identical LAs
|
|
292
|
+
// ending up in the queue at once.
|
|
293
|
+
let lai = ve.insert(LocalActivityInfo {
|
|
294
|
+
task_token: tt,
|
|
295
|
+
backing_off_task: None,
|
|
296
|
+
timeout_bag: None,
|
|
297
|
+
first_wft_has_ended: false,
|
|
298
|
+
attempts_in_wft: 0,
|
|
299
|
+
});
|
|
300
|
+
|
|
301
|
+
// Set up timeouts for the new activity
|
|
302
|
+
match TimeoutBag::new(&act, self.cancels_req_tx.clone()) {
|
|
303
|
+
Ok(tb) => {
|
|
304
|
+
lai.timeout_bag = Some(tb);
|
|
305
|
+
|
|
306
|
+
self.act_req_tx.send(NewOrRetry::New(act)).expect(
|
|
307
|
+
"Receive half of LA request channel cannot be dropped",
|
|
308
|
+
);
|
|
309
|
+
}
|
|
310
|
+
Err(res) => immediate_resolutions.push(res),
|
|
311
|
+
}
|
|
230
312
|
}
|
|
231
|
-
Err(res) => immediate_resolutions.push(res),
|
|
232
313
|
}
|
|
233
314
|
}
|
|
315
|
+
LocalActRequest::StartHeartbeatTimeout {
|
|
316
|
+
send_on_elapse,
|
|
317
|
+
deadline,
|
|
318
|
+
abort_reg,
|
|
319
|
+
} => {
|
|
320
|
+
let chan = self.heartbeat_timeout_tx.clone();
|
|
321
|
+
tokio::spawn(future::Abortable::new(
|
|
322
|
+
async move {
|
|
323
|
+
tokio::time::sleep_until(deadline.into()).await;
|
|
324
|
+
let _ = chan.send(send_on_elapse);
|
|
325
|
+
},
|
|
326
|
+
abort_reg,
|
|
327
|
+
));
|
|
328
|
+
}
|
|
234
329
|
LocalActRequest::Cancel(id) => {
|
|
330
|
+
debug!(id=?id, "Cancelling local activity");
|
|
235
331
|
let mut dlock = self.dat.lock();
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
t.abort();
|
|
241
|
-
immediate_resolutions.push(LocalActivityResolution {
|
|
242
|
-
seq: id.seq_num,
|
|
243
|
-
result: LocalActivityExecutionResult::Cancelled(
|
|
244
|
-
Cancellation::from_details(None),
|
|
245
|
-
),
|
|
246
|
-
runtime: Duration::from_secs(0),
|
|
247
|
-
attempt: 0,
|
|
248
|
-
backoff: None,
|
|
249
|
-
original_schedule_time: None,
|
|
250
|
-
});
|
|
251
|
-
continue;
|
|
332
|
+
if let Some(lai) = dlock.la_info.get_mut(&id) {
|
|
333
|
+
if let Some(immediate_res) = self.cancel_one_la(id.seq_num, lai) {
|
|
334
|
+
immediate_resolutions.push(immediate_res);
|
|
335
|
+
}
|
|
252
336
|
}
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
337
|
+
}
|
|
338
|
+
LocalActRequest::CancelAllInRun(run_id) => {
|
|
339
|
+
debug!(run_id=%run_id, "Cancelling all local activities for run");
|
|
340
|
+
let mut dlock = self.dat.lock();
|
|
341
|
+
// Even if we've got 100k+ LAs this should only take a ms or two. Not worth
|
|
342
|
+
// adding another map to keep in sync.
|
|
343
|
+
let las_for_run = dlock
|
|
344
|
+
.la_info
|
|
345
|
+
.iter_mut()
|
|
346
|
+
.filter(|(id, _)| id.run_id == run_id);
|
|
347
|
+
for (laid, lainf) in las_for_run {
|
|
348
|
+
if let Some(immediate_res) = self.cancel_one_la(laid.seq_num, lainf) {
|
|
349
|
+
immediate_resolutions.push(immediate_res);
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
LocalActRequest::IndicateWorkflowTaskCompleted(run_id) => {
|
|
354
|
+
let mut dlock = self.dat.lock();
|
|
355
|
+
let las_for_run = dlock
|
|
356
|
+
.la_info
|
|
357
|
+
.iter_mut()
|
|
358
|
+
.filter(|(id, _)| id.run_id == run_id);
|
|
359
|
+
for (_, lainf) in las_for_run {
|
|
360
|
+
lainf.first_wft_has_ended = true;
|
|
361
|
+
lainf.attempts_in_wft = 0;
|
|
263
362
|
}
|
|
264
363
|
}
|
|
265
364
|
}
|
|
@@ -270,7 +369,7 @@ impl LocalActivityManager {
|
|
|
270
369
|
/// Returns the next pending local-activity related action, or None if shutdown has initiated
|
|
271
370
|
/// and there are no more remaining actions to take.
|
|
272
371
|
pub(crate) async fn next_pending(&self) -> Option<DispatchOrTimeoutLA> {
|
|
273
|
-
let (new_or_retry, permit) = match self.rcvs.lock().await.next(
|
|
372
|
+
let (new_or_retry, permit) = match self.rcvs.lock().await.next().await? {
|
|
274
373
|
NewOrCancel::Cancel(c) => {
|
|
275
374
|
return match c {
|
|
276
375
|
CancelOrTimeout::Cancel(c) => Some(DispatchOrTimeoutLA::Dispatch(c)),
|
|
@@ -283,12 +382,13 @@ impl LocalActivityManager {
|
|
|
283
382
|
let tt = self
|
|
284
383
|
.dat
|
|
285
384
|
.lock()
|
|
286
|
-
.
|
|
385
|
+
.la_info
|
|
287
386
|
.get(&ExecutingLAId {
|
|
288
387
|
run_id: run_id.clone(),
|
|
289
388
|
seq_num: resolution.seq,
|
|
290
389
|
})
|
|
291
|
-
.
|
|
390
|
+
.as_ref()
|
|
391
|
+
.map(|lai| lai.task_token.clone());
|
|
292
392
|
if let Some(task_token) = tt {
|
|
293
393
|
self.complete(&task_token, &resolution.result);
|
|
294
394
|
Some(ActivityTask {
|
|
@@ -323,18 +423,21 @@ impl LocalActivityManager {
|
|
|
323
423
|
}
|
|
324
424
|
NewOrRetry::Retry { in_flight, attempt } => (in_flight, attempt),
|
|
325
425
|
};
|
|
326
|
-
let
|
|
426
|
+
let la_info_for_in_flight_map = new_la.clone();
|
|
327
427
|
let id = ExecutingLAId {
|
|
328
428
|
run_id: new_la.workflow_exec_info.run_id.clone(),
|
|
329
429
|
seq_num: new_la.schedule_cmd.seq,
|
|
330
430
|
};
|
|
431
|
+
let orig_sched_time = new_la.schedule_cmd.original_schedule_time;
|
|
331
432
|
let sa = new_la.schedule_cmd;
|
|
332
433
|
|
|
333
434
|
let mut dat = self.dat.lock();
|
|
334
435
|
// If this request originated from a local backoff task, clear the entry for it. We
|
|
335
436
|
// don't await the handle because we know it must already be done, and there's no
|
|
336
437
|
// meaningful value.
|
|
337
|
-
dat.
|
|
438
|
+
dat.la_info
|
|
439
|
+
.get_mut(&id)
|
|
440
|
+
.map(|lai| lai.backing_off_task.take());
|
|
338
441
|
|
|
339
442
|
// If this task sat in the queue for too long, return a timeout for it instead
|
|
340
443
|
if let Some(s2s) = sa.schedule_to_start_timeout.as_ref() {
|
|
@@ -348,30 +451,27 @@ impl LocalActivityManager {
|
|
|
348
451
|
runtime: sat_for,
|
|
349
452
|
attempt,
|
|
350
453
|
backoff: None,
|
|
351
|
-
original_schedule_time:
|
|
454
|
+
original_schedule_time: orig_sched_time,
|
|
352
455
|
},
|
|
353
456
|
task: None,
|
|
354
457
|
});
|
|
355
458
|
}
|
|
356
459
|
}
|
|
357
460
|
|
|
358
|
-
let
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
.
|
|
362
|
-
|
|
461
|
+
let la_info = dat.la_info.get_mut(&id).expect("Activity must exist");
|
|
462
|
+
let tt = la_info.task_token.clone();
|
|
463
|
+
if let Some(to) = la_info.timeout_bag.as_mut() {
|
|
464
|
+
to.mark_started();
|
|
465
|
+
}
|
|
363
466
|
dat.outstanding_activity_tasks.insert(
|
|
364
467
|
tt.clone(),
|
|
365
468
|
LocalInFlightActInfo {
|
|
366
|
-
la_info:
|
|
469
|
+
la_info: la_info_for_in_flight_map,
|
|
367
470
|
dispatch_time: Instant::now(),
|
|
368
471
|
attempt,
|
|
369
|
-
_permit: permit,
|
|
472
|
+
_permit: permit.into_used(),
|
|
370
473
|
},
|
|
371
474
|
);
|
|
372
|
-
if let Some(to) = dat.timeout_tasks.get_mut(&id) {
|
|
373
|
-
to.mark_started();
|
|
374
|
-
}
|
|
375
475
|
|
|
376
476
|
let (schedule_to_close, start_to_close) = sa.close_timeouts.into_sched_and_start();
|
|
377
477
|
Some(DispatchOrTimeoutLA::Dispatch(ActivityTask {
|
|
@@ -406,11 +506,23 @@ impl LocalActivityManager {
|
|
|
406
506
|
) -> LACompleteAction {
|
|
407
507
|
let mut dlock = self.dat.lock();
|
|
408
508
|
if let Some(info) = dlock.outstanding_activity_tasks.remove(task_token) {
|
|
509
|
+
if self.workflows_have_shut_down.is_cancelled() {
|
|
510
|
+
// If workflows are already shut down, the results of all this don't matter.
|
|
511
|
+
// Just say we're done if there's nothing outstanding any more.
|
|
512
|
+
self.set_shutdown_complete_if_ready(&mut dlock);
|
|
513
|
+
}
|
|
514
|
+
|
|
409
515
|
let exec_id = ExecutingLAId {
|
|
410
516
|
run_id: info.la_info.workflow_exec_info.run_id.clone(),
|
|
411
517
|
seq_num: info.la_info.schedule_cmd.seq,
|
|
412
518
|
};
|
|
413
|
-
dlock.
|
|
519
|
+
let maybe_old_lai = dlock.la_info.remove(&exec_id);
|
|
520
|
+
if let Some(ref oldlai) = maybe_old_lai {
|
|
521
|
+
if let Some(ref bot) = oldlai.backing_off_task {
|
|
522
|
+
dbg_panic!("Just-resolved LA should not have backoff task");
|
|
523
|
+
bot.abort();
|
|
524
|
+
}
|
|
525
|
+
}
|
|
414
526
|
|
|
415
527
|
match status {
|
|
416
528
|
LocalActivityExecutionResult::Completed(_)
|
|
@@ -446,8 +558,6 @@ impl LocalActivityManager {
|
|
|
446
558
|
}
|
|
447
559
|
// Immediately create a new task token for the to-be-retried LA
|
|
448
560
|
let tt = dlock.gen_next_token();
|
|
449
|
-
dlock.id_to_tt.insert(exec_id.clone(), tt);
|
|
450
|
-
|
|
451
561
|
// Send the retry request after waiting the backoff duration
|
|
452
562
|
let send_chan = self.act_req_tx.clone();
|
|
453
563
|
let jh = tokio::spawn(async move {
|
|
@@ -460,7 +570,22 @@ impl LocalActivityManager {
|
|
|
460
570
|
})
|
|
461
571
|
.expect("Receive half of LA request channel cannot be dropped");
|
|
462
572
|
});
|
|
463
|
-
dlock.
|
|
573
|
+
dlock.la_info.insert(
|
|
574
|
+
exec_id,
|
|
575
|
+
LocalActivityInfo {
|
|
576
|
+
task_token: tt,
|
|
577
|
+
backing_off_task: Some(jh),
|
|
578
|
+
first_wft_has_ended: maybe_old_lai
|
|
579
|
+
.as_ref()
|
|
580
|
+
.map(|old| old.first_wft_has_ended)
|
|
581
|
+
.unwrap_or_default(),
|
|
582
|
+
attempts_in_wft: maybe_old_lai
|
|
583
|
+
.as_ref()
|
|
584
|
+
.map(|old| old.attempts_in_wft + 1)
|
|
585
|
+
.unwrap_or(1),
|
|
586
|
+
timeout_bag: maybe_old_lai.and_then(|old| old.timeout_bag),
|
|
587
|
+
},
|
|
588
|
+
);
|
|
464
589
|
|
|
465
590
|
LACompleteAction::WillBeRetried
|
|
466
591
|
} else {
|
|
@@ -473,11 +598,70 @@ impl LocalActivityManager {
|
|
|
473
598
|
}
|
|
474
599
|
}
|
|
475
600
|
|
|
476
|
-
pub(crate)
|
|
477
|
-
|
|
601
|
+
pub(crate) fn workflows_have_shutdown(&self) {
|
|
602
|
+
self.workflows_have_shut_down.cancel();
|
|
603
|
+
self.set_shutdown_complete_if_ready(&mut self.dat.lock());
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
pub(crate) async fn wait_all_outstanding_tasks_finished(&self) {
|
|
607
|
+
while !self.set_shutdown_complete_if_ready(&mut self.dat.lock()) {
|
|
478
608
|
self.complete_notify.notified().await;
|
|
479
609
|
}
|
|
480
|
-
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
/// Try to close the activity stream as soon as worker shutdown is initiated. This is required
|
|
613
|
+
/// for activity-only workers where since workflows are not polled and the activity poller might
|
|
614
|
+
/// get "stuck".
|
|
615
|
+
pub(crate) fn shutdown_initiated(&self) {
|
|
616
|
+
self.set_shutdown_complete_if_ready(&mut self.dat.lock());
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
pub(crate) fn get_nonfirst_attempt_count(&self, for_run_id: &str) -> usize {
|
|
620
|
+
let dlock = self.dat.lock();
|
|
621
|
+
dlock
|
|
622
|
+
.la_info
|
|
623
|
+
.iter()
|
|
624
|
+
.filter(|(id, info)| id.run_id == for_run_id && info.first_wft_has_ended)
|
|
625
|
+
.map(|(_, info)| info.attempts_in_wft)
|
|
626
|
+
.sum()
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
fn set_shutdown_complete_if_ready(&self, dlock: &mut MutexGuard<LAMData>) -> bool {
|
|
630
|
+
let nothing_outstanding = dlock.outstanding_activity_tasks.is_empty();
|
|
631
|
+
if nothing_outstanding && self.workflows_have_shut_down.is_cancelled() {
|
|
632
|
+
self.shutdown_complete_tok.cancel();
|
|
633
|
+
}
|
|
634
|
+
nothing_outstanding
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
fn cancel_one_la(
|
|
638
|
+
&self,
|
|
639
|
+
seq: u32,
|
|
640
|
+
lai: &mut LocalActivityInfo,
|
|
641
|
+
) -> Option<LocalActivityResolution> {
|
|
642
|
+
// First check if this ID is currently backing off, if so abort the backoff
|
|
643
|
+
// task
|
|
644
|
+
if let Some(t) = lai.backing_off_task.take() {
|
|
645
|
+
t.abort();
|
|
646
|
+
return Some(LocalActivityResolution {
|
|
647
|
+
seq,
|
|
648
|
+
result: LocalActivityExecutionResult::Cancelled(Cancellation::from_details(None)),
|
|
649
|
+
runtime: Duration::from_secs(0),
|
|
650
|
+
attempt: 0,
|
|
651
|
+
backoff: None,
|
|
652
|
+
original_schedule_time: None,
|
|
653
|
+
});
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
self.cancels_req_tx
|
|
657
|
+
.send(CancelOrTimeout::Cancel(ActivityTask {
|
|
658
|
+
task_token: lai.task_token.0.clone(),
|
|
659
|
+
variant: Some(activity_task::Variant::Cancel(Cancel {
|
|
660
|
+
reason: ActivityCancelReason::Cancelled as i32,
|
|
661
|
+
})),
|
|
662
|
+
}))
|
|
663
|
+
.expect("Receive half of LA cancel channel cannot be dropped");
|
|
664
|
+
None
|
|
481
665
|
}
|
|
482
666
|
}
|
|
483
667
|
|
|
@@ -521,32 +705,45 @@ enum NewOrCancel {
|
|
|
521
705
|
Cancel(CancelOrTimeout),
|
|
522
706
|
}
|
|
523
707
|
|
|
708
|
+
#[pin_project::pin_project]
|
|
524
709
|
struct RcvChans {
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
/// Cancels to send to lang or apply internally
|
|
528
|
-
cancels_req_rx: UnboundedReceiver<CancelOrTimeout>,
|
|
529
|
-
shutdown: CancellationToken,
|
|
710
|
+
#[pin]
|
|
711
|
+
inner: BoxStream<'static, NewOrCancel>,
|
|
530
712
|
}
|
|
531
713
|
|
|
532
714
|
impl RcvChans {
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
715
|
+
fn new(
|
|
716
|
+
new_reqs: UnboundedReceiver<NewOrRetry>,
|
|
717
|
+
new_sem: MeteredSemaphore,
|
|
718
|
+
cancels: UnboundedReceiver<CancelOrTimeout>,
|
|
719
|
+
shutdown_completed: CancellationToken,
|
|
720
|
+
) -> Self {
|
|
721
|
+
let cancel_stream = UnboundedReceiverStream::new(cancels).map(NewOrCancel::Cancel);
|
|
722
|
+
let new_stream = UnboundedReceiverStream::new(new_reqs)
|
|
723
|
+
// Get a permit for each new activity request
|
|
724
|
+
.zip(stream::unfold(new_sem, |new_sem| async move {
|
|
725
|
+
let permit = new_sem
|
|
726
|
+
.acquire_owned()
|
|
727
|
+
.await
|
|
728
|
+
.expect("Local activity semaphore is never closed");
|
|
729
|
+
Some((permit, new_sem))
|
|
730
|
+
}))
|
|
731
|
+
.map(|(req, permit)| NewOrCancel::New(req, permit));
|
|
732
|
+
Self {
|
|
733
|
+
inner: tokio_stream::StreamExt::merge(cancel_stream, new_stream)
|
|
734
|
+
.take_until(async move { shutdown_completed.cancelled().await })
|
|
735
|
+
.boxed(),
|
|
547
736
|
}
|
|
548
737
|
}
|
|
549
738
|
}
|
|
739
|
+
impl Stream for RcvChans {
|
|
740
|
+
type Item = NewOrCancel;
|
|
741
|
+
|
|
742
|
+
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
|
743
|
+
let this = self.project();
|
|
744
|
+
this.inner.poll_next(cx)
|
|
745
|
+
}
|
|
746
|
+
}
|
|
550
747
|
|
|
551
748
|
struct TimeoutBag {
|
|
552
749
|
sched_to_close_handle: JoinHandle<()>,
|
|
@@ -567,17 +764,21 @@ impl TimeoutBag {
|
|
|
567
764
|
let (schedule_to_close, start_to_close) =
|
|
568
765
|
new_la.schedule_cmd.close_timeouts.into_sched_and_start();
|
|
569
766
|
|
|
767
|
+
let sched_time = new_la
|
|
768
|
+
.schedule_cmd
|
|
769
|
+
.original_schedule_time
|
|
770
|
+
.unwrap_or(new_la.schedule_time);
|
|
570
771
|
let resolution = LocalActivityResolution {
|
|
571
772
|
seq: new_la.schedule_cmd.seq,
|
|
572
773
|
result: LocalActivityExecutionResult::timeout(TimeoutType::ScheduleToClose),
|
|
573
774
|
runtime: Default::default(),
|
|
574
775
|
attempt: new_la.schedule_cmd.attempt,
|
|
575
776
|
backoff: None,
|
|
576
|
-
original_schedule_time:
|
|
777
|
+
original_schedule_time: new_la.schedule_cmd.original_schedule_time,
|
|
577
778
|
};
|
|
578
779
|
// Remove any time already elapsed since the scheduling time
|
|
579
780
|
let schedule_to_close = schedule_to_close
|
|
580
|
-
.map(|s2c| s2c.saturating_sub(
|
|
781
|
+
.map(|s2c| s2c.saturating_sub(sched_time.elapsed().unwrap_or_default()));
|
|
581
782
|
if let Some(ref s2c) = schedule_to_close {
|
|
582
783
|
if s2c.is_zero() {
|
|
583
784
|
return Err(resolution);
|
|
@@ -640,18 +841,19 @@ impl Drop for TimeoutBag {
|
|
|
640
841
|
mod tests {
|
|
641
842
|
use super::*;
|
|
642
843
|
use crate::{prost_dur, protosext::LACloseTimeouts};
|
|
844
|
+
use futures_util::FutureExt;
|
|
643
845
|
use temporal_sdk_core_protos::temporal::api::{
|
|
644
846
|
common::v1::RetryPolicy,
|
|
645
847
|
failure::v1::{failure::FailureInfo, ApplicationFailureInfo, Failure},
|
|
646
848
|
};
|
|
647
|
-
use tokio::
|
|
849
|
+
use tokio::task::yield_now;
|
|
648
850
|
|
|
649
851
|
impl DispatchOrTimeoutLA {
|
|
650
852
|
fn unwrap(self) -> ActivityTask {
|
|
651
853
|
match self {
|
|
652
854
|
DispatchOrTimeoutLA::Dispatch(t) => t,
|
|
653
|
-
|
|
654
|
-
panic!("
|
|
855
|
+
_ => {
|
|
856
|
+
panic!("Non-dispatched action returned")
|
|
655
857
|
}
|
|
656
858
|
}
|
|
657
859
|
}
|
|
@@ -1026,18 +1228,66 @@ mod tests {
|
|
|
1026
1228
|
lam.next_pending().await.unwrap().unwrap();
|
|
1027
1229
|
assert_eq!(lam.num_outstanding(), 1);
|
|
1028
1230
|
// There should be nothing else in the queue
|
|
1029
|
-
|
|
1030
|
-
lam.rcvs.lock().await.act_req_rx.try_recv().unwrap_err(),
|
|
1031
|
-
TryRecvError::Empty
|
|
1032
|
-
);
|
|
1231
|
+
assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
|
|
1033
1232
|
|
|
1034
1233
|
// Verify that if we now enqueue the same act again, after the task is outstanding, we still
|
|
1035
1234
|
// don't add it.
|
|
1036
1235
|
lam.enqueue([new_la.into()]);
|
|
1037
1236
|
assert_eq!(lam.num_outstanding(), 1);
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1237
|
+
assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
|
|
1238
|
+
}
|
|
1239
|
+
|
|
1240
|
+
#[tokio::test]
|
|
1241
|
+
async fn nonfirst_la_attempt_count_is_accurate() {
|
|
1242
|
+
let run_id = "run_id";
|
|
1243
|
+
let lam = LocalActivityManager::test(10);
|
|
1244
|
+
let new_la = NewLocalAct {
|
|
1245
|
+
schedule_cmd: ValidScheduleLA {
|
|
1246
|
+
seq: 1,
|
|
1247
|
+
activity_id: 1.to_string(),
|
|
1248
|
+
retry_policy: RetryPolicy {
|
|
1249
|
+
initial_interval: Some(prost_dur!(from_millis(1))),
|
|
1250
|
+
backoff_coefficient: 1.0,
|
|
1251
|
+
..Default::default()
|
|
1252
|
+
},
|
|
1253
|
+
local_retry_threshold: Duration::from_secs(500),
|
|
1254
|
+
..Default::default()
|
|
1255
|
+
},
|
|
1256
|
+
workflow_type: "".to_string(),
|
|
1257
|
+
workflow_exec_info: WorkflowExecution {
|
|
1258
|
+
workflow_id: "".to_string(),
|
|
1259
|
+
run_id: run_id.to_string(),
|
|
1260
|
+
},
|
|
1261
|
+
schedule_time: SystemTime::now(),
|
|
1262
|
+
};
|
|
1263
|
+
lam.enqueue([new_la.clone().into()]);
|
|
1264
|
+
let spinfail = || async {
|
|
1265
|
+
for _ in 1..=10 {
|
|
1266
|
+
let next = lam.next_pending().await.unwrap().unwrap();
|
|
1267
|
+
let tt = TaskToken(next.task_token);
|
|
1268
|
+
lam.complete(
|
|
1269
|
+
&tt,
|
|
1270
|
+
&LocalActivityExecutionResult::Failed(Default::default()),
|
|
1271
|
+
);
|
|
1272
|
+
}
|
|
1273
|
+
};
|
|
1274
|
+
|
|
1275
|
+
// Fail a bunch of times
|
|
1276
|
+
spinfail().await;
|
|
1277
|
+
// Nonfirst attempt count should still be zero
|
|
1278
|
+
let count = lam.get_nonfirst_attempt_count(run_id);
|
|
1279
|
+
assert_eq!(count, 0);
|
|
1280
|
+
|
|
1281
|
+
for _ in 1..=2 {
|
|
1282
|
+
// This should work over multiple WFTs
|
|
1283
|
+
// say the first wft was completed
|
|
1284
|
+
lam.enqueue([LocalActRequest::IndicateWorkflowTaskCompleted(
|
|
1285
|
+
run_id.to_string(),
|
|
1286
|
+
)]);
|
|
1287
|
+
// Do some more attempts
|
|
1288
|
+
spinfail().await;
|
|
1289
|
+
let count = lam.get_nonfirst_attempt_count(run_id);
|
|
1290
|
+
assert_eq!(count, 10);
|
|
1291
|
+
}
|
|
1042
1292
|
}
|
|
1043
1293
|
}
|