@temporalio/core-bridge 1.5.2 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. package/Cargo.lock +304 -112
  2. package/lib/index.d.ts +8 -6
  3. package/lib/index.js.map +1 -1
  4. package/package.json +9 -4
  5. package/releases/aarch64-apple-darwin/index.node +0 -0
  6. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  7. package/releases/x86_64-apple-darwin/index.node +0 -0
  8. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  9. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  10. package/sdk-core/.buildkite/docker/Dockerfile +2 -2
  11. package/sdk-core/.buildkite/docker/docker-compose.yaml +1 -1
  12. package/sdk-core/.buildkite/pipeline.yml +2 -4
  13. package/sdk-core/.cargo/config.toml +5 -2
  14. package/sdk-core/.github/workflows/heavy.yml +29 -0
  15. package/sdk-core/Cargo.toml +1 -1
  16. package/sdk-core/README.md +20 -10
  17. package/sdk-core/client/src/lib.rs +215 -39
  18. package/sdk-core/client/src/metrics.rs +17 -8
  19. package/sdk-core/client/src/raw.rs +4 -4
  20. package/sdk-core/client/src/retry.rs +32 -20
  21. package/sdk-core/core/Cargo.toml +25 -12
  22. package/sdk-core/core/src/abstractions/take_cell.rs +28 -0
  23. package/sdk-core/core/src/abstractions.rs +204 -14
  24. package/sdk-core/core/src/core_tests/activity_tasks.rs +143 -50
  25. package/sdk-core/core/src/core_tests/child_workflows.rs +6 -5
  26. package/sdk-core/core/src/core_tests/determinism.rs +165 -2
  27. package/sdk-core/core/src/core_tests/local_activities.rs +431 -43
  28. package/sdk-core/core/src/core_tests/queries.rs +34 -16
  29. package/sdk-core/core/src/core_tests/workers.rs +8 -5
  30. package/sdk-core/core/src/core_tests/workflow_tasks.rs +588 -55
  31. package/sdk-core/core/src/ephemeral_server/mod.rs +113 -12
  32. package/sdk-core/core/src/internal_flags.rs +155 -0
  33. package/sdk-core/core/src/lib.rs +16 -9
  34. package/sdk-core/core/src/protosext/mod.rs +1 -1
  35. package/sdk-core/core/src/replay/mod.rs +16 -27
  36. package/sdk-core/core/src/telemetry/log_export.rs +1 -1
  37. package/sdk-core/core/src/telemetry/metrics.rs +69 -35
  38. package/sdk-core/core/src/telemetry/mod.rs +60 -21
  39. package/sdk-core/core/src/telemetry/prometheus_server.rs +19 -13
  40. package/sdk-core/core/src/test_help/mod.rs +73 -14
  41. package/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +119 -160
  42. package/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
  43. package/sdk-core/core/src/worker/activities/local_activities.rs +379 -129
  44. package/sdk-core/core/src/worker/activities.rs +350 -175
  45. package/sdk-core/core/src/worker/client/mocks.rs +22 -2
  46. package/sdk-core/core/src/worker/client.rs +18 -2
  47. package/sdk-core/core/src/worker/mod.rs +183 -64
  48. package/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
  49. package/sdk-core/core/src/worker/workflow/driven_workflow.rs +3 -5
  50. package/sdk-core/core/src/worker/workflow/history_update.rs +916 -277
  51. package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +216 -183
  52. package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +9 -12
  53. package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +7 -9
  54. package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +160 -87
  55. package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +13 -14
  56. package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +7 -9
  57. package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +14 -17
  58. package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +242 -110
  59. package/sdk-core/core/src/worker/workflow/machines/mod.rs +27 -19
  60. package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +9 -11
  61. package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +321 -206
  62. package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +13 -18
  63. package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +20 -29
  64. package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
  65. package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +257 -51
  66. package/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
  67. package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +310 -150
  68. package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +17 -20
  69. package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +31 -15
  70. package/sdk-core/core/src/worker/workflow/managed_run.rs +1052 -380
  71. package/sdk-core/core/src/worker/workflow/mod.rs +598 -390
  72. package/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
  73. package/sdk-core/core/src/worker/workflow/wft_extraction.rs +137 -0
  74. package/sdk-core/core/src/worker/workflow/wft_poller.rs +1 -4
  75. package/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
  76. package/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
  77. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +469 -718
  78. package/sdk-core/core-api/Cargo.toml +2 -1
  79. package/sdk-core/core-api/src/errors.rs +1 -34
  80. package/sdk-core/core-api/src/lib.rs +19 -9
  81. package/sdk-core/core-api/src/telemetry.rs +4 -6
  82. package/sdk-core/core-api/src/worker.rs +19 -1
  83. package/sdk-core/etc/deps.svg +115 -140
  84. package/sdk-core/etc/regen-depgraph.sh +5 -0
  85. package/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +86 -61
  86. package/sdk-core/fsm/rustfsm_trait/src/lib.rs +29 -71
  87. package/sdk-core/histories/ends_empty_wft_complete.bin +0 -0
  88. package/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
  89. package/sdk-core/histories/old_change_marker_format.bin +0 -0
  90. package/sdk-core/protos/api_upstream/.github/CODEOWNERS +2 -1
  91. package/sdk-core/protos/api_upstream/Makefile +6 -6
  92. package/sdk-core/protos/api_upstream/build/go.mod +7 -0
  93. package/sdk-core/protos/api_upstream/build/go.sum +5 -0
  94. package/sdk-core/protos/api_upstream/build/tools.go +29 -0
  95. package/sdk-core/protos/api_upstream/go.mod +6 -0
  96. package/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +9 -2
  97. package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +7 -26
  98. package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +13 -2
  99. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
  100. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +3 -7
  101. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
  102. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +8 -8
  103. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +25 -2
  104. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
  105. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
  106. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
  107. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
  108. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
  109. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +24 -19
  110. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
  111. package/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
  112. package/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
  113. package/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
  114. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +49 -26
  115. package/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +4 -2
  116. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +5 -2
  117. package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -2
  118. package/sdk-core/protos/api_upstream/temporal/api/protocol/v1/message.proto +57 -0
  119. package/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
  120. package/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
  121. package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +2 -2
  122. package/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
  123. package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -2
  124. package/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +71 -6
  125. package/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
  126. package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +2 -2
  127. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +64 -28
  128. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +4 -4
  129. package/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +7 -8
  130. package/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +10 -7
  131. package/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +19 -30
  132. package/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
  133. package/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
  134. package/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +8 -0
  135. package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +67 -60
  136. package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +85 -84
  137. package/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +9 -3
  138. package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
  139. package/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
  140. package/sdk-core/sdk/Cargo.toml +5 -4
  141. package/sdk-core/sdk/src/lib.rs +108 -26
  142. package/sdk-core/sdk/src/workflow_context/options.rs +7 -1
  143. package/sdk-core/sdk/src/workflow_context.rs +24 -17
  144. package/sdk-core/sdk/src/workflow_future.rs +16 -15
  145. package/sdk-core/sdk-core-protos/Cargo.toml +5 -2
  146. package/sdk-core/sdk-core-protos/build.rs +36 -2
  147. package/sdk-core/sdk-core-protos/src/history_builder.rs +138 -106
  148. package/sdk-core/sdk-core-protos/src/history_info.rs +10 -1
  149. package/sdk-core/sdk-core-protos/src/lib.rs +272 -87
  150. package/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
  151. package/sdk-core/test-utils/Cargo.toml +3 -1
  152. package/sdk-core/test-utils/src/canned_histories.rs +106 -296
  153. package/sdk-core/test-utils/src/histfetch.rs +1 -1
  154. package/sdk-core/test-utils/src/lib.rs +82 -23
  155. package/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
  156. package/sdk-core/test-utils/src/workflows.rs +29 -0
  157. package/sdk-core/tests/fuzzy_workflow.rs +130 -0
  158. package/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
  159. package/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
  160. package/sdk-core/tests/integ_tests/heartbeat_tests.rs +10 -5
  161. package/sdk-core/tests/integ_tests/metrics_tests.rs +218 -16
  162. package/sdk-core/tests/integ_tests/polling_tests.rs +4 -47
  163. package/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
  164. package/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
  165. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +161 -72
  166. package/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
  167. package/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
  168. package/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +80 -3
  169. package/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
  170. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
  171. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +94 -200
  172. package/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +2 -4
  173. package/sdk-core/tests/integ_tests/workflow_tests/patches.rs +34 -28
  174. package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +76 -7
  175. package/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
  176. package/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
  177. package/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
  178. package/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
  179. package/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +7 -8
  180. package/sdk-core/tests/integ_tests/workflow_tests.rs +13 -14
  181. package/sdk-core/tests/main.rs +3 -13
  182. package/sdk-core/tests/runner.rs +75 -36
  183. package/sdk-core/tests/wf_input_replay.rs +32 -0
  184. package/src/conversions.rs +14 -8
  185. package/src/runtime.rs +9 -8
  186. package/ts/index.ts +8 -6
  187. package/sdk-core/bridge-ffi/Cargo.toml +0 -24
  188. package/sdk-core/bridge-ffi/LICENSE.txt +0 -23
  189. package/sdk-core/bridge-ffi/build.rs +0 -25
  190. package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -224
  191. package/sdk-core/bridge-ffi/src/lib.rs +0 -746
  192. package/sdk-core/bridge-ffi/src/wrappers.rs +0 -221
  193. package/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
  194. package/sdk-core/sdk/src/conversions.rs +0 -8
@@ -1,13 +1,18 @@
1
1
  use crate::{
2
- abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
2
+ abstractions::{dbg_panic, MeteredSemaphore, OwnedMeteredSemPermit, UsedMeteredSemPermit},
3
3
  protosext::ValidScheduleLA,
4
4
  retry_logic::RetryPolicyExt,
5
+ worker::workflow::HeartbeatTimeoutMsg,
5
6
  MetricsContext, TaskToken,
6
7
  };
7
- use parking_lot::Mutex;
8
+ use futures::{stream::BoxStream, Stream};
9
+ use futures_util::{future, future::AbortRegistration, stream, StreamExt};
10
+ use parking_lot::{Mutex, MutexGuard};
8
11
  use std::{
9
- collections::HashMap,
12
+ collections::{hash_map::Entry, HashMap},
10
13
  fmt::{Debug, Formatter},
14
+ pin::Pin,
15
+ task::{Context, Poll},
11
16
  time::{Duration, Instant, SystemTime},
12
17
  };
13
18
  use temporal_sdk_core_protos::{
@@ -15,7 +20,11 @@ use temporal_sdk_core_protos::{
15
20
  activity_result::{Cancellation, Failure as ActFail, Success},
16
21
  activity_task::{activity_task, ActivityCancelReason, ActivityTask, Cancel, Start},
17
22
  },
18
- temporal::api::{common::v1::WorkflowExecution, enums::v1::TimeoutType},
23
+ temporal::api::{
24
+ common::v1::WorkflowExecution,
25
+ enums::v1::TimeoutType,
26
+ failure::v1::{failure, Failure as APIFailure, TimeoutFailureInfo},
27
+ },
19
28
  };
20
29
  use tokio::{
21
30
  sync::{
@@ -25,6 +34,7 @@ use tokio::{
25
34
  task::JoinHandle,
26
35
  time::sleep,
27
36
  };
37
+ use tokio_stream::wrappers::UnboundedReceiverStream;
28
38
  use tokio_util::sync::CancellationToken;
29
39
 
30
40
  #[allow(clippy::large_enum_variant)] // Timeouts are relatively rare
@@ -45,10 +55,14 @@ pub(crate) struct LocalInFlightActInfo {
45
55
  pub la_info: NewLocalAct,
46
56
  pub dispatch_time: Instant,
47
57
  pub attempt: u32,
48
- _permit: OwnedMeteredSemPermit,
58
+ _permit: UsedMeteredSemPermit,
49
59
  }
50
60
 
51
61
  #[derive(Debug, Clone)]
62
+ #[cfg_attr(
63
+ feature = "save_wf_inputs",
64
+ derive(serde::Serialize, serde::Deserialize)
65
+ )]
52
66
  pub(crate) enum LocalActivityExecutionResult {
53
67
  Completed(Success),
54
68
  Failed(ActFail),
@@ -60,11 +74,26 @@ impl LocalActivityExecutionResult {
60
74
  Self::Cancelled(Cancellation::from_details(None))
61
75
  }
62
76
  pub(crate) fn timeout(tt: TimeoutType) -> Self {
63
- Self::TimedOut(ActFail::timeout(tt))
77
+ Self::TimedOut(ActFail {
78
+ failure: Some(APIFailure {
79
+ message: "Activity timed out".to_string(),
80
+ failure_info: Some(failure::FailureInfo::TimeoutFailureInfo(
81
+ TimeoutFailureInfo {
82
+ timeout_type: tt as i32,
83
+ last_heartbeat_details: None,
84
+ },
85
+ )),
86
+ ..Default::default()
87
+ }),
88
+ })
64
89
  }
65
90
  }
66
91
 
67
92
  #[derive(Debug, Clone)]
93
+ #[cfg_attr(
94
+ feature = "save_wf_inputs",
95
+ derive(serde::Serialize, serde::Deserialize)
96
+ )]
68
97
  pub(crate) struct LocalActivityResolution {
69
98
  pub seq: u32,
70
99
  pub result: LocalActivityExecutionResult,
@@ -96,6 +125,17 @@ impl Debug for NewLocalAct {
96
125
  pub(crate) enum LocalActRequest {
97
126
  New(NewLocalAct),
98
127
  Cancel(ExecutingLAId),
128
+ #[from(ignore)]
129
+ CancelAllInRun(String),
130
+ StartHeartbeatTimeout {
131
+ send_on_elapse: HeartbeatTimeoutMsg,
132
+ deadline: Instant,
133
+ abort_reg: AbortRegistration,
134
+ },
135
+ /// Tell the LA manager that a workflow task was responded to (completed or failed) for a
136
+ /// certain run id
137
+ #[from(ignore)]
138
+ IndicateWorkflowTaskCompleted(String),
99
139
  }
100
140
 
101
141
  #[derive(Debug, Clone, Eq, PartialEq, Hash)]
@@ -107,28 +147,43 @@ pub(crate) struct ExecutingLAId {
107
147
  pub(crate) struct LocalActivityManager {
108
148
  /// Just so we can provide activity tasks the same namespace as the worker
109
149
  namespace: String,
110
- /// Constrains number of currently executing local activities
111
- semaphore: MeteredSemaphore,
112
150
  /// Sink for new activity execution requests
113
151
  act_req_tx: UnboundedSender<NewOrRetry>,
114
152
  /// Cancels need a different queue since they should be taken first, and don't take a permit
115
153
  cancels_req_tx: UnboundedSender<CancelOrTimeout>,
154
+ /// For the emission of heartbeat timeouts, back into the workflow machines. This channel
155
+ /// needs to come in from above us, because we cannot rely on callers getting the next
156
+ /// activation as a way to deliver heartbeats.
157
+ heartbeat_timeout_tx: UnboundedSender<HeartbeatTimeoutMsg>,
116
158
  /// Wakes every time a complete is processed
117
159
  complete_notify: Notify,
160
+ /// Set once workflows have finished shutting down, and thus we know we will no longer receive
161
+ /// any requests to spawn new LAs
162
+ workflows_have_shut_down: CancellationToken,
118
163
 
119
164
  rcvs: tokio::sync::Mutex<RcvChans>,
120
165
  shutdown_complete_tok: CancellationToken,
121
166
  dat: Mutex<LAMData>,
122
167
  }
123
168
 
169
+ struct LocalActivityInfo {
170
+ task_token: TaskToken,
171
+ /// Tasks for the current backoff until the next retry, if any.
172
+ backing_off_task: Option<JoinHandle<()>>,
173
+ /// Tasks / info about timeouts associated with this LA. May be empty for very brief periods
174
+ /// while the LA id has been generated, but it has not yet been scheduled.
175
+ timeout_bag: Option<TimeoutBag>,
176
+ /// True once the first workflow task this LA started in has elapsed
177
+ first_wft_has_ended: bool,
178
+ /// Attempts at executing this LA during the current WFT
179
+ attempts_in_wft: usize,
180
+ }
181
+
124
182
  struct LAMData {
183
+ /// Maps local activity identifiers to information about them
184
+ la_info: HashMap<ExecutingLAId, LocalActivityInfo>,
125
185
  /// Activities that have been issued to lang but not yet completed
126
186
  outstanding_activity_tasks: HashMap<TaskToken, LocalInFlightActInfo>,
127
- id_to_tt: HashMap<ExecutingLAId, TaskToken>,
128
- /// Tasks for activities which are currently backing off. May be used to cancel retrying them.
129
- backing_off_tasks: HashMap<ExecutingLAId, JoinHandle<()>>,
130
- /// Tasks for timing out activities which are currently in the queue or dispatched.
131
- timeout_tasks: HashMap<ExecutingLAId, TimeoutBag>,
132
187
  next_tt_num: u32,
133
188
  }
134
189
 
@@ -143,42 +198,46 @@ impl LocalActivityManager {
143
198
  pub(crate) fn new(
144
199
  max_concurrent: usize,
145
200
  namespace: String,
201
+ heartbeat_timeout_tx: UnboundedSender<HeartbeatTimeoutMsg>,
146
202
  metrics_context: MetricsContext,
147
203
  ) -> Self {
148
204
  let (act_req_tx, act_req_rx) = unbounded_channel();
149
205
  let (cancels_req_tx, cancels_req_rx) = unbounded_channel();
150
206
  let shutdown_complete_tok = CancellationToken::new();
207
+ let semaphore = MeteredSemaphore::new(
208
+ max_concurrent,
209
+ metrics_context,
210
+ MetricsContext::available_task_slots,
211
+ );
151
212
  Self {
152
213
  namespace,
153
- semaphore: MeteredSemaphore::new(
154
- max_concurrent,
155
- metrics_context,
156
- MetricsContext::available_task_slots,
157
- ),
214
+ rcvs: tokio::sync::Mutex::new(RcvChans::new(
215
+ act_req_rx,
216
+ semaphore,
217
+ cancels_req_rx,
218
+ shutdown_complete_tok.clone(),
219
+ )),
158
220
  act_req_tx,
159
221
  cancels_req_tx,
222
+ heartbeat_timeout_tx,
160
223
  complete_notify: Notify::new(),
161
- rcvs: tokio::sync::Mutex::new(RcvChans {
162
- act_req_rx,
163
- cancels_req_rx,
164
- shutdown: shutdown_complete_tok.clone(),
165
- }),
166
224
  shutdown_complete_tok,
167
225
  dat: Mutex::new(LAMData {
168
226
  outstanding_activity_tasks: Default::default(),
169
- id_to_tt: Default::default(),
170
- backing_off_tasks: Default::default(),
171
- timeout_tasks: Default::default(),
227
+ la_info: Default::default(),
172
228
  next_tt_num: 0,
173
229
  }),
230
+ workflows_have_shut_down: Default::default(),
174
231
  }
175
232
  }
176
233
 
177
234
  #[cfg(test)]
178
235
  fn test(max_concurrent: usize) -> Self {
236
+ let (hb_tx, _hb_rx) = unbounded_channel();
179
237
  Self::new(
180
238
  max_concurrent,
181
239
  "fake_ns".to_string(),
240
+ hb_tx,
182
241
  MetricsContext::no_op(),
183
242
  )
184
243
  }
@@ -190,76 +249,116 @@ impl LocalActivityManager {
190
249
 
191
250
  #[cfg(test)]
192
251
  fn num_in_backoff(&self) -> usize {
193
- self.dat.lock().backing_off_tasks.len()
252
+ self.dat
253
+ .lock()
254
+ .la_info
255
+ .values()
256
+ .filter(|lai| lai.backing_off_task.is_some())
257
+ .count()
194
258
  }
195
259
 
196
260
  pub(crate) fn enqueue(
197
261
  &self,
198
262
  reqs: impl IntoIterator<Item = LocalActRequest>,
199
263
  ) -> Vec<LocalActivityResolution> {
264
+ if self.workflows_have_shut_down.is_cancelled() {
265
+ dbg_panic!("Tried to enqueue local activity after workflows were shut down");
266
+ return vec![];
267
+ }
200
268
  let mut immediate_resolutions = vec![];
201
269
  for req in reqs {
202
- debug!(local_activity = ?req, "Queuing local activity");
203
270
  match req {
204
271
  LocalActRequest::New(act) => {
272
+ debug!(local_activity=?act, "Queuing local activity");
205
273
  let id = ExecutingLAId {
206
274
  run_id: act.workflow_exec_info.run_id.clone(),
207
275
  seq_num: act.schedule_cmd.seq,
208
276
  };
209
277
  let mut dlock = self.dat.lock();
210
- if dlock.id_to_tt.contains_key(&id) {
211
- // Do not queue local activities which are in fact already executing.
212
- // This can happen during evictions.
213
- debug!("Tried to queue already-executing local activity {:?}", &id);
214
- continue;
215
- }
216
- // Pre-generate and insert the task token now, before we may or may not dispatch
217
- // the activity, so we can enforce idempotency. Prevents two identical LAs
218
- // ending up in the queue at once.
219
278
  let tt = dlock.gen_next_token();
220
- dlock.id_to_tt.insert(id.clone(), tt);
221
-
222
- // Set up timeouts for the new activity
223
- match TimeoutBag::new(&act, self.cancels_req_tx.clone()) {
224
- Ok(tb) => {
225
- dlock.timeout_tasks.insert(id, tb);
226
-
227
- self.act_req_tx
228
- .send(NewOrRetry::New(act))
229
- .expect("Receive half of LA request channel cannot be dropped");
279
+ match dlock.la_info.entry(id) {
280
+ Entry::Occupied(o) => {
281
+ // Do not queue local activities which are in fact already executing.
282
+ // This can happen during evictions.
283
+ debug!(
284
+ "Tried to queue already-executing local activity {:?}",
285
+ o.key()
286
+ );
287
+ continue;
288
+ }
289
+ Entry::Vacant(ve) => {
290
+ // Insert the task token now, before we may or may not dispatch the
291
+ // activity, so we can enforce idempotency. Prevents two identical LAs
292
+ // ending up in the queue at once.
293
+ let lai = ve.insert(LocalActivityInfo {
294
+ task_token: tt,
295
+ backing_off_task: None,
296
+ timeout_bag: None,
297
+ first_wft_has_ended: false,
298
+ attempts_in_wft: 0,
299
+ });
300
+
301
+ // Set up timeouts for the new activity
302
+ match TimeoutBag::new(&act, self.cancels_req_tx.clone()) {
303
+ Ok(tb) => {
304
+ lai.timeout_bag = Some(tb);
305
+
306
+ self.act_req_tx.send(NewOrRetry::New(act)).expect(
307
+ "Receive half of LA request channel cannot be dropped",
308
+ );
309
+ }
310
+ Err(res) => immediate_resolutions.push(res),
311
+ }
230
312
  }
231
- Err(res) => immediate_resolutions.push(res),
232
313
  }
233
314
  }
315
+ LocalActRequest::StartHeartbeatTimeout {
316
+ send_on_elapse,
317
+ deadline,
318
+ abort_reg,
319
+ } => {
320
+ let chan = self.heartbeat_timeout_tx.clone();
321
+ tokio::spawn(future::Abortable::new(
322
+ async move {
323
+ tokio::time::sleep_until(deadline.into()).await;
324
+ let _ = chan.send(send_on_elapse);
325
+ },
326
+ abort_reg,
327
+ ));
328
+ }
234
329
  LocalActRequest::Cancel(id) => {
330
+ debug!(id=?id, "Cancelling local activity");
235
331
  let mut dlock = self.dat.lock();
236
-
237
- // First check if this ID is currently backing off, if so abort the backoff
238
- // task
239
- if let Some(t) = dlock.backing_off_tasks.remove(&id) {
240
- t.abort();
241
- immediate_resolutions.push(LocalActivityResolution {
242
- seq: id.seq_num,
243
- result: LocalActivityExecutionResult::Cancelled(
244
- Cancellation::from_details(None),
245
- ),
246
- runtime: Duration::from_secs(0),
247
- attempt: 0,
248
- backoff: None,
249
- original_schedule_time: None,
250
- });
251
- continue;
332
+ if let Some(lai) = dlock.la_info.get_mut(&id) {
333
+ if let Some(immediate_res) = self.cancel_one_la(id.seq_num, lai) {
334
+ immediate_resolutions.push(immediate_res);
335
+ }
252
336
  }
253
-
254
- if let Some(tt) = dlock.id_to_tt.get(&id) {
255
- self.cancels_req_tx
256
- .send(CancelOrTimeout::Cancel(ActivityTask {
257
- task_token: tt.0.clone(),
258
- variant: Some(activity_task::Variant::Cancel(Cancel {
259
- reason: ActivityCancelReason::Cancelled as i32,
260
- })),
261
- }))
262
- .expect("Receive half of LA cancel channel cannot be dropped");
337
+ }
338
+ LocalActRequest::CancelAllInRun(run_id) => {
339
+ debug!(run_id=%run_id, "Cancelling all local activities for run");
340
+ let mut dlock = self.dat.lock();
341
+ // Even if we've got 100k+ LAs this should only take a ms or two. Not worth
342
+ // adding another map to keep in sync.
343
+ let las_for_run = dlock
344
+ .la_info
345
+ .iter_mut()
346
+ .filter(|(id, _)| id.run_id == run_id);
347
+ for (laid, lainf) in las_for_run {
348
+ if let Some(immediate_res) = self.cancel_one_la(laid.seq_num, lainf) {
349
+ immediate_resolutions.push(immediate_res);
350
+ }
351
+ }
352
+ }
353
+ LocalActRequest::IndicateWorkflowTaskCompleted(run_id) => {
354
+ let mut dlock = self.dat.lock();
355
+ let las_for_run = dlock
356
+ .la_info
357
+ .iter_mut()
358
+ .filter(|(id, _)| id.run_id == run_id);
359
+ for (_, lainf) in las_for_run {
360
+ lainf.first_wft_has_ended = true;
361
+ lainf.attempts_in_wft = 0;
263
362
  }
264
363
  }
265
364
  }
@@ -270,7 +369,7 @@ impl LocalActivityManager {
270
369
  /// Returns the next pending local-activity related action, or None if shutdown has initiated
271
370
  /// and there are no more remaining actions to take.
272
371
  pub(crate) async fn next_pending(&self) -> Option<DispatchOrTimeoutLA> {
273
- let (new_or_retry, permit) = match self.rcvs.lock().await.next(&self.semaphore).await? {
372
+ let (new_or_retry, permit) = match self.rcvs.lock().await.next().await? {
274
373
  NewOrCancel::Cancel(c) => {
275
374
  return match c {
276
375
  CancelOrTimeout::Cancel(c) => Some(DispatchOrTimeoutLA::Dispatch(c)),
@@ -283,12 +382,13 @@ impl LocalActivityManager {
283
382
  let tt = self
284
383
  .dat
285
384
  .lock()
286
- .id_to_tt
385
+ .la_info
287
386
  .get(&ExecutingLAId {
288
387
  run_id: run_id.clone(),
289
388
  seq_num: resolution.seq,
290
389
  })
291
- .map(Clone::clone);
390
+ .as_ref()
391
+ .map(|lai| lai.task_token.clone());
292
392
  if let Some(task_token) = tt {
293
393
  self.complete(&task_token, &resolution.result);
294
394
  Some(ActivityTask {
@@ -323,18 +423,21 @@ impl LocalActivityManager {
323
423
  }
324
424
  NewOrRetry::Retry { in_flight, attempt } => (in_flight, attempt),
325
425
  };
326
- let orig = new_la.clone();
426
+ let la_info_for_in_flight_map = new_la.clone();
327
427
  let id = ExecutingLAId {
328
428
  run_id: new_la.workflow_exec_info.run_id.clone(),
329
429
  seq_num: new_la.schedule_cmd.seq,
330
430
  };
431
+ let orig_sched_time = new_la.schedule_cmd.original_schedule_time;
331
432
  let sa = new_la.schedule_cmd;
332
433
 
333
434
  let mut dat = self.dat.lock();
334
435
  // If this request originated from a local backoff task, clear the entry for it. We
335
436
  // don't await the handle because we know it must already be done, and there's no
336
437
  // meaningful value.
337
- dat.backing_off_tasks.remove(&id);
438
+ dat.la_info
439
+ .get_mut(&id)
440
+ .map(|lai| lai.backing_off_task.take());
338
441
 
339
442
  // If this task sat in the queue for too long, return a timeout for it instead
340
443
  if let Some(s2s) = sa.schedule_to_start_timeout.as_ref() {
@@ -348,30 +451,27 @@ impl LocalActivityManager {
348
451
  runtime: sat_for,
349
452
  attempt,
350
453
  backoff: None,
351
- original_schedule_time: Some(new_la.schedule_time),
454
+ original_schedule_time: orig_sched_time,
352
455
  },
353
456
  task: None,
354
457
  });
355
458
  }
356
459
  }
357
460
 
358
- let tt = dat
359
- .id_to_tt
360
- .get(&id)
361
- .expect("Task token must exist")
362
- .clone();
461
+ let la_info = dat.la_info.get_mut(&id).expect("Activity must exist");
462
+ let tt = la_info.task_token.clone();
463
+ if let Some(to) = la_info.timeout_bag.as_mut() {
464
+ to.mark_started();
465
+ }
363
466
  dat.outstanding_activity_tasks.insert(
364
467
  tt.clone(),
365
468
  LocalInFlightActInfo {
366
- la_info: orig,
469
+ la_info: la_info_for_in_flight_map,
367
470
  dispatch_time: Instant::now(),
368
471
  attempt,
369
- _permit: permit,
472
+ _permit: permit.into_used(),
370
473
  },
371
474
  );
372
- if let Some(to) = dat.timeout_tasks.get_mut(&id) {
373
- to.mark_started();
374
- }
375
475
 
376
476
  let (schedule_to_close, start_to_close) = sa.close_timeouts.into_sched_and_start();
377
477
  Some(DispatchOrTimeoutLA::Dispatch(ActivityTask {
@@ -406,11 +506,23 @@ impl LocalActivityManager {
406
506
  ) -> LACompleteAction {
407
507
  let mut dlock = self.dat.lock();
408
508
  if let Some(info) = dlock.outstanding_activity_tasks.remove(task_token) {
509
+ if self.workflows_have_shut_down.is_cancelled() {
510
+ // If workflows are already shut down, the results of all this don't matter.
511
+ // Just say we're done if there's nothing outstanding any more.
512
+ self.set_shutdown_complete_if_ready(&mut dlock);
513
+ }
514
+
409
515
  let exec_id = ExecutingLAId {
410
516
  run_id: info.la_info.workflow_exec_info.run_id.clone(),
411
517
  seq_num: info.la_info.schedule_cmd.seq,
412
518
  };
413
- dlock.id_to_tt.remove(&exec_id);
519
+ let maybe_old_lai = dlock.la_info.remove(&exec_id);
520
+ if let Some(ref oldlai) = maybe_old_lai {
521
+ if let Some(ref bot) = oldlai.backing_off_task {
522
+ dbg_panic!("Just-resolved LA should not have backoff task");
523
+ bot.abort();
524
+ }
525
+ }
414
526
 
415
527
  match status {
416
528
  LocalActivityExecutionResult::Completed(_)
@@ -446,8 +558,6 @@ impl LocalActivityManager {
446
558
  }
447
559
  // Immediately create a new task token for the to-be-retried LA
448
560
  let tt = dlock.gen_next_token();
449
- dlock.id_to_tt.insert(exec_id.clone(), tt);
450
-
451
561
  // Send the retry request after waiting the backoff duration
452
562
  let send_chan = self.act_req_tx.clone();
453
563
  let jh = tokio::spawn(async move {
@@ -460,7 +570,22 @@ impl LocalActivityManager {
460
570
  })
461
571
  .expect("Receive half of LA request channel cannot be dropped");
462
572
  });
463
- dlock.backing_off_tasks.insert(exec_id, jh);
573
+ dlock.la_info.insert(
574
+ exec_id,
575
+ LocalActivityInfo {
576
+ task_token: tt,
577
+ backing_off_task: Some(jh),
578
+ first_wft_has_ended: maybe_old_lai
579
+ .as_ref()
580
+ .map(|old| old.first_wft_has_ended)
581
+ .unwrap_or_default(),
582
+ attempts_in_wft: maybe_old_lai
583
+ .as_ref()
584
+ .map(|old| old.attempts_in_wft + 1)
585
+ .unwrap_or(1),
586
+ timeout_bag: maybe_old_lai.and_then(|old| old.timeout_bag),
587
+ },
588
+ );
464
589
 
465
590
  LACompleteAction::WillBeRetried
466
591
  } else {
@@ -473,11 +598,70 @@ impl LocalActivityManager {
473
598
  }
474
599
  }
475
600
 
476
- pub(crate) async fn shutdown_and_wait_all_finished(&self) {
477
- while !self.dat.lock().outstanding_activity_tasks.is_empty() {
601
+ pub(crate) fn workflows_have_shutdown(&self) {
602
+ self.workflows_have_shut_down.cancel();
603
+ self.set_shutdown_complete_if_ready(&mut self.dat.lock());
604
+ }
605
+
606
+ pub(crate) async fn wait_all_outstanding_tasks_finished(&self) {
607
+ while !self.set_shutdown_complete_if_ready(&mut self.dat.lock()) {
478
608
  self.complete_notify.notified().await;
479
609
  }
480
- self.shutdown_complete_tok.cancel();
610
+ }
611
+
612
+ /// Try to close the activity stream as soon as worker shutdown is initiated. This is required
613
+ /// for activity-only workers where since workflows are not polled and the activity poller might
614
+ /// get "stuck".
615
+ pub(crate) fn shutdown_initiated(&self) {
616
+ self.set_shutdown_complete_if_ready(&mut self.dat.lock());
617
+ }
618
+
619
+ pub(crate) fn get_nonfirst_attempt_count(&self, for_run_id: &str) -> usize {
620
+ let dlock = self.dat.lock();
621
+ dlock
622
+ .la_info
623
+ .iter()
624
+ .filter(|(id, info)| id.run_id == for_run_id && info.first_wft_has_ended)
625
+ .map(|(_, info)| info.attempts_in_wft)
626
+ .sum()
627
+ }
628
+
629
+ fn set_shutdown_complete_if_ready(&self, dlock: &mut MutexGuard<LAMData>) -> bool {
630
+ let nothing_outstanding = dlock.outstanding_activity_tasks.is_empty();
631
+ if nothing_outstanding && self.workflows_have_shut_down.is_cancelled() {
632
+ self.shutdown_complete_tok.cancel();
633
+ }
634
+ nothing_outstanding
635
+ }
636
+
637
+ fn cancel_one_la(
638
+ &self,
639
+ seq: u32,
640
+ lai: &mut LocalActivityInfo,
641
+ ) -> Option<LocalActivityResolution> {
642
+ // First check if this ID is currently backing off, if so abort the backoff
643
+ // task
644
+ if let Some(t) = lai.backing_off_task.take() {
645
+ t.abort();
646
+ return Some(LocalActivityResolution {
647
+ seq,
648
+ result: LocalActivityExecutionResult::Cancelled(Cancellation::from_details(None)),
649
+ runtime: Duration::from_secs(0),
650
+ attempt: 0,
651
+ backoff: None,
652
+ original_schedule_time: None,
653
+ });
654
+ }
655
+
656
+ self.cancels_req_tx
657
+ .send(CancelOrTimeout::Cancel(ActivityTask {
658
+ task_token: lai.task_token.0.clone(),
659
+ variant: Some(activity_task::Variant::Cancel(Cancel {
660
+ reason: ActivityCancelReason::Cancelled as i32,
661
+ })),
662
+ }))
663
+ .expect("Receive half of LA cancel channel cannot be dropped");
664
+ None
481
665
  }
482
666
  }
483
667
 
@@ -521,32 +705,45 @@ enum NewOrCancel {
521
705
  Cancel(CancelOrTimeout),
522
706
  }
523
707
 
708
+ #[pin_project::pin_project]
524
709
  struct RcvChans {
525
- /// Activities that need to be executed by lang
526
- act_req_rx: UnboundedReceiver<NewOrRetry>,
527
- /// Cancels to send to lang or apply internally
528
- cancels_req_rx: UnboundedReceiver<CancelOrTimeout>,
529
- shutdown: CancellationToken,
710
+ #[pin]
711
+ inner: BoxStream<'static, NewOrCancel>,
530
712
  }
531
713
 
532
714
  impl RcvChans {
533
- async fn next(&mut self, new_sem: &MeteredSemaphore) -> Option<NewOrCancel> {
534
- tokio::select! {
535
- cancel = async { self.cancels_req_rx.recv().await } => {
536
- Some(NewOrCancel::Cancel(cancel.expect("Send halves of LA manager are not dropped")))
537
- }
538
- (maybe_new_or_retry, perm) = async {
539
- // Wait for a permit to take a task and forget it. Permits are removed until a
540
- // completion.
541
- let perm = new_sem.acquire_owned().await.expect("is never closed");
542
- (self.act_req_rx.recv().await, perm)
543
- } => Some(NewOrCancel::New(
544
- maybe_new_or_retry.expect("Send halves of LA manager are not dropped"), perm
545
- )),
546
- _ = self.shutdown.cancelled() => None
715
+ fn new(
716
+ new_reqs: UnboundedReceiver<NewOrRetry>,
717
+ new_sem: MeteredSemaphore,
718
+ cancels: UnboundedReceiver<CancelOrTimeout>,
719
+ shutdown_completed: CancellationToken,
720
+ ) -> Self {
721
+ let cancel_stream = UnboundedReceiverStream::new(cancels).map(NewOrCancel::Cancel);
722
+ let new_stream = UnboundedReceiverStream::new(new_reqs)
723
+ // Get a permit for each new activity request
724
+ .zip(stream::unfold(new_sem, |new_sem| async move {
725
+ let permit = new_sem
726
+ .acquire_owned()
727
+ .await
728
+ .expect("Local activity semaphore is never closed");
729
+ Some((permit, new_sem))
730
+ }))
731
+ .map(|(req, permit)| NewOrCancel::New(req, permit));
732
+ Self {
733
+ inner: tokio_stream::StreamExt::merge(cancel_stream, new_stream)
734
+ .take_until(async move { shutdown_completed.cancelled().await })
735
+ .boxed(),
547
736
  }
548
737
  }
549
738
  }
739
+ impl Stream for RcvChans {
740
+ type Item = NewOrCancel;
741
+
742
+ fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
743
+ let this = self.project();
744
+ this.inner.poll_next(cx)
745
+ }
746
+ }
550
747
 
551
748
  struct TimeoutBag {
552
749
  sched_to_close_handle: JoinHandle<()>,
@@ -567,17 +764,21 @@ impl TimeoutBag {
567
764
  let (schedule_to_close, start_to_close) =
568
765
  new_la.schedule_cmd.close_timeouts.into_sched_and_start();
569
766
 
767
+ let sched_time = new_la
768
+ .schedule_cmd
769
+ .original_schedule_time
770
+ .unwrap_or(new_la.schedule_time);
570
771
  let resolution = LocalActivityResolution {
571
772
  seq: new_la.schedule_cmd.seq,
572
773
  result: LocalActivityExecutionResult::timeout(TimeoutType::ScheduleToClose),
573
774
  runtime: Default::default(),
574
775
  attempt: new_la.schedule_cmd.attempt,
575
776
  backoff: None,
576
- original_schedule_time: Some(new_la.schedule_time),
777
+ original_schedule_time: new_la.schedule_cmd.original_schedule_time,
577
778
  };
578
779
  // Remove any time already elapsed since the scheduling time
579
780
  let schedule_to_close = schedule_to_close
580
- .map(|s2c| s2c.saturating_sub(new_la.schedule_time.elapsed().unwrap_or_default()));
781
+ .map(|s2c| s2c.saturating_sub(sched_time.elapsed().unwrap_or_default()));
581
782
  if let Some(ref s2c) = schedule_to_close {
582
783
  if s2c.is_zero() {
583
784
  return Err(resolution);
@@ -640,18 +841,19 @@ impl Drop for TimeoutBag {
640
841
  mod tests {
641
842
  use super::*;
642
843
  use crate::{prost_dur, protosext::LACloseTimeouts};
844
+ use futures_util::FutureExt;
643
845
  use temporal_sdk_core_protos::temporal::api::{
644
846
  common::v1::RetryPolicy,
645
847
  failure::v1::{failure::FailureInfo, ApplicationFailureInfo, Failure},
646
848
  };
647
- use tokio::{sync::mpsc::error::TryRecvError, task::yield_now};
849
+ use tokio::task::yield_now;
648
850
 
649
851
  impl DispatchOrTimeoutLA {
650
852
  fn unwrap(self) -> ActivityTask {
651
853
  match self {
652
854
  DispatchOrTimeoutLA::Dispatch(t) => t,
653
- DispatchOrTimeoutLA::Timeout { .. } => {
654
- panic!("Timeout returned when expected a task")
855
+ _ => {
856
+ panic!("Non-dispatched action returned")
655
857
  }
656
858
  }
657
859
  }
@@ -1026,18 +1228,66 @@ mod tests {
1026
1228
  lam.next_pending().await.unwrap().unwrap();
1027
1229
  assert_eq!(lam.num_outstanding(), 1);
1028
1230
  // There should be nothing else in the queue
1029
- assert_eq!(
1030
- lam.rcvs.lock().await.act_req_rx.try_recv().unwrap_err(),
1031
- TryRecvError::Empty
1032
- );
1231
+ assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
1033
1232
 
1034
1233
  // Verify that if we now enqueue the same act again, after the task is outstanding, we still
1035
1234
  // don't add it.
1036
1235
  lam.enqueue([new_la.into()]);
1037
1236
  assert_eq!(lam.num_outstanding(), 1);
1038
- assert_eq!(
1039
- lam.rcvs.lock().await.act_req_rx.try_recv().unwrap_err(),
1040
- TryRecvError::Empty
1041
- );
1237
+ assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
1238
+ }
1239
+
1240
+ #[tokio::test]
1241
+ async fn nonfirst_la_attempt_count_is_accurate() {
1242
+ let run_id = "run_id";
1243
+ let lam = LocalActivityManager::test(10);
1244
+ let new_la = NewLocalAct {
1245
+ schedule_cmd: ValidScheduleLA {
1246
+ seq: 1,
1247
+ activity_id: 1.to_string(),
1248
+ retry_policy: RetryPolicy {
1249
+ initial_interval: Some(prost_dur!(from_millis(1))),
1250
+ backoff_coefficient: 1.0,
1251
+ ..Default::default()
1252
+ },
1253
+ local_retry_threshold: Duration::from_secs(500),
1254
+ ..Default::default()
1255
+ },
1256
+ workflow_type: "".to_string(),
1257
+ workflow_exec_info: WorkflowExecution {
1258
+ workflow_id: "".to_string(),
1259
+ run_id: run_id.to_string(),
1260
+ },
1261
+ schedule_time: SystemTime::now(),
1262
+ };
1263
+ lam.enqueue([new_la.clone().into()]);
1264
+ let spinfail = || async {
1265
+ for _ in 1..=10 {
1266
+ let next = lam.next_pending().await.unwrap().unwrap();
1267
+ let tt = TaskToken(next.task_token);
1268
+ lam.complete(
1269
+ &tt,
1270
+ &LocalActivityExecutionResult::Failed(Default::default()),
1271
+ );
1272
+ }
1273
+ };
1274
+
1275
+ // Fail a bunch of times
1276
+ spinfail().await;
1277
+ // Nonfirst attempt count should still be zero
1278
+ let count = lam.get_nonfirst_attempt_count(run_id);
1279
+ assert_eq!(count, 0);
1280
+
1281
+ for _ in 1..=2 {
1282
+ // This should work over multiple WFTs
1283
+ // say the first wft was completed
1284
+ lam.enqueue([LocalActRequest::IndicateWorkflowTaskCompleted(
1285
+ run_id.to_string(),
1286
+ )]);
1287
+ // Do some more attempts
1288
+ spinfail().await;
1289
+ let count = lam.get_nonfirst_attempt_count(run_id);
1290
+ assert_eq!(count, 10);
1291
+ }
1042
1292
  }
1043
1293
  }