@temporalio/core-bridge 1.7.0 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/Cargo.lock +500 -400
  2. package/package.json +3 -3
  3. package/releases/aarch64-apple-darwin/index.node +0 -0
  4. package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
  5. package/releases/x86_64-apple-darwin/index.node +0 -0
  6. package/releases/x86_64-pc-windows-msvc/index.node +0 -0
  7. package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
  8. package/sdk-core/client/src/lib.rs +23 -6
  9. package/sdk-core/client/src/raw.rs +15 -6
  10. package/sdk-core/core/Cargo.toml +1 -0
  11. package/sdk-core/core/src/core_tests/activity_tasks.rs +13 -5
  12. package/sdk-core/core/src/core_tests/determinism.rs +49 -2
  13. package/sdk-core/core/src/core_tests/workflow_tasks.rs +21 -39
  14. package/sdk-core/core/src/internal_flags.rs +132 -60
  15. package/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +10 -7
  16. package/sdk-core/core/src/worker/activities.rs +152 -142
  17. package/sdk-core/core/src/worker/client.rs +12 -8
  18. package/sdk-core/core/src/worker/mod.rs +8 -5
  19. package/sdk-core/core/src/worker/workflow/history_update.rs +86 -2
  20. package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +4 -1
  21. package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +23 -88
  22. package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +6 -6
  23. package/sdk-core/core/src/worker/workflow/managed_run.rs +9 -2
  24. package/sdk-core/core/src/worker/workflow/mod.rs +22 -8
  25. package/sdk-core/core/src/worker/workflow/workflow_stream.rs +29 -27
  26. package/sdk-core/protos/api_upstream/.github/workflows/publish-docs.yml +23 -0
  27. package/sdk-core/protos/api_upstream/Makefile +1 -1
  28. package/sdk-core/protos/api_upstream/buf.yaml +5 -0
  29. package/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +17 -0
  30. package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +2 -0
  31. package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +6 -3
  32. package/sdk-core/protos/api_upstream/temporal/api/protocol/v1/message.proto +1 -1
  33. package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +12 -22
  34. package/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +2 -2
  35. package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +2 -0
  36. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +145 -48
  37. package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +19 -8
  38. package/sdk-core/test-utils/src/lib.rs +29 -7
  39. package/sdk-core/tests/integ_tests/activity_functions.rs +5 -0
  40. package/sdk-core/tests/integ_tests/workflow_tests/activities.rs +2 -4
  41. package/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +0 -1
  42. package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +5 -7
  43. package/sdk-core/tests/integ_tests/workflow_tests.rs +3 -7
  44. package/sdk-core/tests/main.rs +16 -24
package/sdk-core/core/src/worker/activities.rs
@@ -37,7 +37,10 @@ use governor::{Quota, RateLimiter};
 use std::{
     convert::TryInto,
     future,
-    sync::Arc,
+    sync::{
+        atomic::{AtomicBool, Ordering},
+        Arc,
+    },
     time::{Duration, Instant},
 };
 use temporal_sdk_core_protos::{
@@ -117,8 +120,8 @@ impl RemoteInFlightActInfo {
 }
 
 pub(crate) struct WorkerActivityTasks {
-    /// Token used to signal the server task poller that shutdown is beginning
-    poller_shutdown_token: CancellationToken,
+    /// Token which is cancelled once shutdown is beginning
+    shutdown_initiated_token: CancellationToken,
     /// Centralizes management of heartbeat issuing / throttling
     heartbeat_manager: ActivityHeartbeatManager,
     /// Combined stream for any ActivityTask producing source (polls, eager activities,
@@ -169,7 +172,7 @@ impl WorkerActivityTasks {
             metrics.with_new_attrs([activity_worker_type()]),
             MetricsContext::available_task_slots,
         ));
-        let poller_shutdown_token = CancellationToken::new();
+        let shutdown_initiated_token = CancellationToken::new();
         let rate_limiter = max_worker_act_per_sec.and_then(|ps| {
             Quota::with_period(Duration::from_secs_f64(ps.recip())).map(RateLimiter::direct)
         });
@@ -179,7 +182,7 @@
             semaphore.clone(),
             rate_limiter,
             metrics.clone(),
-            poller_shutdown_token.clone(),
+            shutdown_initiated_token.clone(),
         );
         let (eager_activities_tx, eager_activities_rx) = unbounded_channel();
         let eager_activities_semaphore = ClosableMeteredSemaphore::new_arc(semaphore);
@@ -199,22 +202,21 @@
             starts_stream.map(ActivityTaskSource::from),
             |_: &mut ()| PollNext::Left,
         );
-        // Create a task stream composed of (in poll preference order):
-        //  cancels_stream ------------------------------+--- activity_task_stream
-        //  eager_activities_rx ---+--- starts_stream ---|
-        //  server_poll_stream ---|
-        let activity_task_stream = Self::merge_source_streams(
+
+        let activity_task_stream = ActivityTaskStream {
             source_stream,
-            outstanding_activity_tasks.clone(),
+            outstanding_tasks: outstanding_activity_tasks.clone(),
             start_tasks_stream_complete,
-            complete_notify.clone(),
-            graceful_shutdown,
+            complete_notify: complete_notify.clone(),
+            grace_period: graceful_shutdown,
             cancels_tx,
-            metrics.clone(),
-        );
+            shutdown_initiated_token: shutdown_initiated_token.clone(),
+            metrics: metrics.clone(),
+        }
+        .streamify();
 
         Self {
-            poller_shutdown_token,
+            shutdown_initiated_token,
             eager_activities_tx,
             heartbeat_manager,
             activity_task_stream: Mutex::new(activity_task_stream.boxed()),
@@ -263,9 +265,7 @@
 
         // Prefer eager activities over polling the server
         stream::select_with_strategy(non_poll_stream, poller_stream, |_: &mut ()| PollNext::Left)
-            .map(|res| res.map_err(|err| err.into()))
-            // This map, chain, filter_map sequence is here to cancel the token when this stream ends.
-            .map(Some)
+            .map(|res| Some(res.map_err(Into::into)))
            .chain(futures::stream::once(async move {
                on_complete_token.cancel();
                None
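As an aside (not part of the package diff), the "prefer eager activities over polling" behavior above relies on `futures::stream::select_with_strategy`: a strategy closure that always returns `PollNext::Left` makes the left-hand stream win whenever it has an item ready. A minimal standalone sketch, assuming the `futures` and `tokio` crates are available:

```rust
use futures::stream::{self, PollNext, StreamExt};

#[tokio::main]
async fn main() {
    // The left stream is always polled first, so its items (standing in here for
    // cancels / eager activities) are drained before the right stream (server polls).
    let preferred = stream::iter(vec!["cancel-1", "cancel-2"]);
    let fallback = stream::iter(vec!["poll-1", "poll-2"]);
    let merged = stream::select_with_strategy(preferred, fallback, |_: &mut ()| PollNext::Left);
    let order: Vec<_> = merged.collect().await;
    println!("{order:?}"); // ["cancel-1", "cancel-2", "poll-1", "poll-2"]
}
```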
@@ -273,98 +273,8 @@
             .filter_map(future::ready)
     }
 
-    /// Builds an [ActivityTask] stream for both cancellation tasks from cancels delivered from
-    /// heartbeats as well as new activity starts
-    fn merge_source_streams(
-        source_stream: impl Stream<Item = ActivityTaskSource>,
-        outstanding_tasks: Arc<DashMap<TaskToken, RemoteInFlightActInfo>>,
-        start_tasks_stream_complete: CancellationToken,
-        complete_notify: Arc<Notify>,
-        grace_period: Option<Duration>,
-        cancels_tx: UnboundedSender<PendingActivityCancel>,
-        metrics: MetricsContext,
-    ) -> impl Stream<Item = Result<ActivityTask, PollActivityError>> {
-        let outstanding_tasks_clone = outstanding_tasks.clone();
-        source_stream
-            .filter_map(move |source| {
-                let outstanding_tasks = outstanding_tasks.clone();
-                let metrics = metrics.clone();
-                async move {
-                    match source {
-                        ActivityTaskSource::PendingCancel(next_pc) => {
-                            // It's possible that activity has been completed and we no longer have
-                            // an outstanding activity task. This is fine because it means that we
-                            // no longer need to cancel this activity, so we'll just ignore such
-                            // orphaned cancellations.
-                            if let Some(mut details) =
-                                outstanding_tasks.get_mut(&next_pc.task_token)
-                            {
-                                if details.issued_cancel_to_lang.is_some() {
-                                    // Don't double-issue cancellations
-                                    return None;
-                                }
-
-                                details.issued_cancel_to_lang = Some(next_pc.reason);
-                                if next_pc.reason == ActivityCancelReason::NotFound {
-                                    details.known_not_found = true;
-                                }
-                                Some(Ok(ActivityTask::cancel_from_ids(
-                                    next_pc.task_token.0,
-                                    next_pc.reason,
-                                )))
-                            } else {
-                                debug!(task_token = ?next_pc.task_token,
-                                       "Unknown activity task when issuing cancel");
-                                // If we can't find the activity here, it's already been completed,
-                                // in which case issuing a cancel again is pointless.
-                                None
-                            }
-                        }
-                        ActivityTaskSource::PendingStart(res) => {
-                            Some(res.map(|(task, is_eager)| {
-                                Self::about_to_issue_task(
-                                    outstanding_tasks,
-                                    task,
-                                    is_eager,
-                                    metrics,
-                                )
-                            }))
-                        }
-                    }
-                }
-            })
-            .take_until(async move {
-                start_tasks_stream_complete.cancelled().await;
-                // Issue cancels for any still-living act tasks after the grace period
-                let (grace_killer, stop_grace) = futures_util::future::abortable(async {
-                    if let Some(gp) = grace_period {
-                        // Make sure we've waited at least the grace period. This way if waiting for
-                        // starts to finish took a while, we subtract that from the grace period.
-                        tokio::time::sleep(gp).await;
-                        for mapref in outstanding_tasks_clone.iter() {
-                            let _ = cancels_tx.send(PendingActivityCancel::new(
-                                mapref.key().clone(),
-                                ActivityCancelReason::WorkerShutdown,
-                            ));
-                        }
-                    }
-                });
-                join!(
-                    async {
-                        while !outstanding_tasks_clone.is_empty() {
-                            complete_notify.notified().await
-                        }
-                        // If we were waiting for the grace period but everything already finished,
-                        // we don't need to keep waiting.
-                        stop_grace.abort();
-                    },
-                    grace_killer
-                )
-            })
-    }
-
     pub(crate) fn initiate_shutdown(&self) {
-        self.poller_shutdown_token.cancel();
+        self.shutdown_initiated_token.cancel();
         self.eager_activities_semaphore.close();
     }
 
@@ -518,42 +428,142 @@
         }
     }
 
-    /// Called when there is a new [ActivityTask] about to be bubbled up out of the poller
-    fn about_to_issue_task(
-        outstanding_tasks: Arc<DashMap<TaskToken, RemoteInFlightActInfo>>,
-        task: PermittedTqResp,
-        is_eager: bool,
-        metrics: MetricsContext,
-    ) -> ActivityTask {
-        if let Some(ref act_type) = task.resp.activity_type {
-            if let Some(ref wf_type) = task.resp.workflow_type {
-                metrics
-                    .with_new_attrs([
-                        activity_type(act_type.name.clone()),
-                        workflow_type(wf_type.name.clone()),
-                        eager(is_eager),
-                    ])
-                    .act_task_received();
-            }
-        }
-        // There could be an else statement here but since the response should always contain both
-        // activity_type and workflow_type, we won't bother.
+    #[cfg(test)]
+    pub(crate) fn remaining_activity_capacity(&self) -> usize {
+        self.eager_activities_semaphore.available_permits()
+    }
+}
 
-        if let Some(dur) = task.resp.sched_to_start() {
-            metrics.act_sched_to_start_latency(dur);
-        };
+struct ActivityTaskStream<SrcStrm> {
+    source_stream: SrcStrm,
+    outstanding_tasks: Arc<DashMap<TaskToken, RemoteInFlightActInfo>>,
+    start_tasks_stream_complete: CancellationToken,
+    complete_notify: Arc<Notify>,
+    grace_period: Option<Duration>,
+    cancels_tx: UnboundedSender<PendingActivityCancel>,
+    /// Token which is cancelled once shutdown is beginning
+    shutdown_initiated_token: CancellationToken,
+    metrics: MetricsContext,
+}
 
-        outstanding_tasks.insert(
-            task.resp.task_token.clone().into(),
-            RemoteInFlightActInfo::new(&task.resp, task.permit.into_used()),
-        );
+impl<SrcStrm> ActivityTaskStream<SrcStrm>
+where
+    SrcStrm: Stream<Item = ActivityTaskSource>,
+{
+    /// Create a task stream composed of (in poll preference order):
+    ///    cancels_stream ------------------------------+--- activity_task_stream
+    ///    eager_activities_rx ---+--- starts_stream ---|
+    ///    server_poll_stream ---|
+    fn streamify(self) -> impl Stream<Item = Result<ActivityTask, PollActivityError>> {
+        let outstanding_tasks_clone = self.outstanding_tasks.clone();
+        let should_issue_immediate_cancel = Arc::new(AtomicBool::new(false));
+        let should_issue_immediate_cancel_clone = should_issue_immediate_cancel.clone();
+        let cancels_tx = self.cancels_tx.clone();
+        self.source_stream
+            .filter_map(move |source| {
+                let res = match source {
+                    ActivityTaskSource::PendingCancel(next_pc) => {
+                        // It's possible that activity has been completed and we no longer have
+                        // an outstanding activity task. This is fine because it means that we
+                        // no longer need to cancel this activity, so we'll just ignore such
+                        // orphaned cancellations.
+                        if let Some(mut details) =
+                            self.outstanding_tasks.get_mut(&next_pc.task_token)
+                        {
+                            if details.issued_cancel_to_lang.is_some() {
+                                // Don't double-issue cancellations
+                                None
+                            } else {
+                                details.issued_cancel_to_lang = Some(next_pc.reason);
+                                if next_pc.reason == ActivityCancelReason::NotFound {
+                                    details.known_not_found = true;
+                                }
+                                Some(Ok(ActivityTask::cancel_from_ids(
+                                    next_pc.task_token.0,
+                                    next_pc.reason,
+                                )))
+                            }
+                        } else {
+                            debug!(task_token = ?next_pc.task_token,
+                                   "Unknown activity task when issuing cancel");
+                            // If we can't find the activity here, it's already been completed,
+                            // in which case issuing a cancel again is pointless.
+                            None
+                        }
+                    }
+                    ActivityTaskSource::PendingStart(res) => {
+                        Some(res.map(|(task, is_eager)| {
+                            if let Some(ref act_type) = task.resp.activity_type {
+                                if let Some(ref wf_type) = task.resp.workflow_type {
+                                    self.metrics
+                                        .with_new_attrs([
+                                            activity_type(act_type.name.clone()),
+                                            workflow_type(wf_type.name.clone()),
+                                            eager(is_eager),
+                                        ])
+                                        .act_task_received();
+                                }
+                            }
+                            // There could be an else statement here but since the response
+                            // should always contain both activity_type and workflow_type, we
+                            // won't bother.
 
-        ActivityTask::start_from_poll_resp(task.resp)
-    }
+                            if let Some(dur) = task.resp.sched_to_start() {
+                                self.metrics.act_sched_to_start_latency(dur);
+                            };
 
-    #[cfg(test)]
-    pub(crate) fn remaining_activity_capacity(&self) -> usize {
-        self.eager_activities_semaphore.available_permits()
+                            let tt: TaskToken = task.resp.task_token.clone().into();
+                            self.outstanding_tasks.insert(
+                                tt.clone(),
+                                RemoteInFlightActInfo::new(&task.resp, task.permit.into_used()),
+                            );
+                            // If we have already waited the grace period and issued cancels,
+                            // this will have been set true, indicating anything that happened
+                            // to be buffered/in-flight/etc should get an immediate cancel. This
+                            // is to allow the user to potentially decide to ignore cancels and
+                            // do work on polls that got received during shutdown.
+                            if should_issue_immediate_cancel.load(Ordering::Acquire) {
+                                let _ = cancels_tx.send(PendingActivityCancel::new(
+                                    tt,
+                                    ActivityCancelReason::WorkerShutdown,
+                                ));
+                            }
+
+                            ActivityTask::start_from_poll_resp(task.resp)
+                        }))
+                    }
+                };
+                async move { res }
+            })
+            .take_until(async move {
+                // Once we've been told to begin cancelling, wait the grace period and then start
+                // cancelling anything outstanding.
+                let (grace_killer, stop_grace) = futures_util::future::abortable(async {
+                    if let Some(gp) = self.grace_period {
+                        self.shutdown_initiated_token.cancelled().await;
+                        tokio::time::sleep(gp).await;
+                        should_issue_immediate_cancel_clone.store(true, Ordering::Release);
+                        for mapref in outstanding_tasks_clone.iter() {
+                            let _ = self.cancels_tx.send(PendingActivityCancel::new(
+                                mapref.key().clone(),
+                                ActivityCancelReason::WorkerShutdown,
+                            ));
+                        }
+                    }
+                });
+                join!(
+                    async {
+                        self.start_tasks_stream_complete.cancelled().await;
+                        while !outstanding_tasks_clone.is_empty() {
+                            self.complete_notify.notified().await
+                        }
+                        // If we were waiting for the grace period but everything already finished,
+                        // we don't need to keep waiting.
+                        stop_grace.abort();
+                    },
+                    grace_killer
+                )
+            })
     }
 }
 
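A detail worth noting in the new `streamify` above: task starts that arrive after the grace period has elapsed are flagged for immediate cancellation via an `AtomicBool` shared between the grace-period future (Release store) and the start path (Acquire load). A rough standalone illustration of that flag pattern only, not the package's code, using nothing beyond `std`:

```rust
use std::sync::{
    atomic::{AtomicBool, Ordering},
    Arc,
};

fn main() {
    // Shared flag: false while the worker is still inside its shutdown grace period.
    let grace_elapsed = Arc::new(AtomicBool::new(false));

    // Shutdown path: the grace period is over, so mark any late arrivals for immediate cancel.
    grace_elapsed.store(true, Ordering::Release);

    // Task-admission path: decide whether a freshly issued task should be cancelled right away.
    if grace_elapsed.load(Ordering::Acquire) {
        println!("task admitted after grace period; issuing immediate cancel");
    }
}
```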
package/sdk-core/core/src/worker/client.rs
@@ -7,12 +7,15 @@ use temporal_sdk_core_protos::{
     coresdk::workflow_commands::QueryResult,
     temporal::api::{
         command::v1::Command,
-        common::v1::{MeteringMetadata, Payloads, WorkflowExecution},
+        common::v1::{
+            MeteringMetadata, Payloads, WorkerVersionCapabilities, WorkerVersionStamp,
+            WorkflowExecution,
+        },
         enums::v1::{TaskQueueKind, WorkflowTaskFailedCause},
         failure::v1::Failure,
         query::v1::WorkflowQueryResult,
         sdk::v1::WorkflowTaskCompletedMetadata,
-        taskqueue::v1::{StickyExecutionAttributes, TaskQueue, TaskQueueMetadata, VersionId},
+        taskqueue::v1::{StickyExecutionAttributes, TaskQueue, TaskQueueMetadata},
         workflowservice::v1::{get_system_info_response::Capabilities, *},
     },
     TaskToken,
@@ -138,8 +141,8 @@ impl WorkerClient for WorkerClientBag {
             } else {
                 self.worker_build_id.clone()
             },
-            worker_versioning_id: Some(VersionId {
-                worker_build_id: self.versioning_build_id(),
+            worker_version_capabilities: Some(WorkerVersionCapabilities {
+                build_id: self.versioning_build_id(),
             }),
         };
 
@@ -166,8 +169,8 @@ impl WorkerClient for WorkerClientBag {
             task_queue_metadata: max_tasks_per_sec.map(|tps| TaskQueueMetadata {
                 max_tasks_per_second: Some(tps),
             }),
-            worker_versioning_id: Some(VersionId {
-                worker_build_id: self.versioning_build_id(),
+            worker_version_capabilities: Some(WorkerVersionCapabilities {
+                build_id: self.versioning_build_id(),
             }),
         };
 
@@ -190,8 +193,9 @@ impl WorkerClient for WorkerClientBag {
             sticky_attributes: request.sticky_attributes,
             return_new_workflow_task: request.return_new_workflow_task,
             force_create_new_workflow_task: request.force_create_new_workflow_task,
-            worker_versioning_id: Some(VersionId {
-                worker_build_id: self.versioning_build_id(),
+            worker_version_stamp: Some(WorkerVersionStamp {
+                build_id: self.versioning_build_id(),
+                bundle_id: "".to_string(),
             }),
             messages: vec![],
             binary_checksum: self.worker_build_id.clone(),
package/sdk-core/core/src/worker/mod.rs
@@ -146,6 +146,13 @@ impl WorkerTrait for Worker {
 
     /// Begins the shutdown process, tells pollers they should stop. Is idempotent.
     fn initiate_shutdown(&self) {
+        if !self.shutdown_token.is_cancelled() {
+            info!(
+                task_queue=%self.config.task_queue,
+                namespace=%self.config.namespace,
+                "Initiated shutdown",
+            );
+        }
         self.shutdown_token.cancel();
         // First, we want to stop polling of both activity and workflow tasks
         if let Some(atm) = self.at_task_mgr.as_ref() {
@@ -157,11 +164,6 @@
         if !self.workflows.ever_polled() {
             self.local_act_mgr.workflows_have_shutdown();
         }
-        info!(
-            task_queue=%self.config.task_queue,
-            namespace=%self.config.namespace,
-            "Initiated shutdown",
-        );
     }
 
     async fn shutdown(&self) {
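The two hunks above move the "Initiated shutdown" log so it is only emitted on the first call, keeping `initiate_shutdown` idempotent without spamming the log. A small sketch of the same idea (hypothetical standalone function, assuming the `tokio-util` crate for `CancellationToken` and plain `println!` in place of structured logging):

```rust
use tokio_util::sync::CancellationToken;

// Log only on the transition from "not shut down" to "shut down"; further calls are no-ops.
fn initiate_shutdown(token: &CancellationToken) {
    if !token.is_cancelled() {
        println!("Initiated shutdown");
    }
    token.cancel();
}

fn main() {
    let token = CancellationToken::new();
    initiate_shutdown(&token); // logs
    initiate_shutdown(&token); // silent, token already cancelled
}
```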
@@ -538,6 +540,7 @@ impl Worker {
         self.workflows
             .activation_completed(
                 completion,
+                false,
                 self.post_activate_hook
                     .as_ref()
                     .map(|h| |data: PostActivateHookData| h(self, data)),
package/sdk-core/core/src/worker/workflow/history_update.rs
@@ -84,6 +84,7 @@ pub struct HistoryPaginator {
     pub(crate) run_id: String,
     pub(crate) previous_wft_started_id: i64,
     pub(crate) wft_started_event_id: i64,
+    id_of_last_event_in_last_extracted_update: Option<i64>,
 
     #[cfg_attr(feature = "save_wf_inputs", serde(skip))]
     client: Arc<dyn WorkerClient>,
@@ -175,6 +176,7 @@ impl HistoryPaginator {
             run_id: req.original_wft.work.execution.run_id.clone(),
             previous_wft_started_id: req.original_wft.work.update.previous_wft_started_id,
             wft_started_event_id: req.original_wft.work.update.wft_started_id,
+            id_of_last_event_in_last_extracted_update: None,
             client,
             event_queue: Default::default(),
             next_page_token: NextPageToken::FetchFromStart,
@@ -211,6 +213,7 @@ impl HistoryPaginator {
             final_events,
             previous_wft_started_id,
             wft_started_event_id,
+            id_of_last_event_in_last_extracted_update: None,
         }
     }
 
@@ -226,6 +229,7 @@ impl HistoryPaginator {
             final_events: vec![],
             previous_wft_started_id: -2,
             wft_started_event_id: -2,
+            id_of_last_event_in_last_extracted_update: None,
         }
     }
 
@@ -240,14 +244,45 @@
     /// we have two, or until we are at the end of history.
     pub(crate) async fn extract_next_update(&mut self) -> Result<HistoryUpdate, tonic::Status> {
         loop {
-            let no_next_page = !self.get_next_page().await?;
+            let fetch_happened = !self.get_next_page().await?;
             let current_events = mem::take(&mut self.event_queue);
             let seen_enough_events = current_events
                 .back()
                 .map(|e| e.event_id)
                 .unwrap_or_default()
                 >= self.wft_started_event_id;
-            if current_events.is_empty() || (no_next_page && !seen_enough_events) {
+
+            // This handles a special case where the server might send us a page token along with
+            // a real page which ends at the current end of history. The page token then points to
+            // an empty page. We need to detect this, and consider it the end of history.
+            //
+            // This case unfortunately cannot be handled earlier, because we might fetch a page
+            // from the server which contains two complete WFTs, and thus we are happy to return
+            // an update at that time. But, if the page has a next page token, we *cannot* conclude
+            // we are done with replay until we fetch that page. So, we have to wait until the next
+            // extraction to determine (after fetching the next page and finding it to be empty)
+            // that we are done. Fetching the page eagerly is another option, but would be wasteful
+            // the overwhelming majority of the time.
+            let already_sent_update_with_enough_events = self
+                .id_of_last_event_in_last_extracted_update
+                .unwrap_or_default()
+                >= self.wft_started_event_id;
+            if current_events.is_empty()
+                && !fetch_happened
+                && already_sent_update_with_enough_events
+            {
+                // We must return an empty update which also says it contains the final WFT so we
+                // know we're done with replay.
+                return Ok(HistoryUpdate::from_events(
+                    [],
+                    self.previous_wft_started_id,
+                    self.wft_started_event_id,
+                    true,
+                )
+                .0);
+            }
+
+            if current_events.is_empty() || (fetch_happened && !seen_enough_events) {
                 // If next page fetching happened, and we still ended up with no or insufficient
                 // events, something is wrong. We're expecting there to be more events to be able to
                 // extract this update, but server isn't giving us any. We have no choice except to
@@ -278,6 +313,8 @@
                 // There was not a meaningful WFT in the whole page. We must fetch more.
                 continue;
             }
+            self.id_of_last_event_in_last_extracted_update =
+                update.events.last().map(|e| e.event_id);
             return Ok(update);
         }
     }
@@ -1168,4 +1205,51 @@ pub mod tests {
 
     // TODO: Test we dont re-feed pointless updates if fetching returns <= events we already
     // processed
+
+    #[tokio::test]
+    async fn handles_fetching_page_with_complete_wft_and_page_token_to_empty_page() {
+        let timer_hist = canned_histories::single_timer("t");
+        let workflow_task = timer_hist.get_full_history_info().unwrap();
+        let prev_started_wft_id = workflow_task.previous_started_event_id();
+        let wft_started_id = workflow_task.workflow_task_started_event_id();
+
+        let mut full_resp_with_npt: GetWorkflowExecutionHistoryResponse =
+            timer_hist.get_full_history_info().unwrap().into();
+        full_resp_with_npt.next_page_token = vec![1];
+
+        let mut mock_client = mock_workflow_client();
+        mock_client
+            .expect_get_workflow_execution_history()
+            .returning(move |_, _, _| Ok(full_resp_with_npt.clone()))
+            .times(1);
+        mock_client
+            .expect_get_workflow_execution_history()
+            .returning(move |_, _, _| {
+                Ok(GetWorkflowExecutionHistoryResponse {
+                    history: Some(History { events: vec![] }),
+                    raw_history: vec![],
+                    next_page_token: vec![],
+                    archived: false,
+                })
+            })
+            .times(1);
+
+        let mut paginator = HistoryPaginator::new(
+            workflow_task.into(),
+            prev_started_wft_id,
+            wft_started_id,
+            "wfid".to_string(),
+            "runid".to_string(),
+            NextPageToken::FetchFromStart,
+            Arc::new(mock_client),
+        );
+        let mut update = paginator.extract_next_update().await.unwrap();
+        let seq = update.take_next_wft_sequence(0).unwrap_events();
+        assert_eq!(seq.last().unwrap().event_id, 3);
+        let seq = update.take_next_wft_sequence(3).unwrap_events();
+        assert_eq!(seq.last().unwrap().event_id, 8);
+        assert_matches!(update.take_next_wft_sequence(8), NextWFT::NeedFetch);
+        let mut update = paginator.extract_next_update().await.unwrap();
+        assert_matches!(update.take_next_wft_sequence(8), NextWFT::ReplayOver);
+    }
 }
package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs
@@ -234,7 +234,10 @@ impl WFMachinesAdapter for SignalExternalMachine {
             SignalExternalWorkflowExecutionFailedCause::Unspecified => "unknown",
             SignalExternalWorkflowExecutionFailedCause::ExternalWorkflowExecutionNotFound
             | SignalExternalWorkflowExecutionFailedCause::NamespaceNotFound =>
-                "it was not found"
+                "it was not found",
+            SignalExternalWorkflowExecutionFailedCause::SignalCountLimitExceeded => {
+                "The per-workflow signal limit was exceeded"
+            }
         };
         vec![ResolveSignalExternalWorkflow {
             seq: self.shared_state.seq,