@temporalio/core-bridge 1.4.4 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +327 -419
- package/Cargo.toml +1 -1
- package/index.js +25 -2
- package/lib/errors.d.ts +22 -0
- package/lib/errors.js +65 -0
- package/lib/errors.js.map +1 -0
- package/lib/index.d.ts +440 -0
- package/lib/index.js +8 -0
- package/lib/index.js.map +1 -0
- package/package.json +11 -5
- package/releases/aarch64-apple-darwin/index.node +0 -0
- package/releases/aarch64-unknown-linux-gnu/index.node +0 -0
- package/releases/x86_64-apple-darwin/index.node +0 -0
- package/releases/x86_64-pc-windows-msvc/index.node +0 -0
- package/releases/x86_64-unknown-linux-gnu/index.node +0 -0
- package/sdk-core/.buildkite/docker/Dockerfile +1 -1
- package/sdk-core/.buildkite/docker/docker-compose.yaml +2 -2
- package/sdk-core/bridge-ffi/Cargo.toml +1 -1
- package/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -25
- package/sdk-core/bridge-ffi/src/lib.rs +29 -108
- package/sdk-core/bridge-ffi/src/wrappers.rs +35 -25
- package/sdk-core/client/Cargo.toml +1 -1
- package/sdk-core/client/src/lib.rs +12 -20
- package/sdk-core/client/src/raw.rs +9 -8
- package/sdk-core/client/src/retry.rs +100 -23
- package/sdk-core/core/Cargo.toml +5 -5
- package/sdk-core/core/benches/workflow_replay.rs +13 -10
- package/sdk-core/core/src/abstractions.rs +22 -22
- package/sdk-core/core/src/core_tests/activity_tasks.rs +1 -1
- package/sdk-core/core/src/core_tests/local_activities.rs +228 -6
- package/sdk-core/core/src/core_tests/queries.rs +247 -89
- package/sdk-core/core/src/core_tests/workers.rs +2 -2
- package/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
- package/sdk-core/core/src/core_tests/workflow_tasks.rs +46 -27
- package/sdk-core/core/src/lib.rs +139 -32
- package/sdk-core/core/src/replay/mod.rs +185 -41
- package/sdk-core/core/src/telemetry/log_export.rs +190 -0
- package/sdk-core/core/src/telemetry/metrics.rs +184 -139
- package/sdk-core/core/src/telemetry/mod.rs +296 -318
- package/sdk-core/core/src/telemetry/prometheus_server.rs +4 -3
- package/sdk-core/core/src/test_help/mod.rs +9 -7
- package/sdk-core/core/src/worker/activities/local_activities.rs +2 -1
- package/sdk-core/core/src/worker/activities.rs +40 -23
- package/sdk-core/core/src/worker/client/mocks.rs +1 -1
- package/sdk-core/core/src/worker/client.rs +30 -4
- package/sdk-core/core/src/worker/mod.rs +22 -18
- package/sdk-core/core/src/worker/workflow/driven_workflow.rs +10 -19
- package/sdk-core/core/src/worker/workflow/history_update.rs +99 -25
- package/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +2 -6
- package/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +18 -21
- package/sdk-core/core/src/worker/workflow/machines/mod.rs +12 -38
- package/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +178 -0
- package/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +8 -2
- package/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +1 -5
- package/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +232 -216
- package/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +1 -6
- package/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +4 -4
- package/sdk-core/core/src/worker/workflow/managed_run.rs +13 -5
- package/sdk-core/core/src/worker/workflow/mod.rs +61 -9
- package/sdk-core/core/src/worker/workflow/wft_poller.rs +2 -2
- package/sdk-core/core/src/worker/workflow/workflow_stream.rs +56 -11
- package/sdk-core/core-api/Cargo.toml +4 -3
- package/sdk-core/core-api/src/lib.rs +1 -43
- package/sdk-core/core-api/src/telemetry.rs +147 -0
- package/sdk-core/core-api/src/worker.rs +13 -0
- package/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +1 -1
- package/sdk-core/histories/evict_while_la_running_no_interference-23_history.bin +0 -0
- package/sdk-core/histories/evict_while_la_running_no_interference-85_history.bin +0 -0
- package/sdk-core/protos/api_upstream/.github/CODEOWNERS +1 -1
- package/sdk-core/protos/api_upstream/buf.yaml +0 -3
- package/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +3 -7
- package/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +8 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +1 -2
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +2 -0
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +3 -0
- package/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +13 -0
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +19 -59
- package/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +0 -19
- package/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +108 -29
- package/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +2 -2
- package/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +1 -0
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +47 -8
- package/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +15 -1
- package/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +2 -0
- package/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +8 -1
- package/sdk-core/sdk/src/interceptors.rs +36 -3
- package/sdk-core/sdk/src/lib.rs +7 -4
- package/sdk-core/sdk/src/workflow_context.rs +13 -2
- package/sdk-core/sdk-core-protos/src/history_builder.rs +47 -1
- package/sdk-core/sdk-core-protos/src/history_info.rs +22 -22
- package/sdk-core/sdk-core-protos/src/lib.rs +49 -27
- package/sdk-core/test-utils/Cargo.toml +1 -0
- package/sdk-core/test-utils/src/lib.rs +81 -29
- package/sdk-core/tests/integ_tests/metrics_tests.rs +37 -0
- package/sdk-core/tests/integ_tests/polling_tests.rs +0 -13
- package/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +145 -4
- package/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +53 -0
- package/sdk-core/tests/integ_tests/workflow_tests/replay.rs +106 -20
- package/sdk-core/tests/integ_tests/workflow_tests.rs +18 -8
- package/sdk-core/tests/main.rs +6 -4
- package/src/conversions.rs +52 -47
- package/src/errors.rs +28 -86
- package/src/helpers.rs +3 -4
- package/src/lib.rs +2 -2
- package/src/runtime.rs +132 -61
- package/src/testing.rs +7 -4
- package/src/worker.rs +67 -50
- package/ts/errors.ts +55 -0
- package/{index.d.ts → ts/index.ts} +121 -15
- package/sdk-core/core/src/log_export.rs +0 -62
- package/sdk-core/core/src/worker/workflow/machines/mutable_side_effect_state_machine.rs +0 -127
- package/sdk-core/core/src/worker/workflow/machines/side_effect_state_machine.rs +0 -71
- package/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +0 -83
- package/sdk-core/protos/api_upstream/temporal/api/enums/v1/cluster.proto +0 -40
|
@@ -12,7 +12,7 @@ use super::{
|
|
|
12
12
|
patch_state_machine::has_change, signal_external_state_machine::new_external_signal,
|
|
13
13
|
timer_state_machine::new_timer, upsert_search_attributes_state_machine::upsert_search_attrs,
|
|
14
14
|
workflow_machines::local_acts::LocalActivityData,
|
|
15
|
-
workflow_task_state_machine::WorkflowTaskMachine,
|
|
15
|
+
workflow_task_state_machine::WorkflowTaskMachine, Machines, NewMachineWithCommand,
|
|
16
16
|
TemporalStateMachine,
|
|
17
17
|
};
|
|
18
18
|
use crate::{
|
|
@@ -20,8 +20,9 @@ use crate::{
|
|
|
20
20
|
telemetry::{metrics::MetricsContext, VecDisplayer},
|
|
21
21
|
worker::{
|
|
22
22
|
workflow::{
|
|
23
|
-
|
|
24
|
-
|
|
23
|
+
machines::modify_workflow_properties_state_machine::modify_workflow_properties,
|
|
24
|
+
CommandID, DrivenWorkflow, HistoryUpdate, LocalResolution, OutgoingJob, WFCommand,
|
|
25
|
+
WorkflowFetcher, WorkflowStartedInfo,
|
|
25
26
|
},
|
|
26
27
|
ExecutingLAId, LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
|
|
27
28
|
},
|
|
@@ -38,6 +39,7 @@ use std::{
|
|
|
38
39
|
use temporal_sdk_core_protos::{
|
|
39
40
|
coresdk::{
|
|
40
41
|
common::NamespacedWorkflowExecution,
|
|
42
|
+
workflow_activation,
|
|
41
43
|
workflow_activation::{
|
|
42
44
|
workflow_activation_job, NotifyHasPatch, UpdateRandomSeed, WorkflowActivation,
|
|
43
45
|
},
|
|
@@ -153,9 +155,9 @@ struct ChangeInfo {
|
|
|
153
155
|
#[derive(Debug, derive_more::Display)]
|
|
154
156
|
#[must_use]
|
|
155
157
|
#[allow(clippy::large_enum_variant)]
|
|
156
|
-
pub enum MachineResponse {
|
|
158
|
+
pub(super) enum MachineResponse {
|
|
157
159
|
#[display(fmt = "PushWFJob({})", "_0")]
|
|
158
|
-
PushWFJob(
|
|
160
|
+
PushWFJob(OutgoingJob),
|
|
159
161
|
|
|
160
162
|
/// Pushes a new command into the list that will be sent to server once we respond with the
|
|
161
163
|
/// workflow task completion
|
|
@@ -196,7 +198,7 @@ where
|
|
|
196
198
|
T: Into<workflow_activation_job::Variant>,
|
|
197
199
|
{
|
|
198
200
|
fn from(v: T) -> Self {
|
|
199
|
-
Self::PushWFJob(v.into())
|
|
201
|
+
Self::PushWFJob(v.into().into())
|
|
200
202
|
}
|
|
201
203
|
}
|
|
202
204
|
|
|
@@ -315,6 +317,148 @@ impl WorkflowMachines {
|
|
|
315
317
|
self.drive_me.get_started_info()
|
|
316
318
|
}
|
|
317
319
|
|
|
320
|
+
/// Fetches commands which are ready for processing from the state machines, generally to be
|
|
321
|
+
/// sent off to the server. They are not removed from the internal queue, that happens when
|
|
322
|
+
/// corresponding history events from the server are being handled.
|
|
323
|
+
pub(crate) fn get_commands(&self) -> Vec<ProtoCommand> {
|
|
324
|
+
self.commands
|
|
325
|
+
.iter()
|
|
326
|
+
.filter_map(|c| {
|
|
327
|
+
if !self.machine(c.machine).is_final_state() {
|
|
328
|
+
match &c.command {
|
|
329
|
+
MachineAssociatedCommand::Real(cmd) => Some((**cmd).clone()),
|
|
330
|
+
MachineAssociatedCommand::FakeLocalActivityMarker(_) => None,
|
|
331
|
+
}
|
|
332
|
+
} else {
|
|
333
|
+
None
|
|
334
|
+
}
|
|
335
|
+
})
|
|
336
|
+
.collect()
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
/// Returns the next activation that needs to be performed by the lang sdk. Things like unblock
|
|
340
|
+
/// timer, etc. This does *not* cause any advancement of the state machines, it merely drains
|
|
341
|
+
/// from the outgoing queue of activation jobs.
|
|
342
|
+
///
|
|
343
|
+
/// The job list may be empty, in which case it is expected the caller handles what to do in a
|
|
344
|
+
/// "no work" situation. Possibly, it may know about some work the machines don't, like queries.
|
|
345
|
+
pub(crate) fn get_wf_activation(&mut self) -> WorkflowActivation {
|
|
346
|
+
let jobs = self.drive_me.drain_jobs();
|
|
347
|
+
WorkflowActivation {
|
|
348
|
+
timestamp: self.current_wf_time.map(Into::into),
|
|
349
|
+
is_replaying: self.replaying,
|
|
350
|
+
run_id: self.run_id.clone(),
|
|
351
|
+
history_length: self.last_processed_event as u32,
|
|
352
|
+
jobs,
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
pub(crate) fn has_pending_jobs(&self) -> bool {
|
|
357
|
+
!self.drive_me.peek_pending_jobs().is_empty()
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
pub(crate) fn has_pending_la_resolutions(&self) -> bool {
|
|
361
|
+
self.drive_me
|
|
362
|
+
.peek_pending_jobs()
|
|
363
|
+
.iter()
|
|
364
|
+
.any(|v| v.is_la_resolution)
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
/// Iterate the state machines, which consists of grabbing any pending outgoing commands from
|
|
368
|
+
/// the workflow code, handling them, and preparing them to be sent off to the server.
|
|
369
|
+
pub(crate) async fn iterate_machines(&mut self) -> Result<()> {
|
|
370
|
+
let results = self.drive_me.fetch_workflow_iteration_output().await;
|
|
371
|
+
self.handle_driven_results(results)?;
|
|
372
|
+
self.prepare_commands()?;
|
|
373
|
+
if self.workflow_is_finished() {
|
|
374
|
+
if let Some(rt) = self.total_runtime() {
|
|
375
|
+
self.metrics.wf_e2e_latency(rt);
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
Ok(())
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
/// Apply the next (unapplied) entire workflow task from history to these machines. Will replay
|
|
382
|
+
/// any events that need to be replayed until caught up to the newest WFT. May also fetch
|
|
383
|
+
/// history from server if needed.
|
|
384
|
+
pub(crate) async fn apply_next_wft_from_history(&mut self) -> Result<usize> {
|
|
385
|
+
// If we have already seen the terminal event for the entire workflow in a previous WFT,
|
|
386
|
+
// then we don't need to do anything here, and in fact we need to avoid re-applying the
|
|
387
|
+
// final WFT.
|
|
388
|
+
if self.have_seen_terminal_event {
|
|
389
|
+
return Ok(0);
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
let last_handled_wft_started_id = self.current_started_event_id;
|
|
393
|
+
let events = {
|
|
394
|
+
let mut evts = self
|
|
395
|
+
.last_history_from_server
|
|
396
|
+
.take_next_wft_sequence(last_handled_wft_started_id)
|
|
397
|
+
.await
|
|
398
|
+
.map_err(WFMachinesError::HistoryFetchingError)?;
|
|
399
|
+
// Do not re-process events we have already processed
|
|
400
|
+
evts.retain(|e| e.event_id > self.last_processed_event);
|
|
401
|
+
evts
|
|
402
|
+
};
|
|
403
|
+
let num_events_to_process = events.len();
|
|
404
|
+
|
|
405
|
+
// We're caught up on reply if there are no new events to process
|
|
406
|
+
if events.is_empty() {
|
|
407
|
+
self.replaying = false;
|
|
408
|
+
}
|
|
409
|
+
let replay_start = Instant::now();
|
|
410
|
+
|
|
411
|
+
if let Some(last_event) = events.last() {
|
|
412
|
+
if last_event.event_type == EventType::WorkflowTaskStarted as i32 {
|
|
413
|
+
self.next_started_event_id = last_event.event_id;
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
let mut history = events.into_iter().peekable();
|
|
418
|
+
while let Some(event) = history.next() {
|
|
419
|
+
if event.event_id != self.last_processed_event + 1 {
|
|
420
|
+
return Err(WFMachinesError::Fatal(format!(
|
|
421
|
+
"History is out of order. Last processed event: {}, event id: {}",
|
|
422
|
+
self.last_processed_event, event.event_id
|
|
423
|
+
)));
|
|
424
|
+
}
|
|
425
|
+
let next_event = history.peek();
|
|
426
|
+
let eid = event.event_id;
|
|
427
|
+
let etype = event.event_type();
|
|
428
|
+
self.handle_event(event, next_event.is_some())?;
|
|
429
|
+
self.last_processed_event = eid;
|
|
430
|
+
if etype == EventType::WorkflowTaskStarted && next_event.is_none() {
|
|
431
|
+
break;
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
// Scan through to the next WFT, searching for any patch / la markers, so that we can
|
|
436
|
+
// pre-resolve them.
|
|
437
|
+
for e in self.last_history_from_server.peek_next_wft_sequence() {
|
|
438
|
+
if let Some((patch_id, _)) = e.get_patch_marker_details() {
|
|
439
|
+
self.encountered_change_markers.insert(
|
|
440
|
+
patch_id.clone(),
|
|
441
|
+
ChangeInfo {
|
|
442
|
+
created_command: false,
|
|
443
|
+
},
|
|
444
|
+
);
|
|
445
|
+
// Found a patch marker
|
|
446
|
+
self.drive_me.send_job(
|
|
447
|
+
workflow_activation_job::Variant::NotifyHasPatch(NotifyHasPatch { patch_id })
|
|
448
|
+
.into(),
|
|
449
|
+
);
|
|
450
|
+
} else if e.is_local_activity_marker() {
|
|
451
|
+
self.local_activity_data.process_peekahead_marker(e)?;
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
if !self.replaying {
|
|
456
|
+
self.metrics.wf_task_replay_latency(replay_start.elapsed());
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
Ok(num_events_to_process)
|
|
460
|
+
}
|
|
461
|
+
|
|
318
462
|
/// Handle a single event from the workflow history. `has_next_event` should be false if `event`
|
|
319
463
|
/// is the last event in the history.
|
|
320
464
|
///
|
|
@@ -323,14 +467,8 @@ impl WorkflowMachines {
|
|
|
323
467
|
/// event is applied to the machine, which may also return a nondeterminism error if the machine
|
|
324
468
|
/// does not match the expected type. A fatal error may be returned if the machine is in an
|
|
325
469
|
/// invalid state.
|
|
326
|
-
#[instrument(
|
|
470
|
+
#[instrument(skip(self, event), fields(event=%event))]
|
|
327
471
|
fn handle_event(&mut self, event: HistoryEvent, has_next_event: bool) -> Result<()> {
|
|
328
|
-
if event.event_type() == EventType::Unspecified {
|
|
329
|
-
return Err(WFMachinesError::Fatal(format!(
|
|
330
|
-
"Event type is unspecified! This history is invalid. Event detail: {:?}",
|
|
331
|
-
event
|
|
332
|
-
)));
|
|
333
|
-
}
|
|
334
472
|
if event.is_final_wf_execution_event() {
|
|
335
473
|
self.have_seen_terminal_event = true;
|
|
336
474
|
}
|
|
@@ -348,11 +486,6 @@ impl WorkflowMachines {
|
|
|
348
486
|
Ok(())
|
|
349
487
|
};
|
|
350
488
|
}
|
|
351
|
-
|
|
352
|
-
if event.is_command_event() {
|
|
353
|
-
self.handle_command_event(event)?;
|
|
354
|
-
return Ok(());
|
|
355
|
-
}
|
|
356
489
|
if self.replaying
|
|
357
490
|
&& self.current_started_event_id
|
|
358
491
|
>= self.last_history_from_server.previous_started_event_id
|
|
@@ -361,59 +494,50 @@ impl WorkflowMachines {
|
|
|
361
494
|
// Replay is finished
|
|
362
495
|
self.replaying = false;
|
|
363
496
|
}
|
|
497
|
+
if event.event_type() == EventType::Unspecified || event.attributes.is_none() {
|
|
498
|
+
return if !event.worker_may_ignore {
|
|
499
|
+
Err(WFMachinesError::Fatal(format!(
|
|
500
|
+
"Event type is unspecified! This history is invalid. Event detail: {:?}",
|
|
501
|
+
event
|
|
502
|
+
)))
|
|
503
|
+
} else {
|
|
504
|
+
debug!("Event is ignorable");
|
|
505
|
+
Ok(())
|
|
506
|
+
};
|
|
507
|
+
}
|
|
364
508
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
509
|
+
if event.is_command_event() {
|
|
510
|
+
self.handle_command_event(event)?;
|
|
511
|
+
return Ok(());
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
if let Some(initial_cmd_id) = event.get_initial_command_event_id() {
|
|
515
|
+
// We remove the machine while we it handles events, then return it, to avoid
|
|
516
|
+
// borrowing from ourself mutably.
|
|
517
|
+
let maybe_machine = self.machines_by_event_id.remove(&initial_cmd_id);
|
|
518
|
+
match maybe_machine {
|
|
519
|
+
Some(sm) => {
|
|
520
|
+
self.submachine_handle_event(sm, event, has_next_event)?;
|
|
521
|
+
// Restore machine if not in it's final state
|
|
522
|
+
if !self.machine(sm).is_final_state() {
|
|
523
|
+
self.machines_by_event_id.insert(initial_cmd_id, sm);
|
|
377
524
|
}
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
525
|
+
}
|
|
526
|
+
None => {
|
|
527
|
+
return Err(WFMachinesError::Nondeterminism(format!(
|
|
528
|
+
"During event handling, this event had an initial command ID but we \
|
|
381
529
|
could not find a matching command for it: {:?}",
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
}
|
|
530
|
+
event
|
|
531
|
+
)));
|
|
385
532
|
}
|
|
386
533
|
}
|
|
387
|
-
|
|
534
|
+
} else {
|
|
535
|
+
self.handle_non_stateful_event(event, has_next_event)?;
|
|
388
536
|
}
|
|
389
537
|
|
|
390
538
|
Ok(())
|
|
391
539
|
}
|
|
392
540
|
|
|
393
|
-
/// Called when a workflow task started event has triggered. Ensures we are tracking the ID
|
|
394
|
-
/// of the current started event as well as workflow time properly.
|
|
395
|
-
fn task_started(&mut self, task_started_event_id: i64, time: SystemTime) -> Result<()> {
|
|
396
|
-
self.current_started_event_id = task_started_event_id;
|
|
397
|
-
self.wft_start_time = Some(time);
|
|
398
|
-
self.set_current_time(time);
|
|
399
|
-
|
|
400
|
-
// Notify local activity machines that we started a non-replay WFT, which will allow any
|
|
401
|
-
// which were waiting for a marker to instead decide to execute the LA since it clearly
|
|
402
|
-
// will not be resolved via marker.
|
|
403
|
-
if !self.replaying {
|
|
404
|
-
let mut resps = vec![];
|
|
405
|
-
for (k, mach) in self.all_machines.iter_mut() {
|
|
406
|
-
if let Machines::LocalActivityMachine(lam) = mach {
|
|
407
|
-
resps.push((k, lam.encountered_non_replay_wft()?));
|
|
408
|
-
}
|
|
409
|
-
}
|
|
410
|
-
for (mkey, resp_set) in resps {
|
|
411
|
-
self.process_machine_responses(mkey, resp_set)?;
|
|
412
|
-
}
|
|
413
|
-
}
|
|
414
|
-
Ok(())
|
|
415
|
-
}
|
|
416
|
-
|
|
417
541
|
/// A command event is an event which is generated from a command emitted as a result of
|
|
418
542
|
/// performing a workflow task. Each command has a corresponding event. For example
|
|
419
543
|
/// ScheduleActivityTaskCommand is recorded to the history as ActivityTaskScheduledEvent.
|
|
@@ -504,7 +628,7 @@ impl WorkflowMachines {
|
|
|
504
628
|
attrs,
|
|
505
629
|
)) = event.attributes
|
|
506
630
|
{
|
|
507
|
-
if let Some(st) = event.event_time {
|
|
631
|
+
if let Some(st) = event.event_time.clone() {
|
|
508
632
|
let as_systime: SystemTime = st.try_into()?;
|
|
509
633
|
self.workflow_start_time = Some(as_systime);
|
|
510
634
|
// Set the workflow time to be the event time of the first event, so that
|
|
@@ -516,6 +640,7 @@ impl WorkflowMachines {
|
|
|
516
640
|
self.drive_me.start(
|
|
517
641
|
self.workflow_id.clone(),
|
|
518
642
|
str_to_randomness_seed(&attrs.original_execution_run_id),
|
|
643
|
+
event.event_time.unwrap_or_default(),
|
|
519
644
|
attrs,
|
|
520
645
|
);
|
|
521
646
|
} else {
|
|
@@ -536,7 +661,8 @@ impl WorkflowMachines {
|
|
|
536
661
|
attrs,
|
|
537
662
|
)) = event.attributes
|
|
538
663
|
{
|
|
539
|
-
self.drive_me
|
|
664
|
+
self.drive_me
|
|
665
|
+
.send_job(workflow_activation::SignalWorkflow::from(attrs).into());
|
|
540
666
|
} else {
|
|
541
667
|
// err
|
|
542
668
|
}
|
|
@@ -548,7 +674,8 @@ impl WorkflowMachines {
|
|
|
548
674
|
),
|
|
549
675
|
) = event.attributes
|
|
550
676
|
{
|
|
551
|
-
self.drive_me
|
|
677
|
+
self.drive_me
|
|
678
|
+
.send_job(workflow_activation::CancelWorkflow::from(attrs).into());
|
|
552
679
|
} else {
|
|
553
680
|
// err
|
|
554
681
|
}
|
|
@@ -563,46 +690,6 @@ impl WorkflowMachines {
|
|
|
563
690
|
Ok(())
|
|
564
691
|
}
|
|
565
692
|
|
|
566
|
-
/// Fetches commands which are ready for processing from the state machines, generally to be
|
|
567
|
-
/// sent off to the server. They are not removed from the internal queue, that happens when
|
|
568
|
-
/// corresponding history events from the server are being handled.
|
|
569
|
-
pub(crate) fn get_commands(&self) -> Vec<ProtoCommand> {
|
|
570
|
-
self.commands
|
|
571
|
-
.iter()
|
|
572
|
-
.filter_map(|c| {
|
|
573
|
-
if !self.machine(c.machine).is_final_state() {
|
|
574
|
-
match &c.command {
|
|
575
|
-
MachineAssociatedCommand::Real(cmd) => Some((**cmd).clone()),
|
|
576
|
-
MachineAssociatedCommand::FakeLocalActivityMarker(_) => None,
|
|
577
|
-
}
|
|
578
|
-
} else {
|
|
579
|
-
None
|
|
580
|
-
}
|
|
581
|
-
})
|
|
582
|
-
.collect()
|
|
583
|
-
}
|
|
584
|
-
|
|
585
|
-
/// Returns the next activation that needs to be performed by the lang sdk. Things like unblock
|
|
586
|
-
/// timer, etc. This does *not* cause any advancement of the state machines, it merely drains
|
|
587
|
-
/// from the outgoing queue of activation jobs.
|
|
588
|
-
///
|
|
589
|
-
/// The job list may be empty, in which case it is expected the caller handles what to do in a
|
|
590
|
-
/// "no work" situation. Possibly, it may know about some work the machines don't, like queries.
|
|
591
|
-
pub(crate) fn get_wf_activation(&mut self) -> WorkflowActivation {
|
|
592
|
-
let jobs = self.drive_me.drain_jobs();
|
|
593
|
-
WorkflowActivation {
|
|
594
|
-
timestamp: self.current_wf_time.map(Into::into),
|
|
595
|
-
is_replaying: self.replaying,
|
|
596
|
-
run_id: self.run_id.clone(),
|
|
597
|
-
history_length: self.last_processed_event as u32,
|
|
598
|
-
jobs,
|
|
599
|
-
}
|
|
600
|
-
}
|
|
601
|
-
|
|
602
|
-
pub(crate) fn has_pending_jobs(&self) -> bool {
|
|
603
|
-
!self.drive_me.peek_pending_jobs().is_empty()
|
|
604
|
-
}
|
|
605
|
-
|
|
606
693
|
fn set_current_time(&mut self, time: SystemTime) -> SystemTime {
|
|
607
694
|
if self.current_wf_time.map_or(true, |t| t < time) {
|
|
608
695
|
self.current_wf_time = Some(time);
|
|
@@ -611,102 +698,6 @@ impl WorkflowMachines {
|
|
|
611
698
|
.expect("We have just ensured this is populated")
|
|
612
699
|
}
|
|
613
700
|
|
|
614
|
-
/// Iterate the state machines, which consists of grabbing any pending outgoing commands from
|
|
615
|
-
/// the workflow code, handling them, and preparing them to be sent off to the server.
|
|
616
|
-
pub(crate) async fn iterate_machines(&mut self) -> Result<()> {
|
|
617
|
-
let results = self.drive_me.fetch_workflow_iteration_output().await;
|
|
618
|
-
self.handle_driven_results(results)?;
|
|
619
|
-
self.prepare_commands()?;
|
|
620
|
-
if self.workflow_is_finished() {
|
|
621
|
-
if let Some(rt) = self.total_runtime() {
|
|
622
|
-
self.metrics.wf_e2e_latency(rt);
|
|
623
|
-
}
|
|
624
|
-
}
|
|
625
|
-
Ok(())
|
|
626
|
-
}
|
|
627
|
-
|
|
628
|
-
/// Apply the next (unapplied) entire workflow task from history to these machines. Will replay
|
|
629
|
-
/// any events that need to be replayed until caught up to the newest WFT. May also fetch
|
|
630
|
-
/// history from server if needed.
|
|
631
|
-
pub(crate) async fn apply_next_wft_from_history(&mut self) -> Result<usize> {
|
|
632
|
-
// If we have already seen the terminal event for the entire workflow in a previous WFT,
|
|
633
|
-
// then we don't need to do anything here, and in fact we need to avoid re-applying the
|
|
634
|
-
// final WFT.
|
|
635
|
-
if self.have_seen_terminal_event {
|
|
636
|
-
return Ok(0);
|
|
637
|
-
}
|
|
638
|
-
|
|
639
|
-
let last_handled_wft_started_id = self.current_started_event_id;
|
|
640
|
-
let events = {
|
|
641
|
-
let mut evts = self
|
|
642
|
-
.last_history_from_server
|
|
643
|
-
.take_next_wft_sequence(last_handled_wft_started_id)
|
|
644
|
-
.await
|
|
645
|
-
.map_err(WFMachinesError::HistoryFetchingError)?;
|
|
646
|
-
// Do not re-process events we have already processed
|
|
647
|
-
evts.retain(|e| e.event_id > self.last_processed_event);
|
|
648
|
-
evts
|
|
649
|
-
};
|
|
650
|
-
let num_events_to_process = events.len();
|
|
651
|
-
|
|
652
|
-
// We're caught up on reply if there are no new events to process
|
|
653
|
-
// TODO: Probably this is unneeded if we evict whenever history is from non-sticky queue
|
|
654
|
-
if events.is_empty() {
|
|
655
|
-
self.replaying = false;
|
|
656
|
-
}
|
|
657
|
-
let replay_start = Instant::now();
|
|
658
|
-
|
|
659
|
-
if let Some(last_event) = events.last() {
|
|
660
|
-
if last_event.event_type == EventType::WorkflowTaskStarted as i32 {
|
|
661
|
-
self.next_started_event_id = last_event.event_id;
|
|
662
|
-
}
|
|
663
|
-
}
|
|
664
|
-
|
|
665
|
-
let mut history = events.into_iter().peekable();
|
|
666
|
-
while let Some(event) = history.next() {
|
|
667
|
-
if event.event_id != self.last_processed_event + 1 {
|
|
668
|
-
return Err(WFMachinesError::Fatal(format!(
|
|
669
|
-
"History is out of order. Last processed event: {}, event id: {}",
|
|
670
|
-
self.last_processed_event, event.event_id
|
|
671
|
-
)));
|
|
672
|
-
}
|
|
673
|
-
let next_event = history.peek();
|
|
674
|
-
let eid = event.event_id;
|
|
675
|
-
let etype = event.event_type;
|
|
676
|
-
self.handle_event(event, next_event.is_some())?;
|
|
677
|
-
self.last_processed_event = eid;
|
|
678
|
-
if etype == EventType::WorkflowTaskStarted as i32 && next_event.is_none() {
|
|
679
|
-
break;
|
|
680
|
-
}
|
|
681
|
-
}
|
|
682
|
-
|
|
683
|
-
// Scan through to the next WFT, searching for any patch markers, so that we can
|
|
684
|
-
// pre-resolve them.
|
|
685
|
-
for e in self.last_history_from_server.peek_next_wft_sequence() {
|
|
686
|
-
if let Some((patch_id, _)) = e.get_patch_marker_details() {
|
|
687
|
-
self.encountered_change_markers.insert(
|
|
688
|
-
patch_id.clone(),
|
|
689
|
-
ChangeInfo {
|
|
690
|
-
created_command: false,
|
|
691
|
-
},
|
|
692
|
-
);
|
|
693
|
-
// Found a patch marker
|
|
694
|
-
self.drive_me
|
|
695
|
-
.send_job(workflow_activation_job::Variant::NotifyHasPatch(
|
|
696
|
-
NotifyHasPatch { patch_id },
|
|
697
|
-
));
|
|
698
|
-
} else if e.is_local_activity_marker() {
|
|
699
|
-
self.local_activity_data.process_peekahead_marker(e)?;
|
|
700
|
-
}
|
|
701
|
-
}
|
|
702
|
-
|
|
703
|
-
if !self.replaying {
|
|
704
|
-
self.metrics.wf_task_replay_latency(replay_start.elapsed());
|
|
705
|
-
}
|
|
706
|
-
|
|
707
|
-
Ok(num_events_to_process)
|
|
708
|
-
}
|
|
709
|
-
|
|
710
701
|
/// Wrapper for calling [TemporalStateMachine::handle_event] which appropriately takes action
|
|
711
702
|
/// on the returned machine responses
|
|
712
703
|
fn submachine_handle_event(
|
|
@@ -765,7 +756,7 @@ impl WorkflowMachines {
|
|
|
765
756
|
) -> Result<()> {
|
|
766
757
|
let sm = self.machine(smk);
|
|
767
758
|
if !machine_responses.is_empty() {
|
|
768
|
-
debug!(responses = %machine_responses.display(), machine_name = %sm.
|
|
759
|
+
debug!(responses = %machine_responses.display(), machine_name = %sm.name(),
|
|
769
760
|
"Machine produced responses");
|
|
770
761
|
}
|
|
771
762
|
self.process_machine_resps_impl(smk, machine_responses)
|
|
@@ -791,14 +782,12 @@ impl WorkflowMachines {
|
|
|
791
782
|
self.task_started(task_started_event_id, time)?;
|
|
792
783
|
}
|
|
793
784
|
MachineResponse::UpdateRunIdOnWorkflowReset { run_id: new_run_id } => {
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
},
|
|
801
|
-
));
|
|
785
|
+
self.drive_me.send_job(
|
|
786
|
+
workflow_activation_job::Variant::UpdateRandomSeed(UpdateRandomSeed {
|
|
787
|
+
randomness_seed: str_to_randomness_seed(&new_run_id),
|
|
788
|
+
})
|
|
789
|
+
.into(),
|
|
790
|
+
);
|
|
802
791
|
}
|
|
803
792
|
MachineResponse::IssueNewCommand(c) => {
|
|
804
793
|
self.current_wf_task_commands.push_back(CommandAndMachine {
|
|
@@ -888,6 +877,30 @@ impl WorkflowMachines {
|
|
|
888
877
|
Ok(())
|
|
889
878
|
}
|
|
890
879
|
|
|
880
|
+
/// Called when a workflow task started event has triggered. Ensures we are tracking the ID
|
|
881
|
+
/// of the current started event as well as workflow time properly.
|
|
882
|
+
fn task_started(&mut self, task_started_event_id: i64, time: SystemTime) -> Result<()> {
|
|
883
|
+
self.current_started_event_id = task_started_event_id;
|
|
884
|
+
self.wft_start_time = Some(time);
|
|
885
|
+
self.set_current_time(time);
|
|
886
|
+
|
|
887
|
+
// Notify local activity machines that we started a non-replay WFT, which will allow any
|
|
888
|
+
// which were waiting for a marker to instead decide to execute the LA since it clearly
|
|
889
|
+
// will not be resolved via marker.
|
|
890
|
+
if !self.replaying {
|
|
891
|
+
let mut resps = vec![];
|
|
892
|
+
for (k, mach) in self.all_machines.iter_mut() {
|
|
893
|
+
if let Machines::LocalActivityMachine(lam) = mach {
|
|
894
|
+
resps.push((k, lam.encountered_non_replay_wft()?));
|
|
895
|
+
}
|
|
896
|
+
}
|
|
897
|
+
for (mkey, resp_set) in resps {
|
|
898
|
+
self.process_machine_responses(mkey, resp_set)?;
|
|
899
|
+
}
|
|
900
|
+
}
|
|
901
|
+
Ok(())
|
|
902
|
+
}
|
|
903
|
+
|
|
891
904
|
/// Handles results of the workflow activation, delegating work to the appropriate state
|
|
892
905
|
/// machine. Returns a list of workflow jobs that should be queued in the pending activation for
|
|
893
906
|
/// the next poll. This list will be populated only if state machine produced lang activations
|
|
@@ -1037,6 +1050,12 @@ impl WorkflowMachines {
|
|
|
1037
1050
|
// Nothing to do here, queries are handled above the machine level
|
|
1038
1051
|
unimplemented!("Query responses should not make it down into the machines")
|
|
1039
1052
|
}
|
|
1053
|
+
WFCommand::ModifyWorkflowProperties(attrs) => {
|
|
1054
|
+
self.add_cmd_to_wf_task(
|
|
1055
|
+
modify_workflow_properties(attrs),
|
|
1056
|
+
CommandIdKind::NeverResolves,
|
|
1057
|
+
);
|
|
1058
|
+
}
|
|
1040
1059
|
WFCommand::NoCommandsFromLang => (),
|
|
1041
1060
|
}
|
|
1042
1061
|
}
|
|
@@ -1143,10 +1162,7 @@ enum ChangeMarkerOutcome {
|
|
|
1143
1162
|
|
|
1144
1163
|
/// Special handling for patch markers, when handling command events as in
|
|
1145
1164
|
/// [WorkflowMachines::handle_command_event]
|
|
1146
|
-
fn change_marker_handling(
|
|
1147
|
-
event: &HistoryEvent,
|
|
1148
|
-
mach: &dyn TemporalStateMachine,
|
|
1149
|
-
) -> Result<ChangeMarkerOutcome> {
|
|
1165
|
+
fn change_marker_handling(event: &HistoryEvent, mach: &Machines) -> Result<ChangeMarkerOutcome> {
|
|
1150
1166
|
if !mach.matches_event(event) {
|
|
1151
1167
|
// Version markers can be skipped in the event they are deprecated
|
|
1152
1168
|
if let Some((patch_name, deprecated)) = event.get_patch_marker_details() {
|
|
@@ -1162,11 +1178,11 @@ fn change_marker_handling(
|
|
|
1162
1178
|
patch_name
|
|
1163
1179
|
)));
|
|
1164
1180
|
}
|
|
1165
|
-
//
|
|
1181
|
+
// Patch machines themselves may also not *have* matching markers, where non-deprecated
|
|
1166
1182
|
// calls take the old path, and deprecated calls assume history is produced by a new-code
|
|
1167
1183
|
// worker.
|
|
1168
|
-
if mach
|
|
1169
|
-
debug!("Skipping non-matching event against
|
|
1184
|
+
if matches!(mach, Machines::PatchMachine(_)) {
|
|
1185
|
+
debug!("Skipping non-matching event against patch machine");
|
|
1170
1186
|
return Ok(ChangeMarkerOutcome::SkipCommand);
|
|
1171
1187
|
}
|
|
1172
1188
|
}
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
#![allow(clippy::enum_variant_names)]
|
|
2
2
|
|
|
3
3
|
use super::{
|
|
4
|
-
workflow_machines::MachineResponse, Cancellable, EventInfo,
|
|
5
|
-
WFMachinesError,
|
|
4
|
+
workflow_machines::MachineResponse, Cancellable, EventInfo, WFMachinesAdapter, WFMachinesError,
|
|
6
5
|
};
|
|
7
6
|
use rustfsm::{fsm, TransitionResult};
|
|
8
7
|
use std::{
|
|
@@ -101,10 +100,6 @@ impl WFMachinesAdapter for WorkflowTaskMachine {
|
|
|
101
100
|
| EventType::WorkflowTaskFailed
|
|
102
101
|
)
|
|
103
102
|
}
|
|
104
|
-
|
|
105
|
-
fn kind(&self) -> MachineKind {
|
|
106
|
-
MachineKind::WorkflowTask
|
|
107
|
-
}
|
|
108
103
|
}
|
|
109
104
|
|
|
110
105
|
impl TryFrom<HistoryEvent> for WorkflowTaskMachineEvents {
|
|
@@ -87,7 +87,7 @@ impl ManagedWFFunc {
|
|
|
87
87
|
"runid".to_string(),
|
|
88
88
|
hist,
|
|
89
89
|
Box::new(driver).into(),
|
|
90
|
-
|
|
90
|
+
MetricsContext::no_op(),
|
|
91
91
|
);
|
|
92
92
|
let mgr = WorkflowManager::new_from_machines(state_machines);
|
|
93
93
|
Self {
|
|
@@ -98,7 +98,7 @@ impl ManagedWFFunc {
|
|
|
98
98
|
}
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
-
#[instrument(
|
|
101
|
+
#[instrument(skip(self))]
|
|
102
102
|
pub(crate) async fn get_next_activation(&mut self) -> Result<WorkflowActivation> {
|
|
103
103
|
let res = self.mgr.get_next_activation().await?;
|
|
104
104
|
debug!("Managed wf next activation: {}", &res);
|
|
@@ -116,7 +116,7 @@ impl ManagedWFFunc {
|
|
|
116
116
|
}
|
|
117
117
|
|
|
118
118
|
/// Feed new history, as if received a new poll result. Returns new activation
|
|
119
|
-
#[instrument(
|
|
119
|
+
#[instrument(skip(self, update))]
|
|
120
120
|
pub(crate) async fn new_history(
|
|
121
121
|
&mut self,
|
|
122
122
|
update: HistoryUpdate,
|
|
@@ -174,7 +174,7 @@ impl ManagedWFFunc {
|
|
|
174
174
|
self.future_handle.take().unwrap().await.unwrap()
|
|
175
175
|
}
|
|
176
176
|
|
|
177
|
-
#[instrument(
|
|
177
|
+
#[instrument(skip(self, res))]
|
|
178
178
|
async fn push_activation_to_wf(&mut self, res: &WorkflowActivation) -> Result<()> {
|
|
179
179
|
if res.jobs.is_empty() {
|
|
180
180
|
// Nothing to do here
|