temporalio 0.0.2 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +25 -23
- data/bridge/Cargo.lock +185 -76
- data/bridge/Cargo.toml +6 -4
- data/bridge/sdk-core/README.md +19 -6
- data/bridge/sdk-core/client/src/lib.rs +215 -39
- data/bridge/sdk-core/client/src/metrics.rs +17 -8
- data/bridge/sdk-core/client/src/raw.rs +4 -4
- data/bridge/sdk-core/client/src/retry.rs +32 -20
- data/bridge/sdk-core/core/Cargo.toml +22 -9
- data/bridge/sdk-core/core/src/abstractions.rs +203 -14
- data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +76 -41
- data/bridge/sdk-core/core/src/core_tests/determinism.rs +165 -2
- data/bridge/sdk-core/core/src/core_tests/local_activities.rs +204 -83
- data/bridge/sdk-core/core/src/core_tests/queries.rs +3 -4
- data/bridge/sdk-core/core/src/core_tests/workers.rs +1 -3
- data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +397 -54
- data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +106 -12
- data/bridge/sdk-core/core/src/internal_flags.rs +136 -0
- data/bridge/sdk-core/core/src/lib.rs +16 -9
- data/bridge/sdk-core/core/src/telemetry/log_export.rs +1 -1
- data/bridge/sdk-core/core/src/telemetry/metrics.rs +69 -35
- data/bridge/sdk-core/core/src/telemetry/mod.rs +29 -13
- data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +17 -12
- data/bridge/sdk-core/core/src/test_help/mod.rs +62 -12
- data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +112 -156
- data/bridge/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
- data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +352 -122
- data/bridge/sdk-core/core/src/worker/activities.rs +233 -157
- data/bridge/sdk-core/core/src/worker/client/mocks.rs +22 -2
- data/bridge/sdk-core/core/src/worker/client.rs +18 -2
- data/bridge/sdk-core/core/src/worker/mod.rs +165 -58
- data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
- data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +3 -5
- data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +856 -277
- data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +100 -43
- data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +7 -7
- data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +5 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +87 -27
- data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +5 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +5 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +5 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +137 -62
- data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +25 -17
- data/bridge/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +7 -6
- data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +103 -152
- data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +7 -7
- data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +9 -9
- data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +2 -2
- data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +14 -7
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +5 -16
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +201 -121
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +11 -14
- data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +30 -15
- data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +1026 -376
- data/bridge/sdk-core/core/src/worker/workflow/mod.rs +460 -384
- data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
- data/bridge/sdk-core/core/src/worker/workflow/wft_extraction.rs +125 -0
- data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +1 -4
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +448 -718
- data/bridge/sdk-core/core-api/Cargo.toml +2 -1
- data/bridge/sdk-core/core-api/src/errors.rs +1 -34
- data/bridge/sdk-core/core-api/src/lib.rs +6 -2
- data/bridge/sdk-core/core-api/src/telemetry.rs +0 -6
- data/bridge/sdk-core/core-api/src/worker.rs +14 -1
- data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +18 -15
- data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +8 -3
- data/bridge/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +5 -17
- data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +11 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +1 -6
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +6 -6
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +5 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +22 -6
- data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +48 -19
- data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +2 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +3 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/{enums/v1/interaction_type.proto → protocol/v1/message.proto} +29 -11
- data/bridge/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +111 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +59 -28
- data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +2 -2
- data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +7 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +6 -0
- data/bridge/sdk-core/sdk/Cargo.toml +3 -2
- data/bridge/sdk-core/sdk/src/lib.rs +87 -20
- data/bridge/sdk-core/sdk/src/workflow_future.rs +9 -8
- data/bridge/sdk-core/sdk-core-protos/Cargo.toml +5 -2
- data/bridge/sdk-core/sdk-core-protos/build.rs +36 -1
- data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +100 -87
- data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +5 -1
- data/bridge/sdk-core/sdk-core-protos/src/lib.rs +175 -57
- data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
- data/bridge/sdk-core/test-utils/Cargo.toml +3 -1
- data/bridge/sdk-core/test-utils/src/canned_histories.rs +106 -296
- data/bridge/sdk-core/test-utils/src/histfetch.rs +1 -1
- data/bridge/sdk-core/test-utils/src/lib.rs +82 -23
- data/bridge/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
- data/bridge/sdk-core/test-utils/src/workflows.rs +29 -0
- data/bridge/sdk-core/tests/fuzzy_workflow.rs +130 -0
- data/bridge/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
- data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
- data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +5 -3
- data/bridge/sdk-core/tests/integ_tests/metrics_tests.rs +218 -16
- data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +4 -47
- data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
- data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +93 -69
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +72 -191
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +7 -28
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +12 -7
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -4
- data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +10 -11
- data/bridge/sdk-core/tests/main.rs +3 -13
- data/bridge/sdk-core/tests/runner.rs +75 -36
- data/bridge/sdk-core/tests/wf_input_replay.rs +32 -0
- data/bridge/src/connection.rs +41 -25
- data/bridge/src/lib.rs +269 -14
- data/bridge/src/runtime.rs +1 -1
- data/bridge/src/test_server.rs +153 -0
- data/bridge/src/worker.rs +89 -16
- data/lib/gen/temporal/api/command/v1/message_pb.rb +4 -18
- data/lib/gen/temporal/api/common/v1/message_pb.rb +4 -0
- data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +1 -3
- data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +3 -3
- data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +2 -0
- data/lib/gen/temporal/api/enums/v1/update_pb.rb +6 -4
- data/lib/gen/temporal/api/history/v1/message_pb.rb +27 -19
- data/lib/gen/temporal/api/namespace/v1/message_pb.rb +1 -0
- data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +3 -0
- data/lib/gen/temporal/api/protocol/v1/message_pb.rb +30 -0
- data/lib/gen/temporal/api/sdk/v1/task_complete_metadata_pb.rb +23 -0
- data/lib/gen/temporal/api/testservice/v1/request_response_pb.rb +49 -0
- data/lib/gen/temporal/api/testservice/v1/service_pb.rb +21 -0
- data/lib/gen/temporal/api/update/v1/message_pb.rb +72 -0
- data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +26 -16
- data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +13 -9
- data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +10 -6
- data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +13 -9
- data/lib/gen/temporal/sdk/core/common/common_pb.rb +7 -3
- data/lib/gen/temporal/sdk/core/core_interface_pb.rb +9 -3
- data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +7 -3
- data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +27 -21
- data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +28 -24
- data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +12 -5
- data/lib/temporalio/activity/context.rb +13 -8
- data/lib/temporalio/activity/info.rb +1 -1
- data/lib/temporalio/bridge/connect_options.rb +15 -0
- data/lib/temporalio/bridge/retry_config.rb +24 -0
- data/lib/temporalio/bridge/tls_options.rb +19 -0
- data/lib/temporalio/bridge.rb +1 -1
- data/lib/temporalio/client/implementation.rb +8 -8
- data/lib/temporalio/connection/retry_config.rb +44 -0
- data/lib/temporalio/connection/service.rb +20 -0
- data/lib/temporalio/connection/test_service.rb +92 -0
- data/lib/temporalio/connection/tls_options.rb +51 -0
- data/lib/temporalio/connection/workflow_service.rb +731 -0
- data/lib/temporalio/connection.rb +55 -720
- data/lib/temporalio/interceptor/activity_inbound.rb +22 -0
- data/lib/temporalio/interceptor/activity_outbound.rb +24 -0
- data/lib/temporalio/interceptor/chain.rb +5 -5
- data/lib/temporalio/interceptor/client.rb +8 -4
- data/lib/temporalio/interceptor.rb +22 -0
- data/lib/temporalio/retry_policy.rb +13 -3
- data/lib/temporalio/testing/time_skipping_handle.rb +32 -0
- data/lib/temporalio/testing/time_skipping_interceptor.rb +23 -0
- data/lib/temporalio/testing/workflow_environment.rb +112 -0
- data/lib/temporalio/testing.rb +175 -0
- data/lib/temporalio/version.rb +1 -1
- data/lib/temporalio/worker/activity_runner.rb +26 -4
- data/lib/temporalio/worker/activity_worker.rb +44 -18
- data/lib/temporalio/worker/sync_worker.rb +47 -11
- data/lib/temporalio/worker.rb +27 -21
- data/lib/temporalio/workflow/async.rb +46 -0
- data/lib/temporalio/workflow/future.rb +138 -0
- data/lib/temporalio/workflow/info.rb +76 -0
- data/lib/thermite_patch.rb +10 -0
- data/sig/async.rbs +17 -0
- data/sig/protobuf.rbs +16 -0
- data/sig/protos/dependencies/gogoproto/gogo.rbs +914 -0
- data/sig/protos/google/protobuf/any.rbs +157 -0
- data/sig/protos/google/protobuf/descriptor.rbs +2825 -0
- data/sig/protos/google/protobuf/duration.rbs +114 -0
- data/sig/protos/google/protobuf/empty.rbs +36 -0
- data/sig/protos/google/protobuf/timestamp.rbs +145 -0
- data/sig/protos/google/protobuf/wrappers.rbs +358 -0
- data/sig/protos/temporal/api/batch/v1/message.rbs +300 -0
- data/sig/protos/temporal/api/command/v1/message.rbs +1399 -0
- data/sig/protos/temporal/api/common/v1/message.rbs +528 -0
- data/sig/protos/temporal/api/enums/v1/batch_operation.rbs +79 -0
- data/sig/protos/temporal/api/enums/v1/command_type.rbs +68 -0
- data/sig/protos/temporal/api/enums/v1/common.rbs +118 -0
- data/sig/protos/temporal/api/enums/v1/event_type.rbs +264 -0
- data/sig/protos/temporal/api/enums/v1/failed_cause.rbs +277 -0
- data/sig/protos/temporal/api/enums/v1/namespace.rbs +108 -0
- data/sig/protos/temporal/api/enums/v1/query.rbs +81 -0
- data/sig/protos/temporal/api/enums/v1/reset.rbs +44 -0
- data/sig/protos/temporal/api/enums/v1/schedule.rbs +72 -0
- data/sig/protos/temporal/api/enums/v1/task_queue.rbs +92 -0
- data/sig/protos/temporal/api/enums/v1/update.rbs +64 -0
- data/sig/protos/temporal/api/enums/v1/workflow.rbs +371 -0
- data/sig/protos/temporal/api/errordetails/v1/message.rbs +551 -0
- data/sig/protos/temporal/api/failure/v1/message.rbs +581 -0
- data/sig/protos/temporal/api/filter/v1/message.rbs +171 -0
- data/sig/protos/temporal/api/history/v1/message.rbs +4609 -0
- data/sig/protos/temporal/api/namespace/v1/message.rbs +410 -0
- data/sig/protos/temporal/api/operatorservice/v1/request_response.rbs +643 -0
- data/sig/protos/temporal/api/operatorservice/v1/service.rbs +17 -0
- data/sig/protos/temporal/api/protocol/v1/message.rbs +84 -0
- data/sig/protos/temporal/api/query/v1/message.rbs +182 -0
- data/sig/protos/temporal/api/replication/v1/message.rbs +148 -0
- data/sig/protos/temporal/api/schedule/v1/message.rbs +1488 -0
- data/sig/protos/temporal/api/sdk/v1/task_complete_metadata.rbs +110 -0
- data/sig/protos/temporal/api/taskqueue/v1/message.rbs +486 -0
- data/sig/protos/temporal/api/testservice/v1/request_response.rbs +249 -0
- data/sig/protos/temporal/api/testservice/v1/service.rbs +15 -0
- data/sig/protos/temporal/api/update/v1/message.rbs +489 -0
- data/sig/protos/temporal/api/version/v1/message.rbs +184 -0
- data/sig/protos/temporal/api/workflow/v1/message.rbs +824 -0
- data/sig/protos/temporal/api/workflowservice/v1/request_response.rbs +7250 -0
- data/sig/protos/temporal/api/workflowservice/v1/service.rbs +22 -0
- data/sig/protos/temporal/sdk/core/activity_result/activity_result.rbs +380 -0
- data/sig/protos/temporal/sdk/core/activity_task/activity_task.rbs +386 -0
- data/sig/protos/temporal/sdk/core/child_workflow/child_workflow.rbs +323 -0
- data/sig/protos/temporal/sdk/core/common/common.rbs +62 -0
- data/sig/protos/temporal/sdk/core/core_interface.rbs +101 -0
- data/sig/protos/temporal/sdk/core/external_data/external_data.rbs +119 -0
- data/sig/protos/temporal/sdk/core/workflow_activation/workflow_activation.rbs +1473 -0
- data/sig/protos/temporal/sdk/core/workflow_commands/workflow_commands.rbs +1784 -0
- data/sig/protos/temporal/sdk/core/workflow_completion/workflow_completion.rbs +180 -0
- data/sig/ruby.rbs +12 -0
- data/sig/temporalio/activity/context.rbs +29 -0
- data/sig/temporalio/activity/info.rbs +43 -0
- data/sig/temporalio/activity.rbs +19 -0
- data/sig/temporalio/bridge/connect_options.rbs +19 -0
- data/sig/temporalio/bridge/error.rbs +8 -0
- data/sig/temporalio/bridge/retry_config.rbs +21 -0
- data/sig/temporalio/bridge/tls_options.rbs +17 -0
- data/sig/temporalio/bridge.rbs +71 -0
- data/sig/temporalio/client/implementation.rbs +38 -0
- data/sig/temporalio/client/workflow_handle.rbs +41 -0
- data/sig/temporalio/client.rbs +35 -0
- data/sig/temporalio/connection/retry_config.rbs +37 -0
- data/sig/temporalio/connection/service.rbs +14 -0
- data/sig/temporalio/connection/test_service.rbs +13 -0
- data/sig/temporalio/connection/tls_options.rbs +43 -0
- data/sig/temporalio/connection/workflow_service.rbs +48 -0
- data/sig/temporalio/connection.rbs +30 -0
- data/sig/temporalio/data_converter.rbs +35 -0
- data/sig/temporalio/error/failure.rbs +121 -0
- data/sig/temporalio/error/workflow_failure.rbs +9 -0
- data/sig/temporalio/errors.rbs +36 -0
- data/sig/temporalio/failure_converter/base.rbs +12 -0
- data/sig/temporalio/failure_converter/basic.rbs +86 -0
- data/sig/temporalio/failure_converter.rbs +5 -0
- data/sig/temporalio/interceptor/activity_inbound.rbs +21 -0
- data/sig/temporalio/interceptor/activity_outbound.rbs +10 -0
- data/sig/temporalio/interceptor/chain.rbs +24 -0
- data/sig/temporalio/interceptor/client.rbs +148 -0
- data/sig/temporalio/interceptor.rbs +6 -0
- data/sig/temporalio/payload_codec/base.rbs +12 -0
- data/sig/temporalio/payload_converter/base.rbs +12 -0
- data/sig/temporalio/payload_converter/bytes.rbs +9 -0
- data/sig/temporalio/payload_converter/composite.rbs +19 -0
- data/sig/temporalio/payload_converter/encoding_base.rbs +14 -0
- data/sig/temporalio/payload_converter/json.rbs +9 -0
- data/sig/temporalio/payload_converter/nil.rbs +9 -0
- data/sig/temporalio/payload_converter.rbs +5 -0
- data/sig/temporalio/retry_policy.rbs +25 -0
- data/sig/temporalio/retry_state.rbs +20 -0
- data/sig/temporalio/runtime.rbs +12 -0
- data/sig/temporalio/testing/time_skipping_handle.rbs +15 -0
- data/sig/temporalio/testing/time_skipping_interceptor.rbs +13 -0
- data/sig/temporalio/testing/workflow_environment.rbs +22 -0
- data/sig/temporalio/testing.rbs +35 -0
- data/sig/temporalio/timeout_type.rbs +15 -0
- data/sig/temporalio/version.rbs +3 -0
- data/sig/temporalio/worker/activity_runner.rbs +35 -0
- data/sig/temporalio/worker/activity_worker.rbs +44 -0
- data/sig/temporalio/worker/reactor.rbs +22 -0
- data/sig/temporalio/worker/runner.rbs +21 -0
- data/sig/temporalio/worker/sync_worker.rbs +23 -0
- data/sig/temporalio/worker/thread_pool_executor.rbs +23 -0
- data/sig/temporalio/worker.rbs +46 -0
- data/sig/temporalio/workflow/async.rbs +9 -0
- data/sig/temporalio/workflow/execution_info.rbs +55 -0
- data/sig/temporalio/workflow/execution_status.rbs +21 -0
- data/sig/temporalio/workflow/future.rbs +40 -0
- data/sig/temporalio/workflow/id_reuse_policy.rbs +15 -0
- data/sig/temporalio/workflow/info.rbs +55 -0
- data/sig/temporalio/workflow/query_reject_condition.rbs +14 -0
- data/sig/temporalio.rbs +2 -0
- data/sig/thermite_patch.rbs +15 -0
- data/temporalio.gemspec +6 -4
- metadata +183 -17
- data/bridge/sdk-core/Cargo.lock +0 -2606
- data/bridge/sdk-core/protos/api_upstream/temporal/api/interaction/v1/message.proto +0 -87
- data/lib/bridge.so +0 -0
- data/lib/gen/temporal/api/enums/v1/interaction_type_pb.rb +0 -25
- data/lib/gen/temporal/api/interaction/v1/message_pb.rb +0 -49
- data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +0 -222
@@ -1,188 +1,217 @@
|
|
1
1
|
#[cfg(test)]
|
2
2
|
mod managed_wf_test;
|
3
3
|
|
4
|
+
#[cfg(test)]
|
5
|
+
pub(crate) use managed_wf_test::ManagedWFFunc;
|
6
|
+
|
4
7
|
use crate::{
|
8
|
+
abstractions::dbg_panic,
|
9
|
+
protosext::WorkflowActivationExt,
|
5
10
|
worker::{
|
6
11
|
workflow::{
|
7
|
-
machines::WorkflowMachines, ActivationAction,
|
8
|
-
|
9
|
-
|
10
|
-
|
12
|
+
history_update::HistoryPaginator, machines::WorkflowMachines, ActivationAction,
|
13
|
+
ActivationCompleteOutcome, ActivationCompleteResult, ActivationOrAuto,
|
14
|
+
EvictionRequestResult, FailedActivationWFTReport, HeartbeatTimeoutMsg, HistoryUpdate,
|
15
|
+
LocalActivityRequestSink, LocalResolution, NextPageReq, OutgoingServerCommands,
|
16
|
+
OutstandingActivation, OutstandingTask, PermittedWFT, RequestEvictMsg, RunBasics,
|
17
|
+
ServerCommandsWithWorkflowInfo, WFCommand, WFMachinesError, WFTReportStatus,
|
18
|
+
WorkflowBridge, WorkflowTaskInfo, WFT_HEARTBEAT_TIMEOUT_FRACTION,
|
11
19
|
},
|
12
|
-
LocalActRequest,
|
20
|
+
LocalActRequest, LEGACY_QUERY_ID,
|
13
21
|
},
|
14
22
|
MetricsContext,
|
15
23
|
};
|
16
|
-
use
|
24
|
+
use futures_util::future::AbortHandle;
|
17
25
|
use std::{
|
26
|
+
collections::HashSet,
|
18
27
|
ops::Add,
|
28
|
+
rc::Rc,
|
19
29
|
sync::mpsc::Sender,
|
20
30
|
time::{Duration, Instant},
|
21
31
|
};
|
22
|
-
use
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
oneshot,
|
32
|
+
use temporal_sdk_core_protos::{
|
33
|
+
coresdk::{
|
34
|
+
workflow_activation::{
|
35
|
+
create_evict_activation, query_to_job, remove_from_cache::EvictionReason,
|
36
|
+
workflow_activation_job, RemoveFromCache, WorkflowActivation,
|
37
|
+
},
|
38
|
+
workflow_commands::QueryResult,
|
39
|
+
workflow_completion,
|
31
40
|
},
|
32
|
-
|
33
|
-
|
41
|
+
temporal::api::{enums::v1::WorkflowTaskFailedCause, failure::v1::Failure},
|
42
|
+
TaskToken,
|
34
43
|
};
|
35
|
-
use
|
44
|
+
use tokio::sync::oneshot;
|
36
45
|
use tracing::Span;
|
37
|
-
use tracing_futures::Instrument;
|
38
|
-
|
39
|
-
use crate::worker::workflow::{
|
40
|
-
ActivationCompleteResult, ActivationOrAuto, FailRunUpdate, FulfillableActivationComplete,
|
41
|
-
GoodRunUpdate, LocalActivityRequestSink, RunAction, RunUpdateResponseKind,
|
42
|
-
};
|
43
|
-
use temporal_sdk_core_protos::TaskToken;
|
44
|
-
|
45
|
-
use crate::abstractions::dbg_panic;
|
46
|
-
#[cfg(test)]
|
47
|
-
pub(crate) use managed_wf_test::ManagedWFFunc;
|
48
46
|
|
49
47
|
type Result<T, E = WFMachinesError> = std::result::Result<T, E>;
|
50
|
-
|
51
|
-
/// necessary.
|
52
|
-
const WFT_HEARTBEAT_TIMEOUT_FRACTION: f32 = 0.8;
|
48
|
+
pub(super) type RunUpdateAct = Option<ActivationOrAuto>;
|
53
49
|
|
50
|
+
/// Manages access to a specific workflow run. Everything inside is entirely synchronous and should
|
51
|
+
/// remain that way.
|
52
|
+
#[derive(derive_more::DebugCustom)]
|
53
|
+
#[debug(
|
54
|
+
fmt = "ManagedRun {{ wft: {:?}, activation: {:?}, buffered_resp: {:?} \
|
55
|
+
trying_to_evict: {} }}",
|
56
|
+
wft,
|
57
|
+
activation,
|
58
|
+
buffered_resp,
|
59
|
+
"trying_to_evict.is_some()"
|
60
|
+
)]
|
54
61
|
pub(super) struct ManagedRun {
|
55
62
|
wfm: WorkflowManager,
|
56
|
-
|
57
|
-
|
63
|
+
/// Called when the machines need to produce local activity requests. This can't be lifted up
|
64
|
+
/// easily as return values, because sometimes local activity requests trigger immediate
|
65
|
+
/// resolutions (ex: too many attempts). Thus lifting it up creates a lot of unneeded complexity
|
66
|
+
/// pushing things out and then directly back in. The downside is this is the only "impure" part
|
67
|
+
/// of the in/out nature of workflow state management. If there's ever a sensible way to lift it
|
68
|
+
/// up, that'd be nice.
|
69
|
+
local_activity_request_sink: Rc<dyn LocalActivityRequestSink>,
|
70
|
+
/// Set if the run is currently waiting on the execution of some local activities.
|
58
71
|
waiting_on_la: Option<WaitingOnLAs>,
|
59
|
-
|
60
|
-
|
72
|
+
/// Is set to true if the machines encounter an error and the only subsequent thing we should
|
73
|
+
/// do is be evicted.
|
61
74
|
am_broken: bool,
|
62
|
-
|
75
|
+
/// If set, the WFT this run is currently/will be processing.
|
76
|
+
wft: Option<OutstandingTask>,
|
77
|
+
/// An outstanding activation to lang
|
78
|
+
activation: Option<OutstandingActivation>,
|
79
|
+
/// If set, it indicates there is a buffered poll response from the server that applies to this
|
80
|
+
/// run. This can happen when lang takes too long to complete a task and the task times out, for
|
81
|
+
/// example. Upon next completion, the buffered response will be removed and can be made ready
|
82
|
+
/// to be returned from polling
|
83
|
+
buffered_resp: Option<PermittedWFT>,
|
84
|
+
/// Is set if an eviction has been requested for this run
|
85
|
+
trying_to_evict: Option<RequestEvictMsg>,
|
63
86
|
|
64
|
-
///
|
65
|
-
///
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
///
|
70
|
-
|
71
|
-
|
72
|
-
CompletionDataForWFT,
|
73
|
-
oneshot::Sender<ActivationCompleteResult>,
|
74
|
-
)>,
|
75
|
-
hb_chan: UnboundedSender<Span>,
|
76
|
-
heartbeat_timeout_task: JoinHandle<()>,
|
77
|
-
}
|
78
|
-
|
79
|
-
#[derive(Debug)]
|
80
|
-
struct CompletionDataForWFT {
|
81
|
-
task_token: TaskToken,
|
82
|
-
query_responses: Vec<QueryResult>,
|
83
|
-
has_pending_query: bool,
|
84
|
-
activation_was_only_eviction: bool,
|
87
|
+
/// We track if we have recorded useful debugging values onto a certain span yet, to overcome
|
88
|
+
/// duplicating field values. Remove this once https://github.com/tokio-rs/tracing/issues/2334
|
89
|
+
/// is fixed.
|
90
|
+
recorded_span_ids: HashSet<tracing::Id>,
|
91
|
+
metrics: MetricsContext,
|
92
|
+
/// We store the paginator used for our own run's history fetching
|
93
|
+
paginator: Option<HistoryPaginator>,
|
94
|
+
completion_waiting_on_page_fetch: Option<RunActivationCompletion>,
|
85
95
|
}
|
86
|
-
|
87
96
|
impl ManagedRun {
|
88
97
|
pub(super) fn new(
|
89
|
-
|
90
|
-
|
91
|
-
local_activity_request_sink: LocalActivityRequestSink,
|
98
|
+
basics: RunBasics,
|
99
|
+
local_activity_request_sink: Rc<dyn LocalActivityRequestSink>,
|
92
100
|
) -> Self {
|
101
|
+
let metrics = basics.metrics.clone();
|
102
|
+
let wfm = WorkflowManager::new(basics);
|
93
103
|
Self {
|
94
104
|
wfm,
|
95
|
-
update_tx,
|
96
105
|
local_activity_request_sink,
|
97
106
|
waiting_on_la: None,
|
98
107
|
am_broken: false,
|
108
|
+
wft: None,
|
109
|
+
activation: None,
|
110
|
+
buffered_resp: None,
|
111
|
+
trying_to_evict: None,
|
112
|
+
recorded_span_ids: Default::default(),
|
113
|
+
metrics,
|
114
|
+
paginator: None,
|
115
|
+
completion_waiting_on_page_fetch: None,
|
99
116
|
}
|
100
117
|
}
|
101
118
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
let mut no_wft = false;
|
115
|
-
async move {
|
116
|
-
let res = match action {
|
117
|
-
RunActions::NewIncomingWFT(wft) => me
|
118
|
-
.incoming_wft(wft)
|
119
|
-
.await
|
120
|
-
.map(RunActionOutcome::AfterNewWFT),
|
121
|
-
RunActions::ActivationCompletion(completion) => me
|
122
|
-
.completion(completion, &heartbeat_tx)
|
123
|
-
.await
|
124
|
-
.map(RunActionOutcome::AfterCompletion),
|
125
|
-
RunActions::CheckMoreWork {
|
126
|
-
want_to_evict,
|
127
|
-
has_pending_queries,
|
128
|
-
has_wft,
|
129
|
-
} => {
|
130
|
-
if !has_wft {
|
131
|
-
no_wft = true;
|
132
|
-
}
|
133
|
-
me.check_more_work(want_to_evict, has_pending_queries, has_wft)
|
134
|
-
.await
|
135
|
-
.map(RunActionOutcome::AfterCheckWork)
|
136
|
-
}
|
137
|
-
RunActions::LocalResolution(r) => me
|
138
|
-
.local_resolution(r)
|
139
|
-
.await
|
140
|
-
.map(RunActionOutcome::AfterLocalResolution),
|
141
|
-
RunActions::HeartbeatTimeout => {
|
142
|
-
let maybe_act = if me.heartbeat_timeout() {
|
143
|
-
Some(ActivationOrAuto::Autocomplete {
|
144
|
-
run_id: me.wfm.machines.run_id.clone(),
|
145
|
-
})
|
146
|
-
} else {
|
147
|
-
None
|
148
|
-
};
|
149
|
-
Ok(RunActionOutcome::AfterHeartbeatTimeout(maybe_act))
|
150
|
-
}
|
151
|
-
};
|
152
|
-
match res {
|
153
|
-
Ok(outcome) => {
|
154
|
-
me.send_update_response(outcome, no_wft);
|
155
|
-
}
|
156
|
-
Err(e) => {
|
157
|
-
error!(error=?e, "Error in run machines");
|
158
|
-
me.am_broken = true;
|
159
|
-
me.update_tx
|
160
|
-
.send(RunUpdateResponse {
|
161
|
-
kind: RunUpdateResponseKind::Fail(FailRunUpdate {
|
162
|
-
run_id: me.wfm.machines.run_id.clone(),
|
163
|
-
err: e.source,
|
164
|
-
completion_resp: e.complete_resp_chan,
|
165
|
-
}),
|
166
|
-
span: Span::current(),
|
167
|
-
})
|
168
|
-
.expect("Machine can send update");
|
169
|
-
}
|
170
|
-
}
|
171
|
-
(me, heartbeat_tx)
|
172
|
-
}
|
173
|
-
.instrument(span)
|
174
|
-
})
|
175
|
-
.await;
|
119
|
+
/// Returns true if there are pending jobs that need to be sent to lang.
|
120
|
+
pub(super) fn more_pending_work(&self) -> bool {
|
121
|
+
// We don't want to consider there to be more local-only work to be done if there is
|
122
|
+
// no workflow task associated with the run right now. This can happen if, ex, we
|
123
|
+
// complete a local activity while waiting for server to send us the next WFT.
|
124
|
+
// Activating lang would be harmful at this stage, as there might be work returned
|
125
|
+
// in that next WFT which should be part of the next activation.
|
126
|
+
self.wft.is_some() && self.wfm.machines.has_pending_jobs()
|
127
|
+
}
|
128
|
+
|
129
|
+
pub(super) fn have_seen_terminal_event(&self) -> bool {
|
130
|
+
self.wfm.machines.have_seen_terminal_event
|
176
131
|
}
|
177
132
|
|
178
|
-
|
133
|
+
/// Returns a ref to info about the currently tracked workflow task, if any.
|
134
|
+
pub(super) fn wft(&self) -> Option<&OutstandingTask> {
|
135
|
+
self.wft.as_ref()
|
136
|
+
}
|
137
|
+
|
138
|
+
/// Returns a ref to info about the currently tracked workflow activation, if any.
|
139
|
+
pub(super) fn activation(&self) -> Option<&OutstandingActivation> {
|
140
|
+
self.activation.as_ref()
|
141
|
+
}
|
142
|
+
|
143
|
+
/// Returns true if this run has already been told it will be evicted.
|
144
|
+
pub(super) fn is_trying_to_evict(&self) -> bool {
|
145
|
+
self.trying_to_evict.is_some()
|
146
|
+
}
|
147
|
+
|
148
|
+
/// Called whenever a new workflow task is obtained for this run
|
149
|
+
pub(super) fn incoming_wft(&mut self, pwft: PermittedWFT) -> RunUpdateAct {
|
150
|
+
let res = self._incoming_wft(pwft);
|
151
|
+
self.update_to_acts(res.map(Into::into), true)
|
152
|
+
}
|
153
|
+
|
154
|
+
fn _incoming_wft(
|
179
155
|
&mut self,
|
180
|
-
|
156
|
+
pwft: PermittedWFT,
|
181
157
|
) -> Result<Option<ActivationOrAuto>, RunUpdateErr> {
|
182
|
-
|
183
|
-
|
158
|
+
if self.wft.is_some() {
|
159
|
+
dbg_panic!("Trying to send a new WFT for a run which already has one!");
|
160
|
+
}
|
161
|
+
let start_time = Instant::now();
|
162
|
+
|
163
|
+
let work = pwft.work;
|
164
|
+
let did_miss_cache = !work.is_incremental() || !work.update.is_real();
|
165
|
+
debug!(
|
166
|
+
run_id = %work.execution.run_id,
|
167
|
+
task_token = %&work.task_token,
|
168
|
+
update = ?work.update,
|
169
|
+
has_legacy_query = %work.legacy_query.is_some(),
|
170
|
+
attempt = %work.attempt,
|
171
|
+
"Applying new workflow task from server"
|
172
|
+
);
|
173
|
+
let wft_info = WorkflowTaskInfo {
|
174
|
+
attempt: work.attempt,
|
175
|
+
task_token: work.task_token,
|
176
|
+
wf_id: work.execution.workflow_id.clone(),
|
177
|
+
};
|
178
|
+
|
179
|
+
let legacy_query_from_poll = work
|
180
|
+
.legacy_query
|
181
|
+
.map(|q| query_to_job(LEGACY_QUERY_ID.to_string(), q));
|
182
|
+
|
183
|
+
let mut pending_queries = work.query_requests;
|
184
|
+
if !pending_queries.is_empty() && legacy_query_from_poll.is_some() {
|
185
|
+
error!(
|
186
|
+
"Server issued both normal and legacy queries. This should not happen. Please \
|
187
|
+
file a bug report."
|
188
|
+
);
|
189
|
+
return Err(RunUpdateErr {
|
190
|
+
source: WFMachinesError::Fatal(
|
191
|
+
"Server issued both normal and legacy query".to_string(),
|
192
|
+
),
|
193
|
+
complete_resp_chan: None,
|
194
|
+
});
|
195
|
+
}
|
196
|
+
if let Some(lq) = legacy_query_from_poll {
|
197
|
+
pending_queries.push(lq);
|
198
|
+
}
|
199
|
+
|
200
|
+
self.paginator = Some(pwft.paginator);
|
201
|
+
self.wft = Some(OutstandingTask {
|
202
|
+
info: wft_info,
|
203
|
+
hit_cache: !did_miss_cache,
|
204
|
+
pending_queries,
|
205
|
+
start_time,
|
206
|
+
permit: pwft.permit,
|
207
|
+
});
|
208
|
+
|
209
|
+
// The update field is only populated in the event we hit the cache
|
210
|
+
let activation = if work.update.is_real() {
|
211
|
+
self.metrics.sticky_cache_hit();
|
212
|
+
self.wfm.feed_history_from_server(work.update)?
|
184
213
|
} else {
|
185
|
-
let r = self.wfm.get_next_activation()
|
214
|
+
let r = self.wfm.get_next_activation()?;
|
186
215
|
if r.jobs.is_empty() {
|
187
216
|
return Err(RunUpdateErr {
|
188
217
|
source: WFMachinesError::Fatal(format!(
|
@@ -197,16 +226,17 @@ impl ManagedRun {
|
|
197
226
|
|
198
227
|
if activation.jobs.is_empty() {
|
199
228
|
if self.wfm.machines.outstanding_local_activity_count() > 0 {
|
200
|
-
// If the activation has no jobs but there are outstanding LAs, we need to restart
|
201
|
-
// WFT heartbeat.
|
229
|
+
// If the activation has no jobs but there are outstanding LAs, we need to restart
|
230
|
+
// the WFT heartbeat.
|
202
231
|
if let Some(ref mut lawait) = self.waiting_on_la {
|
203
232
|
if lawait.completion_dat.is_some() {
|
204
233
|
panic!("Should not have completion dat when getting new wft & empty jobs")
|
205
234
|
}
|
206
|
-
lawait.
|
207
|
-
lawait.
|
208
|
-
|
209
|
-
|
235
|
+
lawait.hb_timeout_handle.abort();
|
236
|
+
lawait.hb_timeout_handle = sink_heartbeat_timeout_start(
|
237
|
+
self.wfm.machines.run_id.clone(),
|
238
|
+
self.local_activity_request_sink.as_ref(),
|
239
|
+
start_time,
|
210
240
|
lawait.wft_timeout,
|
211
241
|
);
|
212
242
|
// No activation needs to be sent to lang. We just need to wait for another
|
@@ -228,41 +258,340 @@ impl ManagedRun {
|
|
228
258
|
Ok(Some(ActivationOrAuto::LangActivation(activation)))
|
229
259
|
}
|
230
260
|
|
231
|
-
|
261
|
+
/// Deletes the currently tracked WFT & records latency metrics. Should be called after it has
|
262
|
+
/// been responded to (server has been told). Returns the WFT if there was one.
|
263
|
+
pub(super) fn mark_wft_complete(
|
232
264
|
&mut self,
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
let
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
// Send commands from lang into the machines then check if the workflow run
|
243
|
-
// needs another activation and mark it if so
|
244
|
-
self.wfm.push_commands(completion.commands).await?;
|
245
|
-
// Don't bother applying the next task if we're evicting at the end of
|
246
|
-
// this activation
|
247
|
-
if !completion.activation_was_eviction {
|
248
|
-
self.wfm.apply_next_task_if_ready().await?;
|
265
|
+
report_status: WFTReportStatus,
|
266
|
+
) -> Option<OutstandingTask> {
|
267
|
+
debug!("Marking WFT completed");
|
268
|
+
let retme = self.wft.take();
|
269
|
+
|
270
|
+
// Only record latency metrics if we genuinely reported to server
|
271
|
+
if matches!(report_status, WFTReportStatus::Reported) {
|
272
|
+
if let Some(ot) = &retme {
|
273
|
+
self.metrics.wf_task_latency(ot.start_time.elapsed());
|
249
274
|
}
|
250
|
-
|
275
|
+
// Tell the LA manager that we're done with the WFT
|
276
|
+
self.local_activity_request_sink.sink_reqs(vec![
|
277
|
+
LocalActRequest::IndicateWorkflowTaskCompleted(self.wfm.machines.run_id.clone()),
|
278
|
+
]);
|
279
|
+
}
|
251
280
|
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
281
|
+
retme
|
282
|
+
}
|
283
|
+
|
284
|
+
/// Checks if any further activations need to go out for this run and produces them if so.
|
285
|
+
pub(super) fn check_more_activations(&mut self) -> RunUpdateAct {
|
286
|
+
let res = self._check_more_activations();
|
287
|
+
self.update_to_acts(res.map(Into::into), false)
|
288
|
+
}
|
289
|
+
|
290
|
+
fn _check_more_activations(&mut self) -> Result<Option<ActivationOrAuto>, RunUpdateErr> {
|
291
|
+
// No point in checking for more activations if there's already an outstanding activation.
|
292
|
+
if self.activation.is_some() {
|
293
|
+
return Ok(None);
|
294
|
+
}
|
295
|
+
// In the event it's time to evict this run, cancel any outstanding LAs
|
296
|
+
if self.trying_to_evict.is_some() {
|
297
|
+
self.sink_la_requests(vec![LocalActRequest::CancelAllInRun(
|
298
|
+
self.wfm.machines.run_id.clone(),
|
299
|
+
)])?;
|
300
|
+
}
|
301
|
+
|
302
|
+
if self.wft.is_none() {
|
303
|
+
// It doesn't make sense to do workflow work unless we have a WFT
|
304
|
+
return Ok(None);
|
305
|
+
}
|
306
|
+
|
307
|
+
if self.wfm.machines.has_pending_jobs() && !self.am_broken {
|
308
|
+
Ok(Some(ActivationOrAuto::LangActivation(
|
309
|
+
self.wfm.get_next_activation()?,
|
310
|
+
)))
|
311
|
+
} else {
|
312
|
+
if !self.am_broken {
|
313
|
+
let has_pending_queries = self
|
314
|
+
.wft
|
315
|
+
.as_ref()
|
316
|
+
.map(|wft| !wft.pending_queries.is_empty())
|
317
|
+
.unwrap_or_default();
|
318
|
+
if has_pending_queries {
|
319
|
+
return Ok(Some(ActivationOrAuto::ReadyForQueries(
|
320
|
+
self.wfm.machines.get_wf_activation(),
|
321
|
+
)));
|
322
|
+
}
|
323
|
+
}
|
324
|
+
if let Some(wte) = self.trying_to_evict.clone() {
|
325
|
+
let mut act = self.wfm.machines.get_wf_activation();
|
326
|
+
// No other jobs make any sense to send if we encountered an error.
|
327
|
+
if self.am_broken {
|
328
|
+
act.jobs = vec![];
|
329
|
+
}
|
330
|
+
act.append_evict_job(RemoveFromCache {
|
331
|
+
message: wte.message,
|
332
|
+
reason: wte.reason as i32,
|
333
|
+
});
|
334
|
+
Ok(Some(ActivationOrAuto::LangActivation(act)))
|
335
|
+
} else {
|
336
|
+
Ok(None)
|
256
337
|
}
|
338
|
+
}
|
339
|
+
}
|
340
|
+
|
341
|
+
/// Called whenever lang successfully completes a workflow activation. Commands produced by the
|
342
|
+
/// activation are passed in. `resp_chan` will be used to unblock the completion call when
|
343
|
+
/// everything we need to do to fulfill it has happened.
|
344
|
+
///
|
345
|
+
/// Can return an error in the event that another page of history needs to be fetched before
|
346
|
+
/// the completion can proceed.
|
347
|
+
pub(super) fn successful_completion(
|
348
|
+
&mut self,
|
349
|
+
mut commands: Vec<WFCommand>,
|
350
|
+
used_flags: Vec<u32>,
|
351
|
+
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
352
|
+
) -> Result<RunUpdateAct, NextPageReq> {
|
353
|
+
let activation_was_only_eviction = self.activation_has_only_eviction();
|
354
|
+
let (task_token, has_pending_query, start_time) = if let Some(entry) = self.wft.as_ref() {
|
355
|
+
(
|
356
|
+
entry.info.task_token.clone(),
|
357
|
+
!entry.pending_queries.is_empty(),
|
358
|
+
entry.start_time,
|
359
|
+
)
|
360
|
+
} else {
|
361
|
+
if !activation_was_only_eviction {
|
362
|
+
// Not an error if this was an eviction, since it's normal to issue eviction
|
363
|
+
// activations without an associated workflow task in that case.
|
364
|
+
dbg_panic!(
|
365
|
+
"Attempted to complete activation for run {} without associated workflow task",
|
366
|
+
self.run_id()
|
367
|
+
);
|
368
|
+
}
|
369
|
+
self.reply_to_complete(ActivationCompleteOutcome::DoNothing, resp_chan);
|
370
|
+
return Ok(None);
|
371
|
+
};
|
257
372
|
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
373
|
+
// If the only command from the activation is a legacy query response, that means we need
|
374
|
+
// to respond differently than a typical activation.
|
375
|
+
if matches!(&commands.as_slice(),
|
376
|
+
&[WFCommand::QueryResponse(qr)] if qr.query_id == LEGACY_QUERY_ID)
|
377
|
+
{
|
378
|
+
let qr = match commands.remove(0) {
|
379
|
+
WFCommand::QueryResponse(qr) => qr,
|
380
|
+
_ => unreachable!("We just verified this is the only command"),
|
263
381
|
};
|
382
|
+
self.reply_to_complete(
|
383
|
+
ActivationCompleteOutcome::ReportWFTSuccess(ServerCommandsWithWorkflowInfo {
|
384
|
+
task_token,
|
385
|
+
action: ActivationAction::RespondLegacyQuery {
|
386
|
+
result: Box::new(qr),
|
387
|
+
},
|
388
|
+
}),
|
389
|
+
resp_chan,
|
390
|
+
);
|
391
|
+
Ok(None)
|
392
|
+
} else {
|
393
|
+
// First strip out query responses from other commands that actually affect machines
|
394
|
+
// Would be prettier with `drain_filter`
|
395
|
+
let mut i = 0;
|
396
|
+
let mut query_responses = vec![];
|
397
|
+
while i < commands.len() {
|
398
|
+
if matches!(commands[i], WFCommand::QueryResponse(_)) {
|
399
|
+
if let WFCommand::QueryResponse(qr) = commands.remove(i) {
|
400
|
+
query_responses.push(qr);
|
401
|
+
}
|
402
|
+
} else {
|
403
|
+
i += 1;
|
404
|
+
}
|
405
|
+
}
|
406
|
+
|
407
|
+
if activation_was_only_eviction && !commands.is_empty() {
|
408
|
+
dbg_panic!("Reply to an eviction only containing an eviction included commands");
|
409
|
+
}
|
410
|
+
|
411
|
+
let rac = RunActivationCompletion {
|
412
|
+
task_token,
|
413
|
+
start_time,
|
414
|
+
commands,
|
415
|
+
activation_was_eviction: self.activation_has_eviction(),
|
416
|
+
activation_was_only_eviction,
|
417
|
+
has_pending_query,
|
418
|
+
query_responses,
|
419
|
+
used_flags,
|
420
|
+
resp_chan,
|
421
|
+
};
|
422
|
+
|
423
|
+
// Verify we can actually apply the next workflow task, which will happen as part of
|
424
|
+
// applying the completion to machines. If we can't, return early indicating we need
|
425
|
+
// to fetch a page.
|
426
|
+
if !self.wfm.ready_to_apply_next_wft() {
|
427
|
+
return if let Some(paginator) = self.paginator.take() {
|
428
|
+
debug!("Need to fetch a history page before next WFT can be applied");
|
429
|
+
self.completion_waiting_on_page_fetch = Some(rac);
|
430
|
+
Err(NextPageReq {
|
431
|
+
paginator,
|
432
|
+
span: Span::current(),
|
433
|
+
})
|
434
|
+
} else {
|
435
|
+
Ok(self.update_to_acts(
|
436
|
+
Err(RunUpdateErr {
|
437
|
+
source: WFMachinesError::Fatal(
|
438
|
+
"Run's paginator was absent when attempting to fetch next history \
|
439
|
+
page. This is a Core SDK bug."
|
440
|
+
.to_string(),
|
441
|
+
),
|
442
|
+
complete_resp_chan: rac.resp_chan,
|
443
|
+
}),
|
444
|
+
false,
|
445
|
+
))
|
446
|
+
};
|
447
|
+
}
|
448
|
+
|
449
|
+
Ok(self.process_completion(rac))
|
450
|
+
}
|
451
|
+
}
|
452
|
+
|
453
|
+
/// Called after the higher-up machinery has fetched more pages of event history needed to apply
|
454
|
+
/// the next workflow task. The history update and paginator used to perform the fetch are
|
455
|
+
/// passed in, with the update being used to apply the task, and the paginator stored to be
|
456
|
+
/// attached with another fetch request if needed.
|
457
|
+
pub(super) fn fetched_page_completion(
|
458
|
+
&mut self,
|
459
|
+
update: HistoryUpdate,
|
460
|
+
paginator: HistoryPaginator,
|
461
|
+
) -> RunUpdateAct {
|
462
|
+
let res = self._fetched_page_completion(update, paginator);
|
463
|
+
self.update_to_acts(res.map(Into::into), false)
|
464
|
+
}
|
465
|
+
fn _fetched_page_completion(
|
466
|
+
&mut self,
|
467
|
+
update: HistoryUpdate,
|
468
|
+
paginator: HistoryPaginator,
|
469
|
+
) -> Result<Option<FulfillableActivationComplete>, RunUpdateErr> {
|
470
|
+
self.paginator = Some(paginator);
|
471
|
+
if let Some(d) = self.completion_waiting_on_page_fetch.take() {
|
472
|
+
self._process_completion(d, Some(update))
|
473
|
+
} else {
|
474
|
+
dbg_panic!(
|
475
|
+
"Shouldn't be possible to be applying a next-page-fetch update when \
|
476
|
+
doing anything other than completing an activation."
|
477
|
+
);
|
478
|
+
Err(RunUpdateErr::from(WFMachinesError::Fatal(
|
479
|
+
"Tried to apply next-page-fetch update to a run that wasn't handling a completion"
|
480
|
+
.to_string(),
|
481
|
+
)))
|
482
|
+
}
|
483
|
+
}
|
484
|
+
|
485
|
+
/// Called whenever either core lang cannot complete a workflow activation. EX: Nondeterminism
|
486
|
+
/// or user code threw/panicked, respectively. The `cause` and `reason` fields are determined
|
487
|
+
/// inside core always. The `failure` field may come from lang. `resp_chan` will be used to
|
488
|
+
/// unblock the completion call when everything we need to do to fulfill it has happened.
|
489
|
+
pub(super) fn failed_completion(
|
490
|
+
&mut self,
|
491
|
+
cause: WorkflowTaskFailedCause,
|
492
|
+
reason: EvictionReason,
|
493
|
+
failure: workflow_completion::Failure,
|
494
|
+
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
495
|
+
) -> RunUpdateAct {
|
496
|
+
let tt = if let Some(tt) = self.wft.as_ref().map(|t| t.info.task_token.clone()) {
|
497
|
+
tt
|
498
|
+
} else {
|
499
|
+
dbg_panic!(
|
500
|
+
"No workflow task for run id {} found when trying to fail activation",
|
501
|
+
self.run_id()
|
502
|
+
);
|
503
|
+
self.reply_to_complete(ActivationCompleteOutcome::DoNothing, resp_chan);
|
504
|
+
return None;
|
505
|
+
};
|
506
|
+
|
507
|
+
self.metrics.wf_task_failed();
|
508
|
+
let message = format!("Workflow activation completion failed: {:?}", &failure);
|
509
|
+
// Blow up any cached data associated with the workflow
|
510
|
+
let evict_req_outcome = self.request_eviction(RequestEvictMsg {
|
511
|
+
run_id: self.run_id().to_string(),
|
512
|
+
message,
|
513
|
+
reason,
|
514
|
+
});
|
515
|
+
let should_report = match &evict_req_outcome {
|
516
|
+
EvictionRequestResult::EvictionRequested(Some(attempt), _)
|
517
|
+
| EvictionRequestResult::EvictionAlreadyRequested(Some(attempt)) => *attempt <= 1,
|
518
|
+
_ => false,
|
519
|
+
};
|
520
|
+
let rur = evict_req_outcome.into_run_update_resp();
|
521
|
+
// If the outstanding WFT is a legacy query task, report that we need to fail it
|
522
|
+
let outcome = if self.pending_work_is_legacy_query() {
|
523
|
+
ActivationCompleteOutcome::ReportWFTFail(
|
524
|
+
FailedActivationWFTReport::ReportLegacyQueryFailure(tt, failure),
|
525
|
+
)
|
526
|
+
} else if should_report {
|
527
|
+
ActivationCompleteOutcome::ReportWFTFail(FailedActivationWFTReport::Report(
|
528
|
+
tt, cause, failure,
|
529
|
+
))
|
530
|
+
} else {
|
531
|
+
ActivationCompleteOutcome::WFTFailedDontReport
|
532
|
+
};
|
533
|
+
self.reply_to_complete(outcome, resp_chan);
|
534
|
+
rur
|
535
|
+
}
|
536
|
+
|
537
|
+
/// Delete the currently tracked workflow activation and return it, if any. Should be called
|
538
|
+
/// after the processing of the activation completion, and WFT reporting.
|
539
|
+
pub(super) fn delete_activation(&mut self) -> Option<OutstandingActivation> {
|
540
|
+
self.activation.take()
|
541
|
+
}
|
542
|
+
|
543
|
+
/// Called when local activities resolve
|
544
|
+
pub(super) fn local_resolution(&mut self, res: LocalResolution) -> RunUpdateAct {
|
545
|
+
let res = self._local_resolution(res);
|
546
|
+
self.update_to_acts(res.map(Into::into), false)
|
547
|
+
}
|
548
|
+
|
549
|
+
fn process_completion(&mut self, completion: RunActivationCompletion) -> RunUpdateAct {
|
550
|
+
let res = self._process_completion(completion, None);
|
551
|
+
self.update_to_acts(res.map(Into::into), false)
|
552
|
+
}
|
553
|
+
|
554
|
+
fn _process_completion(
|
555
|
+
&mut self,
|
556
|
+
completion: RunActivationCompletion,
|
557
|
+
new_update: Option<HistoryUpdate>,
|
558
|
+
) -> Result<Option<FulfillableActivationComplete>, RunUpdateErr> {
|
559
|
+
let data = CompletionDataForWFT {
|
560
|
+
task_token: completion.task_token,
|
561
|
+
query_responses: completion.query_responses,
|
562
|
+
has_pending_query: completion.has_pending_query,
|
563
|
+
activation_was_only_eviction: completion.activation_was_only_eviction,
|
564
|
+
};
|
565
|
+
|
566
|
+
self.wfm.machines.add_lang_used_flags(completion.used_flags);
|
567
|
+
|
568
|
+
// If this is just bookkeeping after a reply to an only-eviction activation, we can bypass
|
569
|
+
// everything, since there is no reason to continue trying to update machines.
|
570
|
+
if completion.activation_was_only_eviction {
|
571
|
+
return Ok(Some(self.prepare_complete_resp(
|
572
|
+
completion.resp_chan,
|
573
|
+
data,
|
574
|
+
false,
|
575
|
+
)));
|
576
|
+
}
|
577
|
+
|
578
|
+
let outcome = (|| {
|
579
|
+
// Send commands from lang into the machines then check if the workflow run needs
|
580
|
+
// another activation and mark it if so
|
581
|
+
self.wfm.push_commands_and_iterate(completion.commands)?;
|
582
|
+
// If there was a new update included as part of the completion, apply it.
|
583
|
+
if let Some(update) = new_update {
|
584
|
+
self.wfm.feed_history_from_new_page(update)?;
|
585
|
+
}
|
586
|
+
// Don't bother applying the next task if we're evicting at the end of this activation
|
587
|
+
if !completion.activation_was_eviction {
|
588
|
+
self.wfm.apply_next_task_if_ready()?;
|
589
|
+
}
|
590
|
+
let new_local_acts = self.wfm.drain_queued_local_activities();
|
591
|
+
self.sink_la_requests(new_local_acts)?;
|
592
|
+
|
264
593
|
if self.wfm.machines.outstanding_local_activity_count() == 0 {
|
265
|
-
Ok(
|
594
|
+
Ok(None)
|
266
595
|
} else {
|
267
596
|
let wft_timeout: Duration = self
|
268
597
|
.wfm
|
@@ -275,28 +604,26 @@ impl ManagedRun {
|
|
275
604
|
.to_string(),
|
276
605
|
)
|
277
606
|
})?;
|
278
|
-
|
279
|
-
Ok((
|
280
|
-
Some((heartbeat_tx, completion.start_time, wft_timeout)),
|
281
|
-
data,
|
282
|
-
self,
|
283
|
-
))
|
607
|
+
Ok(Some((completion.start_time, wft_timeout)))
|
284
608
|
}
|
285
|
-
}
|
286
|
-
.await;
|
609
|
+
})();
|
287
610
|
|
288
611
|
match outcome {
|
289
|
-
Ok(
|
290
|
-
|
291
|
-
|
292
|
-
|
612
|
+
Ok(None) => Ok(Some(self.prepare_complete_resp(
|
613
|
+
completion.resp_chan,
|
614
|
+
data,
|
615
|
+
false,
|
616
|
+
))),
|
617
|
+
Ok(Some((start_t, wft_timeout))) => {
|
618
|
+
if let Some(wola) = self.waiting_on_la.as_mut() {
|
619
|
+
wola.hb_timeout_handle.abort();
|
293
620
|
}
|
294
|
-
|
621
|
+
self.waiting_on_la = Some(WaitingOnLAs {
|
295
622
|
wft_timeout,
|
296
|
-
completion_dat: Some((data, resp_chan)),
|
297
|
-
|
298
|
-
|
299
|
-
|
623
|
+
completion_dat: Some((data, completion.resp_chan)),
|
624
|
+
hb_timeout_handle: sink_heartbeat_timeout_start(
|
625
|
+
self.run_id().to_string(),
|
626
|
+
self.local_activity_request_sink.as_ref(),
|
300
627
|
start_t,
|
301
628
|
wft_timeout,
|
302
629
|
),
|
@@ -305,72 +632,347 @@ impl ManagedRun {
|
|
305
632
|
}
|
306
633
|
Err(e) => Err(RunUpdateErr {
|
307
634
|
source: e,
|
308
|
-
complete_resp_chan:
|
635
|
+
complete_resp_chan: completion.resp_chan,
|
309
636
|
}),
|
310
637
|
}
|
311
638
|
}
|
312
639
|
|
313
|
-
|
640
|
+
fn _local_resolution(
|
314
641
|
&mut self,
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
if
|
320
|
-
|
321
|
-
|
642
|
+
res: LocalResolution,
|
643
|
+
) -> Result<Option<FulfillableActivationComplete>, RunUpdateErr> {
|
644
|
+
debug!(resolution=?res, "Applying local resolution");
|
645
|
+
self.wfm.notify_of_local_result(res)?;
|
646
|
+
if self.wfm.machines.outstanding_local_activity_count() == 0 {
|
647
|
+
if let Some(mut wait_dat) = self.waiting_on_la.take() {
|
648
|
+
// Cancel the heartbeat timeout
|
649
|
+
wait_dat.hb_timeout_handle.abort();
|
650
|
+
if let Some((completion_dat, resp_chan)) = wait_dat.completion_dat.take() {
|
651
|
+
return Ok(Some(self.prepare_complete_resp(
|
652
|
+
resp_chan,
|
653
|
+
completion_dat,
|
654
|
+
false,
|
655
|
+
)));
|
656
|
+
}
|
657
|
+
}
|
322
658
|
}
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
659
|
+
Ok(None)
|
660
|
+
}
|
661
|
+
|
662
|
+
pub(super) fn heartbeat_timeout(&mut self) -> RunUpdateAct {
|
663
|
+
let maybe_act = if self._heartbeat_timeout() {
|
664
|
+
Some(ActivationOrAuto::Autocomplete {
|
665
|
+
run_id: self.wfm.machines.run_id.clone(),
|
666
|
+
})
|
327
667
|
} else {
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
668
|
+
None
|
669
|
+
};
|
670
|
+
self.update_to_acts(Ok(maybe_act).map(Into::into), false)
|
671
|
+
}
|
672
|
+
/// Returns `true` if autocompletion should be issued, which will actually cause us to end up
|
673
|
+
/// in [completion] again, at which point we'll start a new heartbeat timeout, which will
|
674
|
+
/// immediately trigger and thus finish the completion, forcing a new task as it should.
|
675
|
+
fn _heartbeat_timeout(&mut self) -> bool {
|
676
|
+
if let Some(ref mut wait_dat) = self.waiting_on_la {
|
677
|
+
// Cancel the heartbeat timeout
|
678
|
+
wait_dat.hb_timeout_handle.abort();
|
679
|
+
if let Some((completion_dat, resp_chan)) = wait_dat.completion_dat.take() {
|
680
|
+
let compl = self.prepare_complete_resp(resp_chan, completion_dat, true);
|
681
|
+
// Immediately fulfill the completion since the run update will already have
|
682
|
+
// been replied to
|
683
|
+
compl.fulfill();
|
684
|
+
} else {
|
685
|
+
// Auto-reply WFT complete
|
686
|
+
return true;
|
332
687
|
}
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
688
|
+
}
|
689
|
+
false
|
690
|
+
}
|
691
|
+
|
692
|
+
/// Returns true if the managed run has any form of pending work
|
693
|
+
/// If `ignore_evicts` is true, pending evictions do not count as pending work.
|
694
|
+
/// If `ignore_buffered` is true, buffered workflow tasks do not count as pending work.
|
695
|
+
pub(super) fn has_any_pending_work(&self, ignore_evicts: bool, ignore_buffered: bool) -> bool {
|
696
|
+
let evict_work = if ignore_evicts {
|
697
|
+
false
|
698
|
+
} else {
|
699
|
+
self.trying_to_evict.is_some()
|
700
|
+
};
|
701
|
+
let act_work = if ignore_evicts {
|
702
|
+
if let Some(ref act) = self.activation {
|
703
|
+
!act.has_only_eviction()
|
344
704
|
} else {
|
345
|
-
|
705
|
+
false
|
706
|
+
}
|
707
|
+
} else {
|
708
|
+
self.activation.is_some()
|
709
|
+
};
|
710
|
+
let buffered = if ignore_buffered {
|
711
|
+
false
|
712
|
+
} else {
|
713
|
+
self.buffered_resp.is_some()
|
714
|
+
};
|
715
|
+
trace!(wft=self.wft.is_some(), buffered=?buffered, more_work=?self.more_pending_work(),
|
716
|
+
act_work, evict_work, "Does run have pending work?");
|
717
|
+
self.wft.is_some() || buffered || self.more_pending_work() || act_work || evict_work
|
718
|
+
}
|
719
|
+
|
720
|
+
/// Stores some work if there is any outstanding WFT or activation for the run. If there was
|
721
|
+
/// not, returns the work back out inside the option.
|
722
|
+
pub(super) fn buffer_wft_if_outstanding_work(
|
723
|
+
&mut self,
|
724
|
+
work: PermittedWFT,
|
725
|
+
) -> Option<PermittedWFT> {
|
726
|
+
let about_to_issue_evict = self.trying_to_evict.is_some();
|
727
|
+
let has_wft = self.wft().is_some();
|
728
|
+
let has_activation = self.activation().is_some();
|
729
|
+
if has_wft || has_activation || about_to_issue_evict || self.more_pending_work() {
|
730
|
+
debug!(run_id = %self.run_id(),
|
731
|
+
"Got new WFT for a run with outstanding work, buffering it");
|
732
|
+
self.buffered_resp = Some(work);
|
733
|
+
None
|
734
|
+
} else {
|
735
|
+
Some(work)
|
736
|
+
}
|
737
|
+
}
|
738
|
+
|
739
|
+
/// Returns true if there is a buffered workflow task for this run.
|
740
|
+
pub(super) fn has_buffered_wft(&self) -> bool {
|
741
|
+
self.buffered_resp.is_some()
|
742
|
+
}
|
743
|
+
|
744
|
+
/// Removes and returns the buffered workflow task, if any.
|
745
|
+
pub(super) fn take_buffered_wft(&mut self) -> Option<PermittedWFT> {
|
746
|
+
self.buffered_resp.take()
|
747
|
+
}
|
748
|
+
|
749
|
+
pub(super) fn request_eviction(&mut self, info: RequestEvictMsg) -> EvictionRequestResult {
|
750
|
+
let attempts = self.wft.as_ref().map(|wt| wt.info.attempt);
|
751
|
+
|
752
|
+
// If we were waiting on a page fetch and we're getting evicted because fetching failed,
|
753
|
+
// then make sure we allow the completion to proceed, otherwise we're stuck waiting forever.
|
754
|
+
if self.completion_waiting_on_page_fetch.is_some()
|
755
|
+
&& matches!(info.reason, EvictionReason::PaginationOrHistoryFetch)
|
756
|
+
{
|
757
|
+
// We just checked it is some, unwrap OK.
|
758
|
+
let c = self.completion_waiting_on_page_fetch.take().unwrap();
|
759
|
+
let run_upd = self.failed_completion(
|
760
|
+
WorkflowTaskFailedCause::Unspecified,
|
761
|
+
info.reason,
|
762
|
+
Failure::application_failure(info.message, false).into(),
|
763
|
+
c.resp_chan,
|
764
|
+
);
|
765
|
+
return EvictionRequestResult::EvictionRequested(attempts, run_upd);
|
766
|
+
}
|
767
|
+
|
768
|
+
if !self.activation_has_eviction() && self.trying_to_evict.is_none() {
|
769
|
+
debug!(run_id=%info.run_id, reason=%info.message, "Eviction requested");
|
770
|
+
self.trying_to_evict = Some(info);
|
771
|
+
EvictionRequestResult::EvictionRequested(attempts, self.check_more_activations())
|
772
|
+
} else {
|
773
|
+
EvictionRequestResult::EvictionAlreadyRequested(attempts)
|
774
|
+
}
|
775
|
+
}
|
776
|
+
|
777
|
+
/// Records the `workflow_id` field on `span` from the currently tracked WFT, if there is one.
///
/// Spans without an id are ignored, and each span id is processed at most once: ids already
/// present in `recorded_span_ids` are skipped.
pub(super) fn record_span_fields(&mut self, span: &Span) {
    let spid = match span.id() {
        Some(id) => id,
        None => return,
    };
    if self.recorded_span_ids.contains(&spid) {
        return;
    }
    self.recorded_span_ids.insert(spid);

    if let Some(task) = self.wft() {
        span.record("workflow_id", task.info.wf_id.as_str());
    }
}
|
789
|
+
|
790
|
+
/// Take the result of some update to ourselves and turn it into a return value of zero or more
|
791
|
+
/// actions
|
792
|
+
fn update_to_acts(
|
793
|
+
&mut self,
|
794
|
+
outcome: Result<ActOrFulfill, RunUpdateErr>,
|
795
|
+
in_response_to_wft: bool,
|
796
|
+
) -> RunUpdateAct {
|
797
|
+
match outcome {
|
798
|
+
Ok(act_or_fulfill) => {
|
799
|
+
let (mut maybe_act, maybe_fulfill) = match act_or_fulfill {
|
800
|
+
ActOrFulfill::OutgoingAct(a) => (a, None),
|
801
|
+
ActOrFulfill::FulfillableComplete(c) => (None, c),
|
802
|
+
};
|
803
|
+
// If there's no activation but is pending work, check and possibly generate one
|
804
|
+
if self.more_pending_work() && maybe_act.is_none() {
|
805
|
+
match self._check_more_activations() {
|
806
|
+
Ok(oa) => maybe_act = oa,
|
807
|
+
Err(e) => {
|
808
|
+
return self.update_to_acts(Err(e), in_response_to_wft);
|
809
|
+
}
|
810
|
+
}
|
811
|
+
}
|
812
|
+
let r = match maybe_act {
|
813
|
+
Some(ActivationOrAuto::LangActivation(mut activation)) => {
|
814
|
+
if in_response_to_wft {
|
815
|
+
let wft = self
|
816
|
+
.wft
|
817
|
+
.as_mut()
|
818
|
+
.expect("WFT must exist for run just updated with one");
|
819
|
+
// If there are in-poll queries, insert jobs for those queries into the
|
820
|
+
// activation, but only if we hit the cache. If we didn't, those queries
|
821
|
+
// will need to be dealt with once replay is over
|
822
|
+
if wft.hit_cache {
|
823
|
+
put_queries_in_act(&mut activation, wft);
|
824
|
+
}
|
825
|
+
}
|
826
|
+
|
827
|
+
if activation.jobs.is_empty() {
|
828
|
+
dbg_panic!("Should not send lang activation with no jobs");
|
829
|
+
}
|
830
|
+
Some(ActivationOrAuto::LangActivation(activation))
|
831
|
+
}
|
832
|
+
Some(ActivationOrAuto::ReadyForQueries(mut act)) => {
|
833
|
+
if let Some(wft) = self.wft.as_mut() {
|
834
|
+
put_queries_in_act(&mut act, wft);
|
835
|
+
Some(ActivationOrAuto::LangActivation(act))
|
836
|
+
} else {
|
837
|
+
dbg_panic!("Ready for queries but no WFT!");
|
838
|
+
None
|
839
|
+
}
|
840
|
+
}
|
841
|
+
a @ Some(
|
842
|
+
ActivationOrAuto::Autocomplete { .. } | ActivationOrAuto::AutoFail { .. },
|
843
|
+
) => a,
|
844
|
+
None => {
|
845
|
+
if let Some(reason) = self.trying_to_evict.as_ref() {
|
846
|
+
// If we had nothing to do, but we're trying to evict, just do that now
|
847
|
+
// as long as there's no other outstanding work.
|
848
|
+
if self.activation.is_none() && !self.more_pending_work() {
|
849
|
+
let mut evict_act = create_evict_activation(
|
850
|
+
self.run_id().to_string(),
|
851
|
+
reason.message.clone(),
|
852
|
+
reason.reason,
|
853
|
+
);
|
854
|
+
evict_act.history_length =
|
855
|
+
self.most_recently_processed_event_number() as u32;
|
856
|
+
Some(ActivationOrAuto::LangActivation(evict_act))
|
857
|
+
} else {
|
858
|
+
None
|
859
|
+
}
|
860
|
+
} else {
|
861
|
+
None
|
862
|
+
}
|
863
|
+
}
|
864
|
+
};
|
865
|
+
if let Some(f) = maybe_fulfill {
|
866
|
+
f.fulfill();
|
867
|
+
}
|
868
|
+
|
869
|
+
match r {
|
870
|
+
// After each run update, check if it's ready to handle any buffered poll
|
871
|
+
None | Some(ActivationOrAuto::Autocomplete { .. })
|
872
|
+
if !self.has_any_pending_work(false, true) =>
|
873
|
+
{
|
874
|
+
if let Some(bufft) = self.buffered_resp.take() {
|
875
|
+
self.incoming_wft(bufft)
|
876
|
+
} else {
|
877
|
+
None
|
878
|
+
}
|
879
|
+
}
|
880
|
+
Some(r) => {
|
881
|
+
self.insert_outstanding_activation(&r);
|
882
|
+
Some(r)
|
883
|
+
}
|
884
|
+
None => None,
|
885
|
+
}
|
886
|
+
}
|
887
|
+
Err(fail) => {
|
888
|
+
self.am_broken = true;
|
889
|
+
let rur = if let Some(resp_chan) = fail.complete_resp_chan {
|
890
|
+
// Automatically fail the workflow task in the event we couldn't update machines
|
891
|
+
let fail_cause = if matches!(&fail.source, WFMachinesError::Nondeterminism(_)) {
|
892
|
+
WorkflowTaskFailedCause::NonDeterministicError
|
893
|
+
} else {
|
894
|
+
WorkflowTaskFailedCause::Unspecified
|
895
|
+
};
|
896
|
+
let wft_fail_str = format!("{:?}", fail.source);
|
897
|
+
self.failed_completion(
|
898
|
+
fail_cause,
|
899
|
+
fail.source.evict_reason(),
|
900
|
+
Failure::application_failure(wft_fail_str, false).into(),
|
901
|
+
Some(resp_chan),
|
902
|
+
)
|
903
|
+
} else {
|
904
|
+
warn!(error=?fail.source, "Error while updating workflow");
|
905
|
+
Some(ActivationOrAuto::AutoFail {
|
906
|
+
run_id: self.run_id().to_owned(),
|
907
|
+
machines_err: fail.source,
|
908
|
+
})
|
909
|
+
};
|
910
|
+
rur
|
346
911
|
}
|
347
912
|
}
|
348
913
|
}
|
349
914
|
|
915
|
+
fn insert_outstanding_activation(&mut self, act: &ActivationOrAuto) {
|
916
|
+
let act_type = match &act {
|
917
|
+
ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
|
918
|
+
if act.is_legacy_query() {
|
919
|
+
OutstandingActivation::LegacyQuery
|
920
|
+
} else {
|
921
|
+
OutstandingActivation::Normal {
|
922
|
+
contains_eviction: act.eviction_index().is_some(),
|
923
|
+
num_jobs: act.jobs.len(),
|
924
|
+
}
|
925
|
+
}
|
926
|
+
}
|
927
|
+
ActivationOrAuto::Autocomplete { .. } | ActivationOrAuto::AutoFail { .. } => {
|
928
|
+
OutstandingActivation::Autocomplete
|
929
|
+
}
|
930
|
+
};
|
931
|
+
if let Some(old_act) = self.activation {
|
932
|
+
// This is a panic because we have screwed up core logic if this is violated. It must be
|
933
|
+
// upheld.
|
934
|
+
panic!(
|
935
|
+
"Attempted to insert a new outstanding activation {act:?}, but there already was \
|
936
|
+
one outstanding: {old_act:?}"
|
937
|
+
);
|
938
|
+
}
|
939
|
+
self.activation = Some(act_type);
|
940
|
+
}
|
941
|
+
|
350
942
|
fn prepare_complete_resp(
|
351
943
|
&mut self,
|
352
|
-
resp_chan: oneshot::Sender<ActivationCompleteResult
|
944
|
+
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
353
945
|
data: CompletionDataForWFT,
|
354
946
|
due_to_heartbeat_timeout: bool,
|
355
947
|
) -> FulfillableActivationComplete {
|
356
|
-
let outgoing_cmds = self.wfm.get_server_commands();
|
948
|
+
let mut outgoing_cmds = self.wfm.get_server_commands();
|
949
|
+
if data.activation_was_only_eviction && !outgoing_cmds.commands.is_empty() {
|
950
|
+
if self.am_broken {
|
951
|
+
// If we broke there could be commands in the pipe that we didn't get a chance to
|
952
|
+
// handle properly during replay, just wipe them all out.
|
953
|
+
outgoing_cmds.commands = vec![];
|
954
|
+
} else {
|
955
|
+
dbg_panic!(
|
956
|
+
"There should not be any outgoing commands when preparing a completion response \
|
957
|
+
if the activation was only an eviction. This is an SDK bug."
|
958
|
+
);
|
959
|
+
}
|
960
|
+
}
|
961
|
+
|
357
962
|
let query_responses = data.query_responses;
|
358
963
|
let has_query_responses = !query_responses.is_empty();
|
359
964
|
let is_query_playback = data.has_pending_query && !has_query_responses;
|
360
965
|
let mut force_new_wft = due_to_heartbeat_timeout;
|
361
966
|
|
362
|
-
// We only actually want to send commands back to the server if there are no more
|
363
|
-
//
|
364
|
-
//
|
365
|
-
//
|
366
|
-
// eviction, and there were no commands produced during iteration, don't send that
|
967
|
+
// We only actually want to send commands back to the server if there are no more pending
|
968
|
+
// activations and we are caught up on replay. We don't want to complete a wft if we already
|
969
|
+
// saw the final event in the workflow, or if we are playing back for the express purpose of
|
970
|
+
// fulfilling a query. If the activation we sent was *only* an eviction, don't send that
|
367
971
|
// either.
|
368
|
-
let no_commands_and_evicting =
|
369
|
-
outgoing_cmds.commands.is_empty() && data.activation_was_only_eviction;
|
370
972
|
let should_respond = !(self.wfm.machines.has_pending_jobs()
|
371
973
|
|| outgoing_cmds.replaying
|
372
974
|
|| is_query_playback
|
373
|
-
||
|
975
|
+
|| data.activation_was_only_eviction);
|
374
976
|
// If there are pending LA resolutions, and we're responding to a query here,
|
375
977
|
// we want to make sure to force a new task, as otherwise once we tell lang about
|
376
978
|
// the LA resolution there wouldn't be any task to reply to with the result of iterating
|
@@ -378,17 +980,17 @@ impl ManagedRun {
|
|
378
980
|
if has_query_responses && self.wfm.machines.has_pending_la_resolutions() {
|
379
981
|
force_new_wft = true;
|
380
982
|
}
|
381
|
-
let to_be_sent = ServerCommandsWithWorkflowInfo {
|
382
|
-
task_token: data.task_token,
|
383
|
-
action: ActivationAction::WftComplete {
|
384
|
-
force_new_wft,
|
385
|
-
commands: outgoing_cmds.commands,
|
386
|
-
query_responses,
|
387
|
-
},
|
388
|
-
};
|
389
983
|
|
390
984
|
let outcome = if should_respond || has_query_responses {
|
391
|
-
ActivationCompleteOutcome::ReportWFTSuccess(
|
985
|
+
ActivationCompleteOutcome::ReportWFTSuccess(ServerCommandsWithWorkflowInfo {
|
986
|
+
task_token: data.task_token,
|
987
|
+
action: ActivationAction::WftComplete {
|
988
|
+
force_new_wft,
|
989
|
+
commands: outgoing_cmds.commands,
|
990
|
+
query_responses,
|
991
|
+
sdk_metadata: self.wfm.machines.get_metadata_for_wft_complete(),
|
992
|
+
},
|
993
|
+
})
|
392
994
|
} else {
|
393
995
|
ActivationCompleteOutcome::DoNothing
|
394
996
|
};
|
@@ -401,131 +1003,136 @@ impl ManagedRun {
|
|
401
1003
|
}
|
402
1004
|
}
|
403
1005
|
|
404
|
-
|
1006
|
+
/// Pump some local activity requests into the sink, applying any immediate results to the
|
1007
|
+
/// workflow machines.
|
1008
|
+
fn sink_la_requests(
|
405
1009
|
&mut self,
|
406
|
-
|
407
|
-
) -> Result<
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
if let Some(mut wait_dat) = self.waiting_on_la.take() {
|
412
|
-
// Cancel the heartbeat timeout
|
413
|
-
wait_dat.heartbeat_timeout_task.abort();
|
414
|
-
if let Some((completion_dat, resp_chan)) = wait_dat.completion_dat.take() {
|
415
|
-
return Ok(Some(self.prepare_complete_resp(
|
416
|
-
resp_chan,
|
417
|
-
completion_dat,
|
418
|
-
false,
|
419
|
-
)));
|
420
|
-
}
|
421
|
-
}
|
1010
|
+
new_local_acts: Vec<LocalActRequest>,
|
1011
|
+
) -> Result<(), WFMachinesError> {
|
1012
|
+
let immediate_resolutions = self.local_activity_request_sink.sink_reqs(new_local_acts);
|
1013
|
+
if !immediate_resolutions.is_empty() {
|
1014
|
+
warn!("Immediate res: {:?}", &immediate_resolutions);
|
422
1015
|
}
|
423
|
-
|
1016
|
+
for resolution in immediate_resolutions {
|
1017
|
+
self.wfm
|
1018
|
+
.notify_of_local_result(LocalResolution::LocalActivity(resolution))?;
|
1019
|
+
}
|
1020
|
+
Ok(())
|
424
1021
|
}
|
425
1022
|
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
compl.fulfill();
|
438
|
-
} else {
|
439
|
-
// Auto-reply WFT complete
|
440
|
-
return true;
|
441
|
-
}
|
442
|
-
} else {
|
443
|
-
// If a heartbeat timeout happened, we should always have been waiting on LAs
|
444
|
-
dbg_panic!("WFT heartbeat timeout fired but we were not waiting on any LAs");
|
1023
|
+
fn reply_to_complete(
|
1024
|
+
&self,
|
1025
|
+
outcome: ActivationCompleteOutcome,
|
1026
|
+
chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
1027
|
+
) {
|
1028
|
+
if let Some(chan) = chan {
|
1029
|
+
chan.send(ActivationCompleteResult {
|
1030
|
+
most_recently_processed_event: self.most_recently_processed_event_number() as usize,
|
1031
|
+
outcome,
|
1032
|
+
})
|
1033
|
+
.expect("Rcv half of activation reply not dropped");
|
445
1034
|
}
|
446
|
-
false
|
447
1035
|
}
|
448
1036
|
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
as usize,
|
480
|
-
in_response_to_wft,
|
481
|
-
}),
|
482
|
-
span: Span::current(),
|
483
|
-
})
|
484
|
-
.expect("Machine can send update");
|
1037
|
+
/// Returns true if the handle is currently processing a WFT which contains a legacy query.
|
1038
|
+
fn pending_work_is_legacy_query(&self) -> bool {
|
1039
|
+
// Either we know because there is a pending legacy query, or it's already been drained and
|
1040
|
+
// sent as an activation.
|
1041
|
+
matches!(self.activation, Some(OutstandingActivation::LegacyQuery))
|
1042
|
+
|| self
|
1043
|
+
.wft
|
1044
|
+
.as_ref()
|
1045
|
+
.map(|t| t.has_pending_legacy_query())
|
1046
|
+
.unwrap_or_default()
|
1047
|
+
}
|
1048
|
+
|
1049
|
+
fn most_recently_processed_event_number(&self) -> i64 {
|
1050
|
+
self.wfm.machines.last_processed_event
|
1051
|
+
}
|
1052
|
+
|
1053
|
+
fn activation_has_eviction(&mut self) -> bool {
|
1054
|
+
self.activation
|
1055
|
+
.map(OutstandingActivation::has_eviction)
|
1056
|
+
.unwrap_or_default()
|
1057
|
+
}
|
1058
|
+
|
1059
|
+
fn activation_has_only_eviction(&mut self) -> bool {
|
1060
|
+
self.activation
|
1061
|
+
.map(OutstandingActivation::has_only_eviction)
|
1062
|
+
.unwrap_or_default()
|
1063
|
+
}
|
1064
|
+
|
1065
|
+
fn run_id(&self) -> &str {
|
1066
|
+
&self.wfm.machines.run_id
|
485
1067
|
}
|
486
1068
|
}
|
487
1069
|
|
488
|
-
|
489
|
-
|
1070
|
+
/// Drains pending queries from the workflow task and appends them to the activation's jobs
|
1071
|
+
fn put_queries_in_act(act: &mut WorkflowActivation, wft: &mut OutstandingTask) {
|
1072
|
+
// Nothing to do if there are no pending queries
|
1073
|
+
if wft.pending_queries.is_empty() {
|
1074
|
+
return;
|
1075
|
+
}
|
1076
|
+
|
1077
|
+
let has_legacy = wft.has_pending_legacy_query();
|
1078
|
+
// Cannot dispatch legacy query if there are any other jobs - which can happen if, ex, a local
|
1079
|
+
// activity resolves while we've gotten a legacy query after heartbeating.
|
1080
|
+
if has_legacy && !act.jobs.is_empty() {
|
1081
|
+
return;
|
1082
|
+
}
|
1083
|
+
|
1084
|
+
debug!(queries=?wft.pending_queries, "Dispatching queries");
|
1085
|
+
let query_jobs = wft
|
1086
|
+
.pending_queries
|
1087
|
+
.drain(..)
|
1088
|
+
.map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
|
1089
|
+
act.jobs.extend(query_jobs);
|
1090
|
+
}
|
1091
|
+
fn sink_heartbeat_timeout_start(
|
1092
|
+
run_id: String,
|
1093
|
+
sink: &dyn LocalActivityRequestSink,
|
490
1094
|
wft_start_time: Instant,
|
491
1095
|
wft_timeout: Duration,
|
492
|
-
) ->
|
1096
|
+
) -> AbortHandle {
|
493
1097
|
// The heartbeat deadline is 80% of the WFT timeout
|
494
|
-
let
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
}
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
AfterLocalResolution(Option<FulfillableActivationComplete>),
|
506
|
-
AfterCompletion(Option<FulfillableActivationComplete>),
|
507
|
-
AfterHeartbeatTimeout(Option<ActivationOrAuto>),
|
1098
|
+
let deadline = wft_start_time.add(wft_timeout.mul_f32(WFT_HEARTBEAT_TIMEOUT_FRACTION));
|
1099
|
+
let (abort_handle, abort_reg) = AbortHandle::new_pair();
|
1100
|
+
sink.sink_reqs(vec![LocalActRequest::StartHeartbeatTimeout {
|
1101
|
+
send_on_elapse: HeartbeatTimeoutMsg {
|
1102
|
+
run_id,
|
1103
|
+
span: Span::current(),
|
1104
|
+
},
|
1105
|
+
deadline,
|
1106
|
+
abort_reg,
|
1107
|
+
}]);
|
1108
|
+
abort_handle
|
508
1109
|
}
|
509
1110
|
|
510
|
-
|
511
|
-
|
512
|
-
struct
|
513
|
-
|
514
|
-
|
1111
|
+
/// If an activation completion needed to wait on LA completions (or heartbeat timeout) we use
|
1112
|
+
/// this struct to store the data we need to finish the completion once that has happened
|
1113
|
+
struct WaitingOnLAs {
|
1114
|
+
wft_timeout: Duration,
|
1115
|
+
/// If set, we are waiting for LAs to complete as part of a just-finished workflow activation.
|
1116
|
+
/// If unset, we already had a heartbeat timeout and got a new WFT without any new work while
|
1117
|
+
/// there are still incomplete LAs.
|
1118
|
+
completion_dat: Option<(
|
1119
|
+
CompletionDataForWFT,
|
1120
|
+
Option<oneshot::Sender<ActivationCompleteResult>>,
|
1121
|
+
)>,
|
1122
|
+
/// Can be used to abort heartbeat timeouts
|
1123
|
+
hb_timeout_handle: AbortHandle,
|
515
1124
|
}
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
}
|
523
|
-
}
|
1125
|
+
#[derive(Debug)]
|
1126
|
+
struct CompletionDataForWFT {
|
1127
|
+
task_token: TaskToken,
|
1128
|
+
query_responses: Vec<QueryResult>,
|
1129
|
+
has_pending_query: bool,
|
1130
|
+
activation_was_only_eviction: bool,
|
524
1131
|
}
|
525
1132
|
|
526
1133
|
/// Manages an instance of a [WorkflowMachines], which is not thread-safe, as well as other data
|
527
1134
|
/// associated with that specific workflow run.
|
528
|
-
|
1135
|
+
struct WorkflowManager {
|
529
1136
|
machines: WorkflowMachines,
|
530
1137
|
/// Is always `Some` in normal operation. Optional to allow for unit testing with the test
|
531
1138
|
/// workflow driver, which does not need to complete activations the normal way.
|
@@ -535,24 +1142,9 @@ pub(crate) struct WorkflowManager {
|
|
535
1142
|
impl WorkflowManager {
|
536
1143
|
/// Create a new workflow manager given workflow history and execution info as would be found
|
537
1144
|
/// in [PollWorkflowTaskQueueResponse]
|
538
|
-
|
539
|
-
history: HistoryUpdate,
|
540
|
-
namespace: String,
|
541
|
-
workflow_id: String,
|
542
|
-
workflow_type: String,
|
543
|
-
run_id: String,
|
544
|
-
metrics: MetricsContext,
|
545
|
-
) -> Self {
|
1145
|
+
fn new(basics: RunBasics) -> Self {
|
546
1146
|
let (wfb, cmd_sink) = WorkflowBridge::new();
|
547
|
-
let state_machines = WorkflowMachines::new(
|
548
|
-
namespace,
|
549
|
-
workflow_id,
|
550
|
-
workflow_type,
|
551
|
-
run_id,
|
552
|
-
history,
|
553
|
-
Box::new(wfb).into(),
|
554
|
-
metrics,
|
555
|
-
);
|
1147
|
+
let state_machines = WorkflowMachines::new(basics, Box::new(wfb).into());
|
556
1148
|
Self {
|
557
1149
|
machines: state_machines,
|
558
1150
|
command_sink: Some(cmd_sink),
|
@@ -560,7 +1152,7 @@ impl WorkflowManager {
|
|
560
1152
|
}
|
561
1153
|
|
562
1154
|
#[cfg(test)]
|
563
|
-
|
1155
|
+
const fn new_from_machines(workflow_machines: WorkflowMachines) -> Self {
|
564
1156
|
Self {
|
565
1157
|
machines: workflow_machines,
|
566
1158
|
command_sink: None,
|
@@ -571,12 +1163,15 @@ impl WorkflowManager {
|
|
571
1163
|
///
|
572
1164
|
/// Should only be called when a workflow has caught up on replay (or is just beginning). It
|
573
1165
|
/// will return a workflow activation if one is needed.
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
1166
|
+
fn feed_history_from_server(&mut self, update: HistoryUpdate) -> Result<WorkflowActivation> {
    // Hand the update to the machines first; any error is returned before an activation is
    // requested.
    self.machines.new_history_from_server(update)?;
    // With the new history in place, produce the next activation.
    self.get_next_activation()
}
|
1170
|
+
|
1171
|
+
/// Update the machines with some events from fetching another page of history. Does *not*
|
1172
|
+
/// attempt to pull the next activation, unlike [Self::feed_history_from_server].
|
1173
|
+
fn feed_history_from_new_page(&mut self, update: HistoryUpdate) -> Result<()> {
|
1174
|
+
self.machines.new_history_from_server(update)
|
580
1175
|
}
|
581
1176
|
|
582
1177
|
/// Let this workflow know that something we've been waiting locally on has resolved, like a
|
@@ -593,27 +1188,33 @@ impl WorkflowManager {
|
|
593
1188
|
///
|
594
1189
|
/// Callers may also need to call [get_server_commands] after this to issue any pending commands
|
595
1190
|
/// to the server.
|
596
|
-
|
1191
|
+
fn get_next_activation(&mut self) -> Result<WorkflowActivation> {
    // Jobs may already be pending (for example as a result of replay); if so, hand them out
    // without touching history.
    let pending = self.machines.get_wf_activation();
    if pending.jobs.is_empty() {
        // Nothing queued yet: apply the next workflow task from history, then ask again.
        self.machines.apply_next_wft_from_history()?;
        Ok(self.machines.get_wf_activation())
    } else {
        Ok(pending)
    }
}
|
606
1201
|
|
1202
|
+
/// Returns true if machines are ready to apply the next WFT sequence, false if events will need
|
1203
|
+
/// to be fetched in order to create a complete update with the entire next WFT sequence.
|
1204
|
+
pub(crate) fn ready_to_apply_next_wft(&self) -> bool {
|
1205
|
+
self.machines.ready_to_apply_next_wft()
|
1206
|
+
}
|
1207
|
+
|
607
1208
|
/// If there are no pending jobs for the workflow, apply the next workflow task and check
|
608
1209
|
/// again if there are any jobs. Importantly, does not *drain* jobs.
|
609
1210
|
///
|
610
1211
|
/// Returns true if there are jobs (before or after applying the next WFT).
|
611
|
-
|
1212
|
+
fn apply_next_task_if_ready(&mut self) -> Result<bool> {
|
612
1213
|
if self.machines.has_pending_jobs() {
|
613
1214
|
return Ok(true);
|
614
1215
|
}
|
615
1216
|
loop {
|
616
|
-
let consumed_events = self.machines.apply_next_wft_from_history()
|
1217
|
+
let consumed_events = self.machines.apply_next_wft_from_history()?;
|
617
1218
|
|
618
1219
|
if consumed_events == 0 || !self.machines.replaying || self.machines.has_pending_jobs()
|
619
1220
|
{
|
@@ -643,13 +1244,62 @@ impl WorkflowManager {
|
|
643
1244
|
|
644
1245
|
/// Feed the workflow machines new commands issued by the executing workflow code, and iterate
|
645
1246
|
/// the machines.
|
646
|
-
|
1247
|
+
fn push_commands_and_iterate(&mut self, cmds: Vec<WFCommand>) -> Result<()> {
|
647
1248
|
if let Some(cs) = self.command_sink.as_mut() {
|
648
1249
|
cs.send(cmds).map_err(|_| {
|
649
1250
|
WFMachinesError::Fatal("Internal error buffering workflow commands".to_string())
|
650
1251
|
})?;
|
651
1252
|
}
|
652
|
-
self.machines.iterate_machines()
|
1253
|
+
self.machines.iterate_machines()?;
|
653
1254
|
Ok(())
|
654
1255
|
}
|
655
1256
|
}
|
1257
|
+
|
1258
|
+
#[derive(Debug)]
|
1259
|
+
struct FulfillableActivationComplete {
|
1260
|
+
result: ActivationCompleteResult,
|
1261
|
+
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
1262
|
+
}
|
1263
|
+
impl FulfillableActivationComplete {
|
1264
|
+
fn fulfill(self) {
|
1265
|
+
if let Some(resp_chan) = self.resp_chan {
|
1266
|
+
let _ = resp_chan.send(self.result);
|
1267
|
+
}
|
1268
|
+
}
|
1269
|
+
}
|
1270
|
+
|
1271
|
+
#[derive(Debug)]
|
1272
|
+
struct RunActivationCompletion {
|
1273
|
+
task_token: TaskToken,
|
1274
|
+
start_time: Instant,
|
1275
|
+
commands: Vec<WFCommand>,
|
1276
|
+
activation_was_eviction: bool,
|
1277
|
+
activation_was_only_eviction: bool,
|
1278
|
+
has_pending_query: bool,
|
1279
|
+
query_responses: Vec<QueryResult>,
|
1280
|
+
used_flags: Vec<u32>,
|
1281
|
+
/// Used to notify the worker when the completion is done processing and the completion can
|
1282
|
+
/// unblock. Must always be `Some` when initialized.
|
1283
|
+
resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
1284
|
+
}
|
1285
|
+
#[derive(Debug, derive_more::From)]
|
1286
|
+
enum ActOrFulfill {
|
1287
|
+
OutgoingAct(Option<ActivationOrAuto>),
|
1288
|
+
FulfillableComplete(Option<FulfillableActivationComplete>),
|
1289
|
+
}
|
1290
|
+
|
1291
|
+
#[derive(derive_more::DebugCustom)]
|
1292
|
+
#[debug(fmt = "RunUpdateErr({source:?})")]
|
1293
|
+
struct RunUpdateErr {
|
1294
|
+
source: WFMachinesError,
|
1295
|
+
complete_resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
|
1296
|
+
}
|
1297
|
+
|
1298
|
+
impl From<WFMachinesError> for RunUpdateErr {
    /// Wraps a machines error that has no completion response channel attached.
    fn from(e: WFMachinesError) -> Self {
        Self {
            complete_resp_chan: None,
            source: e,
        }
    }
}
|