temporalio 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +180 -7
- data/bridge/Cargo.lock +208 -76
- data/bridge/Cargo.toml +5 -2
- data/bridge/sdk-core/Cargo.toml +1 -1
- data/bridge/sdk-core/README.md +20 -10
- data/bridge/sdk-core/client/Cargo.toml +1 -1
- data/bridge/sdk-core/client/src/lib.rs +227 -59
- data/bridge/sdk-core/client/src/metrics.rs +17 -8
- data/bridge/sdk-core/client/src/raw.rs +13 -12
- data/bridge/sdk-core/client/src/retry.rs +132 -43
- data/bridge/sdk-core/core/Cargo.toml +28 -15
- data/bridge/sdk-core/core/benches/workflow_replay.rs +13 -10
- data/bridge/sdk-core/core/src/abstractions.rs +225 -36
- data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +217 -79
- data/bridge/sdk-core/core/src/core_tests/determinism.rs +165 -2
- data/bridge/sdk-core/core/src/core_tests/local_activities.rs +565 -34
- data/bridge/sdk-core/core/src/core_tests/queries.rs +247 -90
- data/bridge/sdk-core/core/src/core_tests/workers.rs +3 -5
- data/bridge/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
- data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +430 -67
- data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +106 -12
- data/bridge/sdk-core/core/src/internal_flags.rs +136 -0
- data/bridge/sdk-core/core/src/lib.rs +148 -34
- data/bridge/sdk-core/core/src/protosext/mod.rs +1 -1
- data/bridge/sdk-core/core/src/replay/mod.rs +185 -41
- data/bridge/sdk-core/core/src/telemetry/log_export.rs +190 -0
- data/bridge/sdk-core/core/src/telemetry/metrics.rs +219 -140
- data/bridge/sdk-core/core/src/telemetry/mod.rs +326 -315
- data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +20 -14
- data/bridge/sdk-core/core/src/test_help/mod.rs +85 -21
- data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +112 -156
- data/bridge/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
- data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +364 -128
- data/bridge/sdk-core/core/src/worker/activities.rs +263 -170
- data/bridge/sdk-core/core/src/worker/client/mocks.rs +23 -3
- data/bridge/sdk-core/core/src/worker/client.rs +48 -6
- data/bridge/sdk-core/core/src/worker/mod.rs +186 -75
- data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
- data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +13 -24
- data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +879 -226
- data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +101 -48
- data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +8 -12
- data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +6 -9
- data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +90 -32
- data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +6 -9
- data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +7 -10
- data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +6 -9
- data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +160 -83
- data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +36 -54
- data/bridge/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +179 -0
- data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +104 -157
- data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +8 -12
- data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +9 -13
- data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +10 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +14 -11
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +395 -299
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +12 -20
- data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +33 -18
- data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +1032 -374
- data/bridge/sdk-core/core/src/worker/workflow/mod.rs +525 -392
- data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
- data/bridge/sdk-core/core/src/worker/workflow/wft_extraction.rs +125 -0
- data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +3 -6
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +456 -681
- data/bridge/sdk-core/core-api/Cargo.toml +6 -4
- data/bridge/sdk-core/core-api/src/errors.rs +1 -34
- data/bridge/sdk-core/core-api/src/lib.rs +7 -45
- data/bridge/sdk-core/core-api/src/telemetry.rs +141 -0
- data/bridge/sdk-core/core-api/src/worker.rs +27 -1
- data/bridge/sdk-core/etc/deps.svg +115 -140
- data/bridge/sdk-core/etc/regen-depgraph.sh +5 -0
- data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +18 -15
- data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +1 -1
- data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +8 -3
- data/bridge/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
- data/bridge/sdk-core/histories/evict_while_la_running_no_interference-23_history.bin +0 -0
- data/bridge/sdk-core/histories/evict_while_la_running_no_interference-85_history.bin +0 -0
- data/bridge/sdk-core/protos/api_upstream/buf.yaml +0 -3
- data/bridge/sdk-core/protos/api_upstream/build/go.mod +7 -0
- data/bridge/sdk-core/protos/api_upstream/build/go.sum +5 -0
- data/bridge/sdk-core/protos/api_upstream/{temporal/api/enums/v1/cluster.proto → build/tools.go} +7 -18
- data/bridge/sdk-core/protos/api_upstream/go.mod +6 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +12 -9
- data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +15 -26
- data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +13 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +4 -9
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +10 -8
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +28 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +24 -19
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +62 -26
- data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +4 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +24 -61
- data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -21
- data/bridge/sdk-core/protos/api_upstream/temporal/api/protocol/v1/message.proto +57 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +110 -31
- data/bridge/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +4 -4
- data/bridge/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +71 -6
- data/bridge/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +3 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +111 -36
- data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +19 -5
- data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +9 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +9 -1
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +6 -0
- data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
- data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
- data/bridge/sdk-core/sdk/Cargo.toml +4 -3
- data/bridge/sdk-core/sdk/src/interceptors.rs +36 -3
- data/bridge/sdk-core/sdk/src/lib.rs +94 -25
- data/bridge/sdk-core/sdk/src/workflow_context.rs +13 -2
- data/bridge/sdk-core/sdk/src/workflow_future.rs +10 -13
- data/bridge/sdk-core/sdk-core-protos/Cargo.toml +5 -2
- data/bridge/sdk-core/sdk-core-protos/build.rs +36 -2
- data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +164 -104
- data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +27 -23
- data/bridge/sdk-core/sdk-core-protos/src/lib.rs +252 -74
- data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
- data/bridge/sdk-core/test-utils/Cargo.toml +4 -1
- data/bridge/sdk-core/test-utils/src/canned_histories.rs +106 -296
- data/bridge/sdk-core/test-utils/src/histfetch.rs +1 -1
- data/bridge/sdk-core/test-utils/src/lib.rs +161 -50
- data/bridge/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
- data/bridge/sdk-core/test-utils/src/workflows.rs +29 -0
- data/bridge/sdk-core/tests/fuzzy_workflow.rs +130 -0
- data/bridge/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
- data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
- data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +10 -5
- data/bridge/sdk-core/tests/integ_tests/metrics_tests.rs +239 -0
- data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +4 -60
- data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
- data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +93 -69
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +151 -116
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +54 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +7 -28
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +115 -24
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -4
- data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +27 -18
- data/bridge/sdk-core/tests/main.rs +8 -16
- data/bridge/sdk-core/tests/runner.rs +75 -36
- data/bridge/sdk-core/tests/wf_input_replay.rs +32 -0
- data/bridge/src/connection.rs +117 -82
- data/bridge/src/lib.rs +356 -42
- data/bridge/src/runtime.rs +10 -3
- data/bridge/src/test_server.rs +153 -0
- data/bridge/src/worker.rs +133 -9
- data/lib/gen/temporal/api/batch/v1/message_pb.rb +8 -6
- data/lib/gen/temporal/api/command/v1/message_pb.rb +10 -16
- data/lib/gen/temporal/api/common/v1/message_pb.rb +5 -1
- data/lib/gen/temporal/api/enums/v1/batch_operation_pb.rb +2 -1
- data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +3 -3
- data/lib/gen/temporal/api/enums/v1/common_pb.rb +2 -1
- data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +5 -4
- data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +9 -1
- data/lib/gen/temporal/api/enums/v1/namespace_pb.rb +1 -1
- data/lib/gen/temporal/api/enums/v1/query_pb.rb +1 -1
- data/lib/gen/temporal/api/enums/v1/reset_pb.rb +1 -1
- data/lib/gen/temporal/api/enums/v1/schedule_pb.rb +1 -1
- data/lib/gen/temporal/api/enums/v1/task_queue_pb.rb +1 -1
- data/lib/gen/temporal/api/enums/v1/update_pb.rb +7 -10
- data/lib/gen/temporal/api/enums/v1/workflow_pb.rb +1 -1
- data/lib/gen/temporal/api/errordetails/v1/message_pb.rb +1 -1
- data/lib/gen/temporal/api/failure/v1/message_pb.rb +1 -1
- data/lib/gen/temporal/api/filter/v1/message_pb.rb +1 -1
- data/lib/gen/temporal/api/history/v1/message_pb.rb +34 -25
- data/lib/gen/temporal/api/namespace/v1/message_pb.rb +2 -1
- data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +14 -51
- data/lib/gen/temporal/api/operatorservice/v1/service_pb.rb +1 -1
- data/lib/gen/temporal/api/protocol/v1/message_pb.rb +30 -0
- data/lib/gen/temporal/api/query/v1/message_pb.rb +1 -1
- data/lib/gen/temporal/api/replication/v1/message_pb.rb +1 -1
- data/lib/gen/temporal/api/schedule/v1/message_pb.rb +22 -1
- data/lib/gen/temporal/api/sdk/v1/task_complete_metadata_pb.rb +23 -0
- data/lib/gen/temporal/api/taskqueue/v1/message_pb.rb +2 -2
- data/lib/gen/temporal/api/testservice/v1/request_response_pb.rb +49 -0
- data/lib/gen/temporal/api/testservice/v1/service_pb.rb +21 -0
- data/lib/gen/temporal/api/update/v1/message_pb.rb +49 -3
- data/lib/gen/temporal/api/version/v1/message_pb.rb +1 -1
- data/lib/gen/temporal/api/workflow/v1/message_pb.rb +2 -1
- data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +47 -20
- data/lib/gen/temporal/api/workflowservice/v1/service_pb.rb +1 -1
- data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +13 -9
- data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +10 -6
- data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +13 -9
- data/lib/gen/temporal/sdk/core/common/common_pb.rb +7 -3
- data/lib/gen/temporal/sdk/core/core_interface_pb.rb +9 -3
- data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +7 -3
- data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +28 -21
- data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +32 -24
- data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +12 -5
- data/lib/temporalio/activity/context.rb +102 -0
- data/lib/temporalio/activity/info.rb +67 -0
- data/lib/temporalio/activity.rb +85 -0
- data/lib/temporalio/bridge/connect_options.rb +15 -0
- data/lib/temporalio/bridge/error.rb +8 -0
- data/lib/temporalio/bridge/retry_config.rb +24 -0
- data/lib/temporalio/bridge/tls_options.rb +19 -0
- data/lib/temporalio/bridge.rb +14 -0
- data/lib/{temporal → temporalio}/client/implementation.rb +57 -56
- data/lib/{temporal → temporalio}/client/workflow_handle.rb +35 -35
- data/lib/{temporal → temporalio}/client.rb +19 -32
- data/lib/temporalio/connection/retry_config.rb +44 -0
- data/lib/temporalio/connection/service.rb +20 -0
- data/lib/temporalio/connection/test_service.rb +92 -0
- data/lib/temporalio/connection/tls_options.rb +51 -0
- data/lib/temporalio/connection/workflow_service.rb +731 -0
- data/lib/temporalio/connection.rb +86 -0
- data/lib/{temporal → temporalio}/data_converter.rb +76 -35
- data/lib/{temporal → temporalio}/error/failure.rb +6 -6
- data/lib/{temporal → temporalio}/error/workflow_failure.rb +4 -2
- data/lib/{temporal → temporalio}/errors.rb +19 -1
- data/lib/{temporal → temporalio}/failure_converter/base.rb +5 -5
- data/lib/{temporal → temporalio}/failure_converter/basic.rb +58 -52
- data/lib/temporalio/failure_converter.rb +7 -0
- data/lib/temporalio/interceptor/activity_inbound.rb +22 -0
- data/lib/temporalio/interceptor/activity_outbound.rb +24 -0
- data/lib/{temporal → temporalio}/interceptor/chain.rb +7 -6
- data/lib/{temporal → temporalio}/interceptor/client.rb +27 -2
- data/lib/temporalio/interceptor.rb +22 -0
- data/lib/{temporal → temporalio}/payload_codec/base.rb +5 -5
- data/lib/{temporal → temporalio}/payload_converter/base.rb +3 -3
- data/lib/{temporal → temporalio}/payload_converter/bytes.rb +4 -3
- data/lib/{temporal → temporalio}/payload_converter/composite.rb +7 -5
- data/lib/{temporal → temporalio}/payload_converter/encoding_base.rb +4 -4
- data/lib/{temporal → temporalio}/payload_converter/json.rb +4 -3
- data/lib/{temporal → temporalio}/payload_converter/nil.rb +4 -3
- data/lib/temporalio/payload_converter.rb +14 -0
- data/lib/{temporal → temporalio}/retry_policy.rb +17 -7
- data/lib/{temporal → temporalio}/retry_state.rb +1 -1
- data/lib/temporalio/runtime.rb +25 -0
- data/lib/temporalio/testing/time_skipping_handle.rb +32 -0
- data/lib/temporalio/testing/time_skipping_interceptor.rb +23 -0
- data/lib/temporalio/testing/workflow_environment.rb +112 -0
- data/lib/temporalio/testing.rb +175 -0
- data/lib/{temporal → temporalio}/timeout_type.rb +2 -2
- data/lib/temporalio/version.rb +3 -0
- data/lib/temporalio/worker/activity_runner.rb +114 -0
- data/lib/temporalio/worker/activity_worker.rb +164 -0
- data/lib/temporalio/worker/reactor.rb +46 -0
- data/lib/temporalio/worker/runner.rb +63 -0
- data/lib/temporalio/worker/sync_worker.rb +124 -0
- data/lib/temporalio/worker/thread_pool_executor.rb +51 -0
- data/lib/temporalio/worker.rb +204 -0
- data/lib/temporalio/workflow/async.rb +46 -0
- data/lib/{temporal → temporalio}/workflow/execution_info.rb +4 -4
- data/lib/{temporal → temporalio}/workflow/execution_status.rb +1 -1
- data/lib/temporalio/workflow/future.rb +138 -0
- data/lib/{temporal → temporalio}/workflow/id_reuse_policy.rb +6 -6
- data/lib/temporalio/workflow/info.rb +76 -0
- data/lib/{temporal → temporalio}/workflow/query_reject_condition.rb +5 -5
- data/lib/temporalio.rb +12 -3
- data/temporalio.gemspec +11 -6
- metadata +137 -64
- data/bridge/sdk-core/Cargo.lock +0 -2606
- data/bridge/sdk-core/bridge-ffi/Cargo.toml +0 -24
- data/bridge/sdk-core/bridge-ffi/LICENSE.txt +0 -23
- data/bridge/sdk-core/bridge-ffi/build.rs +0 -25
- data/bridge/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -249
- data/bridge/sdk-core/bridge-ffi/src/lib.rs +0 -825
- data/bridge/sdk-core/bridge-ffi/src/wrappers.rs +0 -211
- data/bridge/sdk-core/core/src/log_export.rs +0 -62
- data/bridge/sdk-core/core/src/worker/workflow/machines/mutable_side_effect_state_machine.rs +0 -127
- data/bridge/sdk-core/core/src/worker/workflow/machines/side_effect_state_machine.rs +0 -71
- data/bridge/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +0 -83
- data/bridge/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
- data/bridge/sdk-core/sdk/src/conversions.rs +0 -8
- data/lib/bridge.so +0 -0
- data/lib/gen/temporal/api/cluster/v1/message_pb.rb +0 -67
- data/lib/gen/temporal/api/enums/v1/cluster_pb.rb +0 -26
- data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +0 -222
- data/lib/temporal/bridge.rb +0 -14
- data/lib/temporal/connection.rb +0 -736
- data/lib/temporal/failure_converter.rb +0 -8
- data/lib/temporal/payload_converter.rb +0 -14
- data/lib/temporal/runtime.rb +0 -22
- data/lib/temporal/version.rb +0 -3
- data/lib/temporal.rb +0 -8
@@ -1,217 +1,177 @@
|
|
1
|
+
#[cfg(feature = "save_wf_inputs")]
|
2
|
+
mod saved_wf_inputs;
|
3
|
+
#[cfg(feature = "save_wf_inputs")]
|
4
|
+
mod tonic_status_serde;
|
5
|
+
|
6
|
+
#[cfg(feature = "save_wf_inputs")]
|
7
|
+
pub use saved_wf_inputs::replay_wf_state_inputs;
|
8
|
+
|
1
9
|
use crate::{
|
2
|
-
abstractions::
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
10
|
+
abstractions::dbg_panic,
|
11
|
+
worker::workflow::{
|
12
|
+
managed_run::RunUpdateAct,
|
13
|
+
run_cache::RunCache,
|
14
|
+
wft_extraction::{HistfetchRC, HistoryFetchReq, WFTExtractorOutput},
|
15
|
+
*,
|
8
16
|
},
|
9
17
|
MetricsContext,
|
10
18
|
};
|
11
19
|
use futures::{stream, stream::PollNext, Stream, StreamExt};
|
12
|
-
use std::{collections::VecDeque, fmt::Debug, future, sync::Arc
|
13
|
-
use temporal_sdk_core_api::errors::
|
14
|
-
use temporal_sdk_core_protos::
|
15
|
-
coresdk::{
|
16
|
-
workflow_activation::{
|
17
|
-
create_evict_activation, query_to_job, remove_from_cache::EvictionReason,
|
18
|
-
workflow_activation_job,
|
19
|
-
},
|
20
|
-
workflow_completion::Failure,
|
21
|
-
},
|
22
|
-
temporal::api::{enums::v1::WorkflowTaskFailedCause, failure::v1::Failure as TFailure},
|
23
|
-
};
|
24
|
-
use tokio::sync::{mpsc::unbounded_channel, oneshot};
|
25
|
-
use tokio_stream::wrappers::UnboundedReceiverStream;
|
20
|
+
use std::{collections::VecDeque, fmt::Debug, future, sync::Arc};
|
21
|
+
use temporal_sdk_core_api::errors::PollWfError;
|
22
|
+
use temporal_sdk_core_protos::coresdk::workflow_activation::remove_from_cache::EvictionReason;
|
26
23
|
use tokio_util::sync::CancellationToken;
|
27
24
|
use tracing::{Level, Span};
|
28
25
|
|
29
|
-
/// This struct holds all the state needed for tracking
|
30
|
-
/// and
|
26
|
+
/// This struct holds all the state needed for tracking the state of currently cached workflow runs
|
27
|
+
/// and directs all actions which affect them. It is ultimately the top-level arbiter of nearly
|
28
|
+
/// everything important relating to workflow state.
|
31
29
|
///
|
32
30
|
/// See [WFStream::build] for more
|
33
|
-
pub(
|
31
|
+
pub(super) struct WFStream {
|
34
32
|
runs: RunCache,
|
35
33
|
/// Buffered polls for new runs which need a cache slot to open up before we can handle them
|
36
34
|
buffered_polls_need_cache_slot: VecDeque<PermittedWFT>,
|
35
|
+
/// Is filled with runs that we decided need to have their history fetched during state
|
36
|
+
/// manipulation. Must be drained after handling each input.
|
37
|
+
runs_needing_fetching: VecDeque<HistoryFetchReq>,
|
37
38
|
|
38
|
-
|
39
|
-
client: Arc<dyn WorkerClient>,
|
40
|
-
|
41
|
-
/// Ensures we stay at or below this worker's maximum concurrent workflow task limit
|
42
|
-
wft_semaphore: MeteredSemaphore,
|
39
|
+
history_fetch_refcounter: Arc<HistfetchRC>,
|
43
40
|
shutdown_token: CancellationToken,
|
41
|
+
ignore_evicts_on_shutdown: bool,
|
44
42
|
|
45
43
|
metrics: MetricsContext,
|
46
|
-
}
|
47
|
-
/// All possible inputs to the [WFStream]
|
48
|
-
#[derive(derive_more::From, Debug)]
|
49
|
-
enum WFStreamInput {
|
50
|
-
NewWft(PermittedWFT),
|
51
|
-
Local(LocalInput),
|
52
|
-
/// The stream given to us which represents the poller (or a mock) terminated.
|
53
|
-
PollerDead,
|
54
|
-
/// The stream given to us which represents the poller (or a mock) encountered a non-retryable
|
55
|
-
/// error while polling
|
56
|
-
PollerError(tonic::Status),
|
57
|
-
}
|
58
|
-
impl From<RunUpdateResponse> for WFStreamInput {
|
59
|
-
fn from(r: RunUpdateResponse) -> Self {
|
60
|
-
WFStreamInput::Local(LocalInput {
|
61
|
-
input: LocalInputs::RunUpdateResponse(r.kind),
|
62
|
-
span: r.span,
|
63
|
-
})
|
64
|
-
}
|
65
|
-
}
|
66
|
-
/// A non-poller-received input to the [WFStream]
|
67
|
-
#[derive(derive_more::DebugCustom)]
|
68
|
-
#[debug(fmt = "LocalInput {{ {:?} }}", input)]
|
69
|
-
pub(super) struct LocalInput {
|
70
|
-
pub input: LocalInputs,
|
71
|
-
pub span: Span,
|
72
|
-
}
|
73
|
-
/// Everything that _isn't_ a poll which may affect workflow state. Always higher priority than
|
74
|
-
/// new polls.
|
75
|
-
#[derive(Debug, derive_more::From)]
|
76
|
-
pub(super) enum LocalInputs {
|
77
|
-
Completion(WFActCompleteMsg),
|
78
|
-
LocalResolution(LocalResolutionMsg),
|
79
|
-
PostActivation(PostActivationMsg),
|
80
|
-
RunUpdateResponse(RunUpdateResponseKind),
|
81
|
-
RequestEviction(RequestEvictMsg),
|
82
|
-
GetStateInfo(GetStateInfoMsg),
|
83
|
-
}
|
84
|
-
#[derive(Debug, derive_more::From)]
|
85
|
-
#[allow(clippy::large_enum_variant)] // PollerDead only ever gets used once, so not important.
|
86
|
-
enum ExternalPollerInputs {
|
87
|
-
NewWft(PermittedWFT),
|
88
|
-
PollerDead,
|
89
|
-
PollerError(tonic::Status),
|
90
|
-
}
|
91
|
-
impl From<ExternalPollerInputs> for WFStreamInput {
|
92
|
-
fn from(l: ExternalPollerInputs) -> Self {
|
93
|
-
match l {
|
94
|
-
ExternalPollerInputs::NewWft(v) => WFStreamInput::NewWft(v),
|
95
|
-
ExternalPollerInputs::PollerDead => WFStreamInput::PollerDead,
|
96
|
-
ExternalPollerInputs::PollerError(e) => WFStreamInput::PollerError(e),
|
97
|
-
}
|
98
|
-
}
|
99
|
-
}
|
100
44
|
|
45
|
+
#[cfg(feature = "save_wf_inputs")]
|
46
|
+
wf_state_inputs: Option<UnboundedSender<Vec<u8>>>,
|
47
|
+
}
|
101
48
|
impl WFStream {
|
102
49
|
/// Constructs workflow state management and returns a stream which outputs activations.
|
103
50
|
///
|
104
|
-
/// * `
|
105
|
-
///
|
106
|
-
/// come down.
|
51
|
+
/// * `wft_stream` is a stream of validated poll responses and fetched history pages as returned
|
52
|
+
/// by a poller (or mock), via [WFTExtractor].
|
107
53
|
/// * `local_rx` is a stream of actions that workflow state needs to see. Things like
|
108
|
-
///
|
54
|
+
/// completions, local activities finishing, etc. See [LocalInputs].
|
55
|
+
/// * `local_activity_request_sink` is used to handle outgoing requests to start or cancel
|
56
|
+
/// local activities, and may return resolutions that need to be handled immediately.
|
109
57
|
///
|
110
|
-
///
|
111
|
-
///
|
112
|
-
///
|
58
|
+
/// The stream inputs are combined into a stream of [WFActStreamInput]s. The stream processor
|
59
|
+
/// then takes action on those inputs, mutating the [WFStream] state, and then may yield
|
60
|
+
/// activations.
|
113
61
|
///
|
114
|
-
///
|
115
|
-
///
|
116
|
-
///
|
62
|
+
/// Importantly, nothing async happens while actually mutating state. This means all changes to
|
63
|
+
/// all workflow state can be represented purely via the stream of inputs, plus the
|
64
|
+
/// calls/retvals from the LA request sink, which is the last unfortunate bit of impurity in
|
65
|
+
/// the design. Eliminating it would be nice, so that all inputs come from the passed-in streams
|
66
|
+
/// and all outputs flow from the return stream, but it's difficult to do so since it would
|
67
|
+
/// require "pausing" in-progress changes to a run while sending & waiting for response from
|
68
|
+
/// local activity management. Likely the best option would be to move the pure state info
|
69
|
+
/// needed to determine immediate responses into LA state machines themselves (out of the LA
|
70
|
+
/// manager), which is a quite substantial change.
|
117
71
|
pub(super) fn build(
|
118
72
|
basics: WorkflowBasics,
|
119
|
-
|
73
|
+
wft_stream: impl Stream<Item = Result<WFTExtractorOutput, tonic::Status>> + Send + 'static,
|
120
74
|
local_rx: impl Stream<Item = LocalInput> + Send + 'static,
|
121
|
-
|
122
|
-
|
123
|
-
+ Send
|
124
|
-
+ Sync
|
125
|
-
+ 'static,
|
126
|
-
) -> impl Stream<Item = Result<ActivationOrAuto, PollWfError>> {
|
127
|
-
let wft_semaphore = MeteredSemaphore::new(
|
128
|
-
basics.max_outstanding_wfts,
|
129
|
-
basics.metrics.with_new_attrs([workflow_worker_type()]),
|
130
|
-
MetricsContext::available_task_slots,
|
131
|
-
);
|
132
|
-
let wft_sem_clone = wft_semaphore.clone();
|
133
|
-
let proceeder = move || {
|
134
|
-
let wft_sem_clone = wft_sem_clone.clone();
|
135
|
-
async move { wft_sem_clone.acquire_owned().await.unwrap() }
|
136
|
-
};
|
137
|
-
let poller_wfts = stream_when_allowed(external_wfts, proceeder);
|
138
|
-
let (run_update_tx, run_update_rx) = unbounded_channel();
|
139
|
-
let local_rx = stream::select(
|
140
|
-
local_rx.map(Into::into),
|
141
|
-
UnboundedReceiverStream::new(run_update_rx).map(Into::into),
|
142
|
-
);
|
75
|
+
local_activity_request_sink: impl LocalActivityRequestSink,
|
76
|
+
) -> impl Stream<Item = Result<WFStreamOutput, PollWfError>> {
|
143
77
|
let all_inputs = stream::select_with_strategy(
|
144
|
-
local_rx,
|
145
|
-
|
146
|
-
.map(
|
147
|
-
Ok(wft) => ExternalPollerInputs::NewWft(PermittedWFT { wft, permit }),
|
148
|
-
Err(e) => ExternalPollerInputs::PollerError(e),
|
149
|
-
})
|
78
|
+
local_rx.map(Into::into),
|
79
|
+
wft_stream
|
80
|
+
.map(Into::into)
|
150
81
|
.chain(stream::once(async { ExternalPollerInputs::PollerDead }))
|
151
82
|
.map(Into::into)
|
152
83
|
.boxed(),
|
153
84
|
// Priority always goes to the local stream
|
154
85
|
|_: &mut ()| PollNext::Left,
|
155
86
|
);
|
87
|
+
Self::build_internal(all_inputs, basics, local_activity_request_sink)
|
88
|
+
}
|
89
|
+
|
90
|
+
fn build_internal(
|
91
|
+
all_inputs: impl Stream<Item = WFStreamInput>,
|
92
|
+
basics: WorkflowBasics,
|
93
|
+
local_activity_request_sink: impl LocalActivityRequestSink,
|
94
|
+
) -> impl Stream<Item = Result<WFStreamOutput, PollWfError>> {
|
156
95
|
let mut state = WFStream {
|
157
96
|
buffered_polls_need_cache_slot: Default::default(),
|
158
97
|
runs: RunCache::new(
|
159
98
|
basics.max_cached_workflows,
|
160
99
|
basics.namespace.clone(),
|
161
|
-
|
162
|
-
|
100
|
+
basics.server_capabilities.clone(),
|
101
|
+
local_activity_request_sink,
|
163
102
|
basics.metrics.clone(),
|
164
103
|
),
|
165
|
-
client,
|
166
|
-
wft_semaphore,
|
167
104
|
shutdown_token: basics.shutdown_token,
|
105
|
+
ignore_evicts_on_shutdown: basics.ignore_evicts_on_shutdown,
|
168
106
|
metrics: basics.metrics,
|
107
|
+
runs_needing_fetching: Default::default(),
|
108
|
+
history_fetch_refcounter: Arc::new(HistfetchRC {}),
|
109
|
+
|
110
|
+
#[cfg(feature = "save_wf_inputs")]
|
111
|
+
wf_state_inputs: basics.wf_state_inputs,
|
169
112
|
};
|
170
113
|
all_inputs
|
171
|
-
.map(move |action| {
|
114
|
+
.map(move |action: WFStreamInput| {
|
172
115
|
let span = span!(Level::DEBUG, "new_stream_input", action=?action);
|
173
116
|
let _span_g = span.enter();
|
174
117
|
|
175
|
-
|
118
|
+
#[cfg(feature = "save_wf_inputs")]
|
119
|
+
let maybe_write = state.prep_input(&action);
|
120
|
+
|
121
|
+
let mut activations = vec![];
|
122
|
+
let maybe_act = match action {
|
176
123
|
WFStreamInput::NewWft(pwft) => {
|
177
|
-
debug!(run_id=%pwft.
|
178
|
-
state.instantiate_or_update(pwft)
|
179
|
-
None
|
124
|
+
debug!(run_id=%pwft.work.execution.run_id, "New WFT");
|
125
|
+
state.instantiate_or_update(pwft)
|
180
126
|
}
|
181
127
|
WFStreamInput::Local(local_input) => {
|
182
128
|
let _span_g = local_input.span.enter();
|
183
|
-
|
184
|
-
|
185
|
-
|
129
|
+
if let Some(rid) = local_input.input.run_id() {
|
130
|
+
if let Some(rh) = state.runs.get_mut(rid) {
|
131
|
+
rh.record_span_fields(&local_input.span);
|
186
132
|
}
|
133
|
+
}
|
134
|
+
match local_input.input {
|
187
135
|
LocalInputs::Completion(completion) => {
|
188
|
-
|
189
|
-
|
136
|
+
activations.extend(
|
137
|
+
state.process_completion(NewOrFetchedComplete::New(completion)),
|
138
|
+
);
|
139
|
+
None // completions can return more than one activation
|
140
|
+
}
|
141
|
+
LocalInputs::FetchedPageCompletion { paginator, update } => {
|
142
|
+
activations.extend(state.process_completion(
|
143
|
+
NewOrFetchedComplete::Fetched(update, paginator),
|
144
|
+
));
|
145
|
+
None // completions can return more than one activation
|
190
146
|
}
|
191
147
|
LocalInputs::PostActivation(report) => {
|
192
|
-
state.process_post_activation(report)
|
193
|
-
None
|
148
|
+
state.process_post_activation(report)
|
194
149
|
}
|
195
|
-
LocalInputs::LocalResolution(res) =>
|
196
|
-
|
197
|
-
|
150
|
+
LocalInputs::LocalResolution(res) => state.local_resolution(res),
|
151
|
+
LocalInputs::HeartbeatTimeout(hbt) => {
|
152
|
+
state.process_heartbeat_timeout(hbt)
|
198
153
|
}
|
199
154
|
LocalInputs::RequestEviction(evict) => {
|
200
|
-
state.request_eviction(evict)
|
201
|
-
None
|
155
|
+
state.request_eviction(evict).into_run_update_resp()
|
202
156
|
}
|
203
157
|
LocalInputs::GetStateInfo(gsi) => {
|
204
158
|
let _ = gsi.response_tx.send(WorkflowStateInfo {
|
205
159
|
cached_workflows: state.runs.len(),
|
206
160
|
outstanding_wft: state.outstanding_wfts(),
|
207
|
-
available_wft_permits: state.wft_semaphore.available_permits(),
|
208
161
|
});
|
209
162
|
None
|
210
163
|
}
|
211
164
|
}
|
212
165
|
}
|
166
|
+
WFStreamInput::FailedFetch { run_id, err } => state
|
167
|
+
.request_eviction(RequestEvictMsg {
|
168
|
+
run_id,
|
169
|
+
message: format!("Fetching history failed: {err:?}"),
|
170
|
+
reason: EvictionReason::PaginationOrHistoryFetch,
|
171
|
+
})
|
172
|
+
.into_run_update_resp(),
|
213
173
|
WFStreamInput::PollerDead => {
|
214
|
-
debug!("WFT poller died,
|
174
|
+
debug!("WFT poller died, beginning shutdown");
|
215
175
|
state.shutdown_token.cancel();
|
216
176
|
None
|
217
177
|
}
|
@@ -221,457 +181,228 @@ impl WFStream {
|
|
221
181
|
}
|
222
182
|
};
|
223
183
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
184
|
+
activations.extend(maybe_act.into_iter());
|
185
|
+
activations.extend(state.reconcile_buffered());
|
186
|
+
|
187
|
+
// Always flush *after* actually handling the input, as this allows LA sink
|
188
|
+
// responses to be recorded before the input, so they can be read and buffered to be
|
189
|
+
// replayed during the handling of the input itself.
|
190
|
+
#[cfg(feature = "save_wf_inputs")]
|
191
|
+
if let Some(write) = maybe_write {
|
192
|
+
state.flush_write(write);
|
230
193
|
}
|
231
|
-
|
194
|
+
|
232
195
|
if state.shutdown_done() {
|
196
|
+
info!("Workflow shutdown is done");
|
233
197
|
return Err(PollWfError::ShutDown);
|
234
198
|
}
|
235
199
|
|
236
|
-
Ok(
|
200
|
+
Ok(WFStreamOutput {
|
201
|
+
activations: activations.into(),
|
202
|
+
fetch_histories: std::mem::take(&mut state.runs_needing_fetching),
|
203
|
+
})
|
237
204
|
})
|
238
|
-
.
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
Err(e) => {
|
243
|
-
if !matches!(e, PollWfError::ShutDown) {
|
244
|
-
error!(
|
205
|
+
.inspect(|o| {
|
206
|
+
if let Some(e) = o.as_ref().err() {
|
207
|
+
if !matches!(e, PollWfError::ShutDown) {
|
208
|
+
error!(
|
245
209
|
"Workflow processing encountered fatal error and must shut down {:?}",
|
246
210
|
e
|
247
|
-
|
248
|
-
}
|
249
|
-
Some(Err(e))
|
211
|
+
);
|
250
212
|
}
|
251
|
-
}
|
213
|
+
}
|
252
214
|
})
|
253
215
|
// Stop the stream once we have shut down
|
254
216
|
.take_while(|o| future::ready(!matches!(o, Err(PollWfError::ShutDown))))
|
255
217
|
}
|
256
218
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
match
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
.get_mut(&resp.run_id)
|
267
|
-
.expect("Workflow must exist, it just sent us an update response");
|
268
|
-
run_handle.have_seen_terminal_event = resp.have_seen_terminal_event;
|
269
|
-
run_handle.more_pending_work = resp.more_pending_work;
|
270
|
-
run_handle.last_action_acked = true;
|
271
|
-
run_handle.most_recently_processed_event_number =
|
272
|
-
resp.most_recently_processed_event_number;
|
273
|
-
|
274
|
-
let r = match resp.outgoing_activation {
|
275
|
-
Some(ActivationOrAuto::LangActivation(mut activation)) => {
|
276
|
-
if resp.in_response_to_wft {
|
277
|
-
let wft = run_handle
|
278
|
-
.wft
|
279
|
-
.as_mut()
|
280
|
-
.expect("WFT must exist for run just updated with one");
|
281
|
-
// If there are in-poll queries, insert jobs for those queries into the
|
282
|
-
// activation, but only if we hit the cache. If we didn't, those queries
|
283
|
-
// will need to be dealt with once replay is over
|
284
|
-
if !wft.pending_queries.is_empty() && wft.hit_cache {
|
285
|
-
put_queries_in_act(&mut activation, wft);
|
286
|
-
}
|
287
|
-
}
|
288
|
-
|
289
|
-
if activation.jobs.is_empty() {
|
290
|
-
dbg_panic!("Should not send lang activation with no jobs");
|
291
|
-
}
|
292
|
-
Some(ActivationOrAuto::LangActivation(activation))
|
293
|
-
}
|
294
|
-
Some(ActivationOrAuto::ReadyForQueries(mut act)) => {
|
295
|
-
if let Some(wft) = run_handle.wft.as_mut() {
|
296
|
-
put_queries_in_act(&mut act, wft);
|
297
|
-
Some(ActivationOrAuto::LangActivation(act))
|
298
|
-
} else {
|
299
|
-
dbg_panic!("Ready for queries but no WFT!");
|
300
|
-
None
|
301
|
-
}
|
302
|
-
}
|
303
|
-
a @ Some(ActivationOrAuto::Autocomplete { .. }) => a,
|
304
|
-
None => {
|
305
|
-
// If the response indicates there is no activation to send yet but there
|
306
|
-
// is more pending work, we should check again.
|
307
|
-
if run_handle.more_pending_work {
|
308
|
-
run_handle.check_more_activations();
|
309
|
-
None
|
310
|
-
} else if let Some(reason) = run_handle.trying_to_evict.as_ref() {
|
311
|
-
// If a run update came back and had nothing to do, but we're trying to
|
312
|
-
// evict, just do that now as long as there's no other outstanding work.
|
313
|
-
if run_handle.activation.is_none() && !run_handle.more_pending_work {
|
314
|
-
let mut evict_act = create_evict_activation(
|
315
|
-
resp.run_id,
|
316
|
-
reason.message.clone(),
|
317
|
-
reason.reason,
|
318
|
-
);
|
319
|
-
evict_act.history_length =
|
320
|
-
run_handle.most_recently_processed_event_number as u32;
|
321
|
-
Some(ActivationOrAuto::LangActivation(evict_act))
|
322
|
-
} else {
|
323
|
-
None
|
324
|
-
}
|
325
|
-
} else {
|
326
|
-
None
|
327
|
-
}
|
328
|
-
}
|
329
|
-
};
|
330
|
-
if let Some(f) = resp.fulfillable_complete.take() {
|
331
|
-
f.fulfill();
|
332
|
-
}
|
333
|
-
|
334
|
-
// After each run update, check if it's ready to handle any buffered poll
|
335
|
-
if matches!(&r, Some(ActivationOrAuto::Autocomplete { .. }) | None)
|
336
|
-
&& !run_handle.has_any_pending_work(false, true)
|
337
|
-
{
|
338
|
-
if let Some(bufft) = run_handle.buffered_resp.take() {
|
339
|
-
self.instantiate_or_update(bufft);
|
340
|
-
}
|
341
|
-
}
|
342
|
-
r
|
343
|
-
}
|
344
|
-
RunUpdateResponseKind::Fail(fail) => {
|
345
|
-
if let Some(r) = self.runs.get_mut(&fail.run_id) {
|
346
|
-
r.last_action_acked = true;
|
347
|
-
}
|
348
|
-
|
349
|
-
if let Some(resp_chan) = fail.completion_resp {
|
350
|
-
// Automatically fail the workflow task in the event we couldn't update machines
|
351
|
-
let fail_cause = if matches!(&fail.err, WFMachinesError::Nondeterminism(_)) {
|
352
|
-
WorkflowTaskFailedCause::NonDeterministicError
|
353
|
-
} else {
|
354
|
-
WorkflowTaskFailedCause::Unspecified
|
355
|
-
};
|
356
|
-
let wft_fail_str = format!("{:?}", fail.err);
|
357
|
-
self.failed_completion(
|
358
|
-
fail.run_id,
|
359
|
-
fail_cause,
|
360
|
-
fail.err.evict_reason(),
|
361
|
-
TFailure::application_failure(wft_fail_str, false).into(),
|
362
|
-
resp_chan,
|
363
|
-
);
|
364
|
-
} else {
|
365
|
-
// TODO: This should probably also fail workflow tasks, but that wasn't
|
366
|
-
// implemented pre-refactor either.
|
367
|
-
warn!(error=?fail.err, run_id=%fail.run_id, "Error while updating workflow");
|
368
|
-
self.request_eviction(RequestEvictMsg {
|
369
|
-
run_id: fail.run_id,
|
370
|
-
message: format!("Error while updating workflow: {:?}", fail.err),
|
371
|
-
reason: fail.err.evict_reason(),
|
372
|
-
});
|
373
|
-
}
|
374
|
-
None
|
219
|
+
/// Instantiate or update run machines with a new WFT
|
220
|
+
#[instrument(skip(self, pwft)
|
221
|
+
fields(run_id=%pwft.work.execution.run_id,
|
222
|
+
workflow_id=%pwft.work.execution.workflow_id))]
|
223
|
+
fn instantiate_or_update(&mut self, pwft: PermittedWFT) -> RunUpdateAct {
|
224
|
+
match self._instantiate_or_update(pwft) {
|
225
|
+
Err(histfetch) => {
|
226
|
+
self.runs_needing_fetching.push_back(histfetch);
|
227
|
+
Default::default()
|
375
228
|
}
|
229
|
+
Ok(r) => r,
|
376
230
|
}
|
377
231
|
}
|
378
232
|
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
233
|
+
fn _instantiate_or_update(
|
234
|
+
&mut self,
|
235
|
+
pwft: PermittedWFT,
|
236
|
+
) -> Result<RunUpdateAct, HistoryFetchReq> {
|
237
|
+
// If the run already exists, possibly buffer the work and return early if we can't handle
|
238
|
+
// it yet.
|
239
|
+
let pwft = if let Some(rh) = self.runs.get_mut(&pwft.work.execution.run_id) {
|
240
|
+
if let Some(w) = rh.buffer_wft_if_outstanding_work(pwft) {
|
241
|
+
w
|
242
|
+
} else {
|
243
|
+
return Ok(None);
|
244
|
+
}
|
384
245
|
} else {
|
385
|
-
|
246
|
+
pwft
|
386
247
|
};
|
387
248
|
|
388
|
-
let run_id = work.
|
249
|
+
let run_id = pwft.work.execution.run_id.clone();
|
389
250
|
// If our cache is full and this WFT is for an unseen run we must first evict a run before
|
390
251
|
// we can deal with this task. So, buffer the task in that case.
|
391
252
|
if !self.runs.has_run(&run_id) && self.runs.is_full() {
|
392
|
-
self.buffer_resp_on_full_cache(
|
393
|
-
return;
|
253
|
+
self.buffer_resp_on_full_cache(pwft);
|
254
|
+
return Ok(None);
|
394
255
|
}
|
395
256
|
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
history_length = %work.history.events.len(),
|
401
|
-
start_event_id = ?start_event_id,
|
402
|
-
has_legacy_query = %work.legacy_query.is_some(),
|
403
|
-
attempt = %work.attempt,
|
404
|
-
"Applying new workflow task from server"
|
405
|
-
);
|
406
|
-
|
407
|
-
let wft_info = WorkflowTaskInfo {
|
408
|
-
attempt: work.attempt,
|
409
|
-
task_token: work.task_token,
|
410
|
-
};
|
411
|
-
let poll_resp_is_incremental = work
|
412
|
-
.history
|
413
|
-
.events
|
414
|
-
.get(0)
|
415
|
-
.map(|ev| ev.event_id > 1)
|
416
|
-
.unwrap_or_default();
|
417
|
-
let poll_resp_is_incremental = poll_resp_is_incremental || work.history.events.is_empty();
|
418
|
-
|
419
|
-
let mut did_miss_cache = !poll_resp_is_incremental;
|
420
|
-
|
421
|
-
let page_token = if !self.runs.has_run(&run_id) && poll_resp_is_incremental {
|
257
|
+
// This check can't really be lifted up higher since we could EX: See it's in the cache,
|
258
|
+
// not fetch more history, send the task, see cache is full, buffer it, then evict that
|
259
|
+
// run, and now we still have a cache miss.
|
260
|
+
if !self.runs.has_run(&run_id) && pwft.work.is_incremental() {
|
422
261
|
debug!(run_id=?run_id, "Workflow task has partial history, but workflow is not in \
|
423
262
|
cache. Will fetch history");
|
424
263
|
self.metrics.sticky_cache_miss();
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
};
|
430
|
-
let history_update = HistoryUpdate::new(
|
431
|
-
HistoryPaginator::new(
|
432
|
-
work.history,
|
433
|
-
work.workflow_execution.workflow_id.clone(),
|
434
|
-
run_id.clone(),
|
435
|
-
page_token,
|
436
|
-
self.client.clone(),
|
437
|
-
),
|
438
|
-
work.previous_started_event_id,
|
439
|
-
);
|
440
|
-
let legacy_query_from_poll = work
|
441
|
-
.legacy_query
|
442
|
-
.take()
|
443
|
-
.map(|q| query_to_job(LEGACY_QUERY_ID.to_string(), q));
|
444
|
-
|
445
|
-
let mut pending_queries = work.query_requests.into_iter().collect::<Vec<_>>();
|
446
|
-
if !pending_queries.is_empty() && legacy_query_from_poll.is_some() {
|
447
|
-
error!(
|
448
|
-
"Server issued both normal and legacy queries. This should not happen. Please \
|
449
|
-
file a bug report."
|
450
|
-
);
|
451
|
-
self.request_eviction(RequestEvictMsg {
|
452
|
-
run_id,
|
453
|
-
message: "Server issued both normal and legacy query".to_string(),
|
454
|
-
reason: EvictionReason::Fatal,
|
455
|
-
});
|
456
|
-
return;
|
457
|
-
}
|
458
|
-
if let Some(lq) = legacy_query_from_poll {
|
459
|
-
pending_queries.push(lq);
|
264
|
+
return Err(HistoryFetchReq::Full(
|
265
|
+
CacheMissFetchReq { original_wft: pwft },
|
266
|
+
self.history_fetch_refcounter.clone(),
|
267
|
+
));
|
460
268
|
}
|
461
269
|
|
462
|
-
let
|
463
|
-
|
464
|
-
&run_id,
|
465
|
-
&work.workflow_execution.workflow_id,
|
466
|
-
&work.workflow_type,
|
467
|
-
history_update,
|
468
|
-
start_time,
|
469
|
-
);
|
470
|
-
run_handle.wft = Some(OutstandingTask {
|
471
|
-
info: wft_info,
|
472
|
-
hit_cache: !did_miss_cache,
|
473
|
-
pending_queries,
|
474
|
-
start_time,
|
475
|
-
permit,
|
476
|
-
})
|
270
|
+
let rur = self.runs.instantiate_or_update(pwft);
|
271
|
+
Ok(rur)
|
477
272
|
}
|
478
273
|
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
274
|
+
fn process_completion(&mut self, complete: NewOrFetchedComplete) -> Vec<ActivationOrAuto> {
|
275
|
+
let rh = if let Some(rh) = self.runs.get_mut(complete.run_id()) {
|
276
|
+
rh
|
277
|
+
} else {
|
278
|
+
dbg_panic!("Run missing during completion {:?}", complete);
|
279
|
+
return vec![];
|
280
|
+
};
|
281
|
+
let mut acts: Vec<_> = match complete {
|
282
|
+
NewOrFetchedComplete::New(complete) => match complete.completion {
|
283
|
+
ValidatedCompletion::Success {
|
284
|
+
commands,
|
285
|
+
used_flags,
|
286
|
+
..
|
287
|
+
} => match rh.successful_completion(commands, used_flags, complete.response_tx) {
|
288
|
+
Ok(acts) => acts,
|
289
|
+
Err(npr) => {
|
290
|
+
self.runs_needing_fetching
|
291
|
+
.push_back(HistoryFetchReq::NextPage(
|
292
|
+
npr,
|
293
|
+
self.history_fetch_refcounter.clone(),
|
294
|
+
));
|
295
|
+
None
|
296
|
+
}
|
297
|
+
},
|
298
|
+
ValidatedCompletion::Fail { failure, .. } => rh.failed_completion(
|
299
|
+
failure.force_cause(),
|
490
300
|
EvictionReason::LangFail,
|
491
301
|
failure,
|
492
302
|
complete.response_tx,
|
493
|
-
)
|
303
|
+
),
|
304
|
+
},
|
305
|
+
NewOrFetchedComplete::Fetched(update, paginator) => {
|
306
|
+
rh.fetched_page_completion(update, paginator)
|
494
307
|
}
|
495
308
|
}
|
309
|
+
.into_iter()
|
310
|
+
.collect();
|
496
311
|
// Always queue evictions after completion when we have a zero-size cache
|
497
312
|
if self.runs.cache_capacity() == 0 {
|
498
|
-
self.request_eviction_of_lru_run()
|
313
|
+
acts.extend(self.request_eviction_of_lru_run().into_run_update_resp())
|
499
314
|
}
|
315
|
+
acts
|
500
316
|
}
|
501
317
|
|
502
|
-
fn
|
503
|
-
&
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
(
|
512
|
-
entry.info.task_token.clone(),
|
513
|
-
!entry.pending_queries.is_empty(),
|
514
|
-
entry.start_time,
|
515
|
-
)
|
516
|
-
} else {
|
517
|
-
if !activation_was_only_eviction {
|
518
|
-
// Not an error if this was an eviction, since it's normal to issue eviction
|
519
|
-
// activations without an associated workflow task in that case.
|
520
|
-
dbg_panic!(
|
521
|
-
"Attempted to complete activation for run {} without associated workflow task",
|
318
|
+
fn process_post_activation(&mut self, report: PostActivationMsg) -> RunUpdateAct {
|
319
|
+
let run_id = &report.run_id;
|
320
|
+
let wft_from_complete = report.wft_from_complete;
|
321
|
+
if let Some((wft, _)) = &wft_from_complete {
|
322
|
+
if &wft.execution.run_id != run_id {
|
323
|
+
dbg_panic!(
|
324
|
+
"Server returned a WFT on completion for a different run ({}) than the \
|
325
|
+
one being completed ({}). This is a server bug.",
|
326
|
+
wft.execution.run_id,
|
522
327
|
run_id
|
523
|
-
|
524
|
-
}
|
525
|
-
self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
|
526
|
-
return;
|
527
|
-
};
|
528
|
-
|
529
|
-
// If the only command from the activation is a legacy query response, that means we need
|
530
|
-
// to respond differently than a typical activation.
|
531
|
-
if matches!(&commands.as_slice(),
|
532
|
-
&[WFCommand::QueryResponse(qr)] if qr.query_id == LEGACY_QUERY_ID)
|
533
|
-
{
|
534
|
-
let qr = match commands.remove(0) {
|
535
|
-
WFCommand::QueryResponse(qr) => qr,
|
536
|
-
_ => unreachable!("We just verified this is the only command"),
|
537
|
-
};
|
538
|
-
self.reply_to_complete(
|
539
|
-
&run_id,
|
540
|
-
ActivationCompleteOutcome::ReportWFTSuccess(ServerCommandsWithWorkflowInfo {
|
541
|
-
task_token,
|
542
|
-
action: ActivationAction::RespondLegacyQuery {
|
543
|
-
result: Box::new(qr),
|
544
|
-
},
|
545
|
-
}),
|
546
|
-
resp_chan,
|
547
|
-
);
|
548
|
-
} else {
|
549
|
-
// First strip out query responses from other commands that actually affect machines
|
550
|
-
// Would be prettier with `drain_filter`
|
551
|
-
let mut i = 0;
|
552
|
-
let mut query_responses = vec![];
|
553
|
-
while i < commands.len() {
|
554
|
-
if matches!(commands[i], WFCommand::QueryResponse(_)) {
|
555
|
-
if let WFCommand::QueryResponse(qr) = commands.remove(i) {
|
556
|
-
query_responses.push(qr);
|
557
|
-
}
|
558
|
-
} else {
|
559
|
-
i += 1;
|
560
|
-
}
|
561
|
-
}
|
562
|
-
|
563
|
-
let activation_was_eviction = self.activation_has_eviction(&run_id);
|
564
|
-
if let Some(rh) = self.runs.get_mut(&run_id) {
|
565
|
-
rh.send_completion(RunActivationCompletion {
|
566
|
-
task_token,
|
567
|
-
start_time,
|
568
|
-
commands,
|
569
|
-
activation_was_eviction,
|
570
|
-
activation_was_only_eviction,
|
571
|
-
has_pending_query,
|
572
|
-
query_responses,
|
573
|
-
resp_chan: Some(resp_chan),
|
574
|
-
});
|
575
|
-
} else {
|
576
|
-
dbg_panic!("Run {} missing during completion", run_id);
|
328
|
+
);
|
577
329
|
}
|
578
|
-
};
|
579
|
-
}
|
580
|
-
|
581
|
-
fn failed_completion(
|
582
|
-
&mut self,
|
583
|
-
run_id: String,
|
584
|
-
cause: WorkflowTaskFailedCause,
|
585
|
-
reason: EvictionReason,
|
586
|
-
failure: Failure,
|
587
|
-
resp_chan: oneshot::Sender<ActivationCompleteResult>,
|
588
|
-
) {
|
589
|
-
let tt = if let Some(tt) = self.get_task(&run_id).map(|t| t.info.task_token.clone()) {
|
590
|
-
tt
|
591
|
-
} else {
|
592
|
-
dbg_panic!(
|
593
|
-
"No workflow task for run id {} found when trying to fail activation",
|
594
|
-
run_id
|
595
|
-
);
|
596
|
-
self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
|
597
|
-
return;
|
598
|
-
};
|
599
|
-
|
600
|
-
if let Some(m) = self.run_metrics(&run_id) {
|
601
|
-
m.wf_task_failed();
|
602
330
|
}
|
603
|
-
let message = format!("Workflow activation completion failed: {:?}", &failure);
|
604
|
-
// Blow up any cached data associated with the workflow
|
605
|
-
let should_report = match self.request_eviction(RequestEvictMsg {
|
606
|
-
run_id: run_id.clone(),
|
607
|
-
message,
|
608
|
-
reason,
|
609
|
-
}) {
|
610
|
-
EvictionRequestResult::EvictionRequested(Some(attempt))
|
611
|
-
| EvictionRequestResult::EvictionAlreadyRequested(Some(attempt)) => attempt <= 1,
|
612
|
-
_ => false,
|
613
|
-
};
|
614
|
-
// If the outstanding WFT is a legacy query task, report that we need to fail it
|
615
|
-
let outcome = if self
|
616
|
-
.runs
|
617
|
-
.get(&run_id)
|
618
|
-
.map(|rh| rh.pending_work_is_legacy_query())
|
619
|
-
.unwrap_or_default()
|
620
|
-
{
|
621
|
-
ActivationCompleteOutcome::ReportWFTFail(
|
622
|
-
FailedActivationWFTReport::ReportLegacyQueryFailure(tt, failure),
|
623
|
-
)
|
624
|
-
} else if should_report {
|
625
|
-
ActivationCompleteOutcome::ReportWFTFail(FailedActivationWFTReport::Report(
|
626
|
-
tt, cause, failure,
|
627
|
-
))
|
628
|
-
} else {
|
629
|
-
ActivationCompleteOutcome::DoNothing
|
630
|
-
};
|
631
|
-
self.reply_to_complete(&run_id, outcome, resp_chan);
|
632
|
-
}
|
633
331
|
|
634
|
-
|
635
|
-
let run_id = &report.run_id;
|
332
|
+
let mut res = None;
|
636
333
|
|
637
334
|
// If we reported to server, we always want to mark it complete.
|
638
|
-
let maybe_t = self.complete_wft(run_id, report.
|
335
|
+
let maybe_t = self.complete_wft(run_id, report.wft_report_status);
|
336
|
+
// Delete the activation
|
337
|
+
let activation = self
|
338
|
+
.runs
|
339
|
+
.get_mut(run_id)
|
340
|
+
.and_then(|rh| rh.delete_activation());
|
341
|
+
|
342
|
+
// Evict the run if the activation contained an eviction
|
343
|
+
let mut applied_buffered_poll_for_this_run = false;
|
344
|
+
if activation.map(|a| a.has_eviction()).unwrap_or_default() {
|
345
|
+
debug!(run_id=%run_id, "Evicting run");
|
346
|
+
|
347
|
+
if let Some(mut rh) = self.runs.remove(run_id) {
|
348
|
+
if let Some(buff) = rh.take_buffered_wft() {
|
349
|
+
// Don't try to apply a buffered poll for this run if we just got a new WFT
|
350
|
+
// from completing, because by definition that buffered poll is now an
|
351
|
+
// out-of-date WFT.
|
352
|
+
if wft_from_complete.is_none() {
|
353
|
+
res = self.instantiate_or_update(buff);
|
354
|
+
applied_buffered_poll_for_this_run = true;
|
355
|
+
}
|
356
|
+
}
|
357
|
+
}
|
639
358
|
|
640
|
-
|
641
|
-
.
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
359
|
+
// Attempt to apply a buffered poll for some *other* run, if we didn't have a wft
|
360
|
+
// from complete or a buffered poll for *this* run.
|
361
|
+
if wft_from_complete.is_none() && !applied_buffered_poll_for_this_run {
|
362
|
+
if let Some(buff) = self.buffered_polls_need_cache_slot.pop_front() {
|
363
|
+
res = self.instantiate_or_update(buff);
|
364
|
+
}
|
365
|
+
}
|
646
366
|
};
|
647
367
|
|
648
|
-
if let Some(wft) =
|
649
|
-
debug!(run_id=%wft.
|
368
|
+
if let Some((wft, pag)) = wft_from_complete {
|
369
|
+
debug!(run_id=%wft.execution.run_id, "New WFT from completion");
|
650
370
|
if let Some(t) = maybe_t {
|
651
|
-
self.instantiate_or_update(PermittedWFT {
|
652
|
-
wft,
|
371
|
+
res = self.instantiate_or_update(PermittedWFT {
|
372
|
+
work: wft,
|
653
373
|
permit: t.permit,
|
654
|
-
|
374
|
+
paginator: pag,
|
375
|
+
});
|
655
376
|
}
|
656
377
|
}
|
657
378
|
|
658
|
-
if
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
379
|
+
if res.is_none() {
|
380
|
+
if let Some(rh) = self.runs.get_mut(run_id) {
|
381
|
+
// Attempt to produce the next activation if needed
|
382
|
+
res = rh.check_more_activations();
|
383
|
+
}
|
663
384
|
}
|
385
|
+
res
|
664
386
|
}
|
665
387
|
|
666
|
-
fn local_resolution(&mut self, msg: LocalResolutionMsg) {
|
388
|
+
fn local_resolution(&mut self, msg: LocalResolutionMsg) -> RunUpdateAct {
|
667
389
|
let run_id = msg.run_id;
|
668
390
|
if let Some(rh) = self.runs.get_mut(&run_id) {
|
669
|
-
rh.
|
391
|
+
rh.local_resolution(msg.res)
|
670
392
|
} else {
|
671
393
|
// It isn't an explicit error if the machine is missing when a local activity resolves.
|
672
394
|
// This can happen if an activity reports a timeout after we stopped caring about it.
|
673
395
|
debug!(run_id = %run_id,
|
674
396
|
"Tried to resolve a local activity for a run we are no longer tracking");
|
397
|
+
None
|
398
|
+
}
|
399
|
+
}
|
400
|
+
|
401
|
+
fn process_heartbeat_timeout(&mut self, run_id: String) -> RunUpdateAct {
|
402
|
+
if let Some(rh) = self.runs.get_mut(&run_id) {
|
403
|
+
rh.heartbeat_timeout()
|
404
|
+
} else {
|
405
|
+
None
|
675
406
|
}
|
676
407
|
}
|
677
408
|
|
@@ -679,19 +410,10 @@ impl WFStream {
|
|
679
410
|
/// activation to evict the workflow from the lang side. Workflow will not *actually* be evicted
|
680
411
|
/// until lang replies to that activation
|
681
412
|
fn request_eviction(&mut self, info: RequestEvictMsg) -> EvictionRequestResult {
|
682
|
-
let activation_has_eviction = self.activation_has_eviction(&info.run_id);
|
683
413
|
if let Some(rh) = self.runs.get_mut(&info.run_id) {
|
684
|
-
|
685
|
-
if !activation_has_eviction && rh.trying_to_evict.is_none() {
|
686
|
-
debug!(run_id=%info.run_id, reason=%info.message, "Eviction requested");
|
687
|
-
rh.trying_to_evict = Some(info);
|
688
|
-
rh.check_more_activations();
|
689
|
-
EvictionRequestResult::EvictionRequested(attempts)
|
690
|
-
} else {
|
691
|
-
EvictionRequestResult::EvictionAlreadyRequested(attempts)
|
692
|
-
}
|
414
|
+
rh.request_eviction(info)
|
693
415
|
} else {
|
694
|
-
|
416
|
+
debug!(run_id=%info.run_id, "Eviction requested for unknown run");
|
695
417
|
EvictionRequestResult::NotFound
|
696
418
|
}
|
697
419
|
}
|
@@ -710,36 +432,10 @@ impl WFStream {
|
|
710
432
|
}
|
711
433
|
}
|
712
434
|
|
713
|
-
/// Evict a workflow from the cache by its run id. Any existing pending activations will be
|
714
|
-
/// destroyed, and any outstanding activations invalidated.
|
715
|
-
fn evict_run(&mut self, run_id: &str) {
|
716
|
-
debug!(run_id=%run_id, "Evicting run");
|
717
|
-
|
718
|
-
let mut did_take_buff = false;
|
719
|
-
// Now it can safely be deleted, it'll get recreated once the un-buffered poll is handled if
|
720
|
-
// there was one.
|
721
|
-
if let Some(mut rh) = self.runs.remove(run_id) {
|
722
|
-
rh.handle.abort();
|
723
|
-
|
724
|
-
if let Some(buff) = rh.buffered_resp.take() {
|
725
|
-
self.instantiate_or_update(buff);
|
726
|
-
did_take_buff = true;
|
727
|
-
}
|
728
|
-
}
|
729
|
-
|
730
|
-
if !did_take_buff {
|
731
|
-
// If there wasn't a buffered poll, there might be one for a different run which needs
|
732
|
-
// a free cache slot, and now there is.
|
733
|
-
if let Some(buff) = self.buffered_polls_need_cache_slot.pop_front() {
|
734
|
-
self.instantiate_or_update(buff);
|
735
|
-
}
|
736
|
-
}
|
737
|
-
}
|
738
|
-
|
739
435
|
fn complete_wft(
|
740
436
|
&mut self,
|
741
437
|
run_id: &str,
|
742
|
-
|
438
|
+
wft_report_status: WFTReportStatus,
|
743
439
|
) -> Option<OutstandingTask> {
|
744
440
|
// If the WFT completion wasn't sent to the server, but we did see the final event, we still
|
745
441
|
// want to clear the workflow task. This can really only happen in replay testing, where we
|
@@ -749,9 +445,9 @@ impl WFStream {
|
|
749
445
|
let saw_final = self
|
750
446
|
.runs
|
751
447
|
.get(run_id)
|
752
|
-
.map(|r| r.have_seen_terminal_event)
|
448
|
+
.map(|r| r.have_seen_terminal_event())
|
753
449
|
.unwrap_or_default();
|
754
|
-
if !saw_final && !
|
450
|
+
if !saw_final && matches!(wft_report_status, WFTReportStatus::NotReported) {
|
755
451
|
return None;
|
756
452
|
}
|
757
453
|
|
@@ -759,60 +455,26 @@ impl WFStream {
|
|
759
455
|
// Can't mark the WFT complete if there are pending queries, as doing so would destroy
|
760
456
|
// them.
|
761
457
|
if rh
|
762
|
-
.wft
|
763
|
-
.as_ref()
|
458
|
+
.wft()
|
764
459
|
.map(|wft| !wft.pending_queries.is_empty())
|
765
460
|
.unwrap_or_default()
|
766
461
|
{
|
767
462
|
return None;
|
768
463
|
}
|
769
464
|
|
770
|
-
|
771
|
-
let retme = rh.wft.take();
|
772
|
-
if let Some(ot) = &retme {
|
773
|
-
if let Some(m) = self.run_metrics(run_id) {
|
774
|
-
m.wf_task_latency(ot.start_time.elapsed());
|
775
|
-
}
|
776
|
-
}
|
777
|
-
retme
|
465
|
+
rh.mark_wft_complete(wft_report_status)
|
778
466
|
} else {
|
779
467
|
None
|
780
468
|
}
|
781
469
|
}
|
782
470
|
|
783
|
-
/// Stores some work if there is any outstanding WFT or activation for the run. If there was
|
784
|
-
/// not, returns the work back out inside the option.
|
785
|
-
fn buffer_resp_if_outstanding_work(&mut self, work: PermittedWFT) -> Option<PermittedWFT> {
|
786
|
-
let run_id = &work.wft.workflow_execution.run_id;
|
787
|
-
if let Some(mut run) = self.runs.get_mut(run_id) {
|
788
|
-
let about_to_issue_evict = run.trying_to_evict.is_some() && !run.last_action_acked;
|
789
|
-
let has_wft = run.wft.is_some();
|
790
|
-
let has_activation = run.activation.is_some();
|
791
|
-
if has_wft
|
792
|
-
|| has_activation
|
793
|
-
|| about_to_issue_evict
|
794
|
-
|| run.more_pending_work
|
795
|
-
|| !run.last_action_acked
|
796
|
-
{
|
797
|
-
debug!(run_id = %run_id, run = ?run,
|
798
|
-
"Got new WFT for a run with outstanding work, buffering it");
|
799
|
-
run.buffered_resp = Some(work);
|
800
|
-
None
|
801
|
-
} else {
|
802
|
-
Some(work)
|
803
|
-
}
|
804
|
-
} else {
|
805
|
-
Some(work)
|
806
|
-
}
|
807
|
-
}
|
808
|
-
|
809
471
|
fn buffer_resp_on_full_cache(&mut self, work: PermittedWFT) {
|
810
|
-
debug!(run_id=%work.
|
472
|
+
debug!(run_id=%work.work.execution.run_id, "Buffering WFT because cache is full");
|
811
473
|
// If there's already a buffered poll for the run, replace it.
|
812
474
|
if let Some(rh) = self
|
813
475
|
.buffered_polls_need_cache_slot
|
814
476
|
.iter_mut()
|
815
|
-
.find(|w| w.
|
477
|
+
.find(|w| w.work.execution.run_id == work.work.execution.run_id)
|
816
478
|
{
|
817
479
|
*rh = work;
|
818
480
|
} else {
|
@@ -823,7 +485,7 @@ impl WFStream {
|
|
823
485
|
|
824
486
|
/// Makes sure we have enough pending evictions to fulfill the needs of buffered WFTs who are
|
825
487
|
/// waiting on a cache slot
|
826
|
-
fn reconcile_buffered(&mut self) {
|
488
|
+
fn reconcile_buffered(&mut self) -> Vec<ActivationOrAuto> {
|
827
489
|
// We must ensure that there are at least as many pending evictions as there are tasks
|
828
490
|
// that we might need to un-buffer (skipping runs which already have buffered tasks for
|
829
491
|
// themselves)
|
@@ -832,109 +494,222 @@ impl WFStream {
|
|
832
494
|
let num_existing_evictions = self
|
833
495
|
.runs
|
834
496
|
.runs_lru_order()
|
835
|
-
.filter(|(_, h)| h.
|
497
|
+
.filter(|(_, h)| h.is_trying_to_evict())
|
836
498
|
.count();
|
837
499
|
let mut num_evicts_needed = num_in_buff.saturating_sub(num_existing_evictions);
|
838
500
|
for (rid, handle) in self.runs.runs_lru_order() {
|
839
501
|
if num_evicts_needed == 0 {
|
840
502
|
break;
|
841
503
|
}
|
842
|
-
if handle.
|
504
|
+
if !handle.has_buffered_wft() {
|
843
505
|
num_evicts_needed -= 1;
|
844
506
|
evict_these.push(rid.to_string());
|
845
507
|
}
|
846
508
|
}
|
509
|
+
let mut acts = vec![];
|
847
510
|
for run_id in evict_these {
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
511
|
+
acts.extend(
|
512
|
+
self.request_eviction(RequestEvictMsg {
|
513
|
+
run_id,
|
514
|
+
message: "Workflow cache full".to_string(),
|
515
|
+
reason: EvictionReason::CacheFull,
|
516
|
+
})
|
517
|
+
.into_run_update_resp(),
|
518
|
+
);
|
853
519
|
}
|
854
|
-
|
855
|
-
|
856
|
-
fn reply_to_complete(
|
857
|
-
&self,
|
858
|
-
run_id: &str,
|
859
|
-
outcome: ActivationCompleteOutcome,
|
860
|
-
chan: oneshot::Sender<ActivationCompleteResult>,
|
861
|
-
) {
|
862
|
-
let most_recently_processed_event = self
|
863
|
-
.runs
|
864
|
-
.peek(run_id)
|
865
|
-
.map(|rh| rh.most_recently_processed_event_number)
|
866
|
-
.unwrap_or_default();
|
867
|
-
chan.send(ActivationCompleteResult {
|
868
|
-
most_recently_processed_event,
|
869
|
-
outcome,
|
870
|
-
})
|
871
|
-
.expect("Rcv half of activation reply not dropped");
|
520
|
+
acts
|
872
521
|
}
|
873
522
|
|
874
523
|
fn shutdown_done(&self) -> bool {
|
875
|
-
|
876
|
-
.
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
524
|
+
if self.shutdown_token.is_cancelled() {
|
525
|
+
if Arc::strong_count(&self.history_fetch_refcounter) > 1 {
|
526
|
+
// Don't exit if there are outstanding fetch requests
|
527
|
+
return false;
|
528
|
+
}
|
529
|
+
let all_runs_ready = self
|
530
|
+
.runs
|
531
|
+
.handles()
|
532
|
+
.all(|r| !r.has_any_pending_work(self.ignore_evicts_on_shutdown, false));
|
533
|
+
if all_runs_ready {
|
534
|
+
return true;
|
535
|
+
}
|
884
536
|
}
|
885
|
-
|
886
|
-
|
887
|
-
fn get_task(&mut self, run_id: &str) -> Option<&OutstandingTask> {
|
888
|
-
self.runs.get(run_id).and_then(|rh| rh.wft.as_ref())
|
889
|
-
}
|
890
|
-
|
891
|
-
fn get_activation(&mut self, run_id: &str) -> Option<&OutstandingActivation> {
|
892
|
-
self.runs.get(run_id).and_then(|rh| rh.activation.as_ref())
|
893
|
-
}
|
894
|
-
|
895
|
-
fn run_metrics(&mut self, run_id: &str) -> Option<&MetricsContext> {
|
896
|
-
self.runs.get(run_id).map(|r| &r.metrics)
|
897
|
-
}
|
898
|
-
|
899
|
-
fn activation_has_only_eviction(&mut self, run_id: &str) -> bool {
|
900
|
-
self.runs
|
901
|
-
.get(run_id)
|
902
|
-
.and_then(|rh| rh.activation)
|
903
|
-
.map(OutstandingActivation::has_only_eviction)
|
904
|
-
.unwrap_or_default()
|
905
|
-
}
|
906
|
-
|
907
|
-
fn activation_has_eviction(&mut self, run_id: &str) -> bool {
|
908
|
-
self.runs
|
909
|
-
.get(run_id)
|
910
|
-
.and_then(|rh| rh.activation)
|
911
|
-
.map(OutstandingActivation::has_eviction)
|
912
|
-
.unwrap_or_default()
|
537
|
+
false
|
913
538
|
}
|
914
539
|
|
915
540
|
fn outstanding_wfts(&self) -> usize {
|
916
|
-
self.runs.handles().filter(|r| r.wft.is_some()).count()
|
541
|
+
self.runs.handles().filter(|r| r.wft().is_some()).count()
|
917
542
|
}
|
918
543
|
|
919
544
|
// Useful when debugging
|
920
545
|
#[allow(dead_code)]
|
921
546
|
fn info_dump(&self, run_id: &str) {
|
922
547
|
if let Some(r) = self.runs.peek(run_id) {
|
923
|
-
info!(run_id, wft=?r.wft, activation=?r.activation
|
924
|
-
|
925
|
-
|
548
|
+
info!(run_id, wft=?r.wft(), activation=?r.activation(),
|
549
|
+
buffered_wft=r.has_buffered_wft(),
|
550
|
+
trying_to_evict=r.is_trying_to_evict(), more_work=r.more_pending_work());
|
926
551
|
} else {
|
927
552
|
info!(run_id, "Run not found");
|
928
553
|
}
|
929
554
|
}
|
930
555
|
}
|
931
556
|
|
932
|
-
///
|
933
|
-
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
|
938
|
-
|
939
|
-
|
557
|
+
/// All possible inputs to the [WFStream]
|
558
|
+
#[derive(derive_more::From, Debug)]
|
559
|
+
#[cfg_attr(
|
560
|
+
feature = "save_wf_inputs",
|
561
|
+
derive(serde::Serialize, serde::Deserialize)
|
562
|
+
)]
|
563
|
+
enum WFStreamInput {
|
564
|
+
NewWft(PermittedWFT),
|
565
|
+
Local(LocalInput),
|
566
|
+
/// The stream given to us which represents the poller (or a mock) terminated.
|
567
|
+
PollerDead,
|
568
|
+
/// The stream given to us which represents the poller (or a mock) encountered a non-retryable
|
569
|
+
/// error while polling
|
570
|
+
PollerError(
|
571
|
+
#[cfg_attr(
|
572
|
+
feature = "save_wf_inputs",
|
573
|
+
serde(with = "tonic_status_serde::SerdeStatus")
|
574
|
+
)]
|
575
|
+
tonic::Status,
|
576
|
+
),
|
577
|
+
FailedFetch {
|
578
|
+
run_id: String,
|
579
|
+
#[cfg_attr(
|
580
|
+
feature = "save_wf_inputs",
|
581
|
+
serde(with = "tonic_status_serde::SerdeStatus")
|
582
|
+
)]
|
583
|
+
err: tonic::Status,
|
584
|
+
},
|
585
|
+
}
|
586
|
+
|
587
|
+
/// A non-poller-received input to the [WFStream]
|
588
|
+
#[derive(derive_more::DebugCustom)]
|
589
|
+
#[cfg_attr(
|
590
|
+
feature = "save_wf_inputs",
|
591
|
+
derive(serde::Serialize, serde::Deserialize)
|
592
|
+
)]
|
593
|
+
#[debug(fmt = "LocalInput {{ {input:?} }}")]
|
594
|
+
pub(super) struct LocalInput {
|
595
|
+
pub input: LocalInputs,
|
596
|
+
#[cfg_attr(feature = "save_wf_inputs", serde(skip, default = "Span::current"))]
|
597
|
+
pub span: Span,
|
598
|
+
}
|
599
|
+
impl From<HeartbeatTimeoutMsg> for LocalInput {
|
600
|
+
fn from(hb: HeartbeatTimeoutMsg) -> Self {
|
601
|
+
Self {
|
602
|
+
input: LocalInputs::HeartbeatTimeout(hb.run_id),
|
603
|
+
span: hb.span,
|
604
|
+
}
|
605
|
+
}
|
606
|
+
}
|
607
|
+
/// Everything that _isn't_ a poll which may affect workflow state. Always higher priority than
|
608
|
+
/// new polls.
|
609
|
+
#[derive(Debug, derive_more::From)]
|
610
|
+
#[cfg_attr(
|
611
|
+
feature = "save_wf_inputs",
|
612
|
+
derive(serde::Serialize, serde::Deserialize)
|
613
|
+
)]
|
614
|
+
pub(super) enum LocalInputs {
|
615
|
+
Completion(WFActCompleteMsg),
|
616
|
+
FetchedPageCompletion {
|
617
|
+
paginator: HistoryPaginator,
|
618
|
+
update: HistoryUpdate,
|
619
|
+
},
|
620
|
+
LocalResolution(LocalResolutionMsg),
|
621
|
+
PostActivation(PostActivationMsg),
|
622
|
+
RequestEviction(RequestEvictMsg),
|
623
|
+
HeartbeatTimeout(String),
|
624
|
+
#[cfg_attr(feature = "save_wf_inputs", serde(skip))]
|
625
|
+
GetStateInfo(GetStateInfoMsg),
|
626
|
+
}
|
627
|
+
impl LocalInputs {
|
628
|
+
fn run_id(&self) -> Option<&str> {
|
629
|
+
Some(match self {
|
630
|
+
LocalInputs::Completion(c) => c.completion.run_id(),
|
631
|
+
LocalInputs::FetchedPageCompletion { paginator, .. } => &paginator.run_id,
|
632
|
+
LocalInputs::LocalResolution(lr) => &lr.run_id,
|
633
|
+
LocalInputs::PostActivation(pa) => &pa.run_id,
|
634
|
+
LocalInputs::RequestEviction(re) => &re.run_id,
|
635
|
+
LocalInputs::HeartbeatTimeout(hb) => hb,
|
636
|
+
LocalInputs::GetStateInfo(_) => return None,
|
637
|
+
})
|
638
|
+
}
|
639
|
+
}
|
640
|
+
#[derive(Debug)]
|
641
|
+
#[allow(clippy::large_enum_variant)] // PollerDead only ever gets used once, so not important.
|
642
|
+
enum ExternalPollerInputs {
|
643
|
+
NewWft(PermittedWFT),
|
644
|
+
PollerDead,
|
645
|
+
PollerError(tonic::Status),
|
646
|
+
FetchedUpdate(PermittedWFT),
|
647
|
+
NextPage {
|
648
|
+
paginator: HistoryPaginator,
|
649
|
+
update: HistoryUpdate,
|
650
|
+
span: Span,
|
651
|
+
},
|
652
|
+
FailedFetch {
|
653
|
+
run_id: String,
|
654
|
+
err: tonic::Status,
|
655
|
+
},
|
656
|
+
}
|
657
|
+
impl From<ExternalPollerInputs> for WFStreamInput {
|
658
|
+
fn from(l: ExternalPollerInputs) -> Self {
|
659
|
+
match l {
|
660
|
+
ExternalPollerInputs::NewWft(v) => WFStreamInput::NewWft(v),
|
661
|
+
ExternalPollerInputs::PollerDead => WFStreamInput::PollerDead,
|
662
|
+
ExternalPollerInputs::PollerError(e) => WFStreamInput::PollerError(e),
|
663
|
+
ExternalPollerInputs::FetchedUpdate(wft) => WFStreamInput::NewWft(wft),
|
664
|
+
ExternalPollerInputs::FailedFetch { run_id, err } => {
|
665
|
+
WFStreamInput::FailedFetch { run_id, err }
|
666
|
+
}
|
667
|
+
ExternalPollerInputs::NextPage {
|
668
|
+
paginator,
|
669
|
+
update,
|
670
|
+
span,
|
671
|
+
} => WFStreamInput::Local(LocalInput {
|
672
|
+
input: LocalInputs::FetchedPageCompletion { paginator, update },
|
673
|
+
span,
|
674
|
+
}),
|
675
|
+
}
|
676
|
+
}
|
677
|
+
}
|
678
|
+
impl From<Result<WFTExtractorOutput, tonic::Status>> for ExternalPollerInputs {
|
679
|
+
fn from(v: Result<WFTExtractorOutput, tonic::Status>) -> Self {
|
680
|
+
match v {
|
681
|
+
Ok(WFTExtractorOutput::NewWFT(pwft)) => ExternalPollerInputs::NewWft(pwft),
|
682
|
+
Ok(WFTExtractorOutput::FetchResult(updated_wft, _)) => {
|
683
|
+
ExternalPollerInputs::FetchedUpdate(updated_wft)
|
684
|
+
}
|
685
|
+
Ok(WFTExtractorOutput::NextPage {
|
686
|
+
paginator,
|
687
|
+
update,
|
688
|
+
span,
|
689
|
+
rc: _rc,
|
690
|
+
}) => ExternalPollerInputs::NextPage {
|
691
|
+
paginator,
|
692
|
+
update,
|
693
|
+
span,
|
694
|
+
},
|
695
|
+
Ok(WFTExtractorOutput::FailedFetch { run_id, err }) => {
|
696
|
+
ExternalPollerInputs::FailedFetch { run_id, err }
|
697
|
+
}
|
698
|
+
Ok(WFTExtractorOutput::PollerDead) => ExternalPollerInputs::PollerDead,
|
699
|
+
Err(e) => ExternalPollerInputs::PollerError(e),
|
700
|
+
}
|
701
|
+
}
|
702
|
+
}
|
703
|
+
#[derive(Debug)]
|
704
|
+
enum NewOrFetchedComplete {
|
705
|
+
New(WFActCompleteMsg),
|
706
|
+
Fetched(HistoryUpdate, HistoryPaginator),
|
707
|
+
}
|
708
|
+
impl NewOrFetchedComplete {
|
709
|
+
fn run_id(&self) -> &str {
|
710
|
+
match self {
|
711
|
+
NewOrFetchedComplete::New(c) => c.completion.run_id(),
|
712
|
+
NewOrFetchedComplete::Fetched(_, p) => &p.run_id,
|
713
|
+
}
|
714
|
+
}
|
940
715
|
}
|