temporalio 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +180 -7
- data/bridge/Cargo.lock +208 -76
- data/bridge/Cargo.toml +5 -2
- data/bridge/sdk-core/Cargo.toml +1 -1
- data/bridge/sdk-core/README.md +20 -10
- data/bridge/sdk-core/client/Cargo.toml +1 -1
- data/bridge/sdk-core/client/src/lib.rs +227 -59
- data/bridge/sdk-core/client/src/metrics.rs +17 -8
- data/bridge/sdk-core/client/src/raw.rs +13 -12
- data/bridge/sdk-core/client/src/retry.rs +132 -43
- data/bridge/sdk-core/core/Cargo.toml +28 -15
- data/bridge/sdk-core/core/benches/workflow_replay.rs +13 -10
- data/bridge/sdk-core/core/src/abstractions.rs +225 -36
- data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +217 -79
- data/bridge/sdk-core/core/src/core_tests/determinism.rs +165 -2
- data/bridge/sdk-core/core/src/core_tests/local_activities.rs +565 -34
- data/bridge/sdk-core/core/src/core_tests/queries.rs +247 -90
- data/bridge/sdk-core/core/src/core_tests/workers.rs +3 -5
- data/bridge/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
- data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +430 -67
- data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +106 -12
- data/bridge/sdk-core/core/src/internal_flags.rs +136 -0
- data/bridge/sdk-core/core/src/lib.rs +148 -34
- data/bridge/sdk-core/core/src/protosext/mod.rs +1 -1
- data/bridge/sdk-core/core/src/replay/mod.rs +185 -41
- data/bridge/sdk-core/core/src/telemetry/log_export.rs +190 -0
- data/bridge/sdk-core/core/src/telemetry/metrics.rs +219 -140
- data/bridge/sdk-core/core/src/telemetry/mod.rs +326 -315
- data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +20 -14
- data/bridge/sdk-core/core/src/test_help/mod.rs +85 -21
- data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +112 -156
- data/bridge/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
- data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +364 -128
- data/bridge/sdk-core/core/src/worker/activities.rs +263 -170
- data/bridge/sdk-core/core/src/worker/client/mocks.rs +23 -3
- data/bridge/sdk-core/core/src/worker/client.rs +48 -6
- data/bridge/sdk-core/core/src/worker/mod.rs +186 -75
- data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
- data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +13 -24
- data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +879 -226
- data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +101 -48
- data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +8 -12
- data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +6 -9
- data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +90 -32
- data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +6 -9
- data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +7 -10
- data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +6 -9
- data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +160 -83
- data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +36 -54
- data/bridge/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +179 -0
- data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +104 -157
- data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +8 -12
- data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +9 -13
- data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +10 -4
- data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +14 -11
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +395 -299
- data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +12 -20
- data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +33 -18
- data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +1032 -374
- data/bridge/sdk-core/core/src/worker/workflow/mod.rs +525 -392
- data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
- data/bridge/sdk-core/core/src/worker/workflow/wft_extraction.rs +125 -0
- data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +3 -6
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
- data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +456 -681
- data/bridge/sdk-core/core-api/Cargo.toml +6 -4
- data/bridge/sdk-core/core-api/src/errors.rs +1 -34
- data/bridge/sdk-core/core-api/src/lib.rs +7 -45
- data/bridge/sdk-core/core-api/src/telemetry.rs +141 -0
- data/bridge/sdk-core/core-api/src/worker.rs +27 -1
- data/bridge/sdk-core/etc/deps.svg +115 -140
- data/bridge/sdk-core/etc/regen-depgraph.sh +5 -0
- data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +18 -15
- data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +1 -1
- data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +8 -3
- data/bridge/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
- data/bridge/sdk-core/histories/evict_while_la_running_no_interference-23_history.bin +0 -0
- data/bridge/sdk-core/histories/evict_while_la_running_no_interference-85_history.bin +0 -0
- data/bridge/sdk-core/protos/api_upstream/buf.yaml +0 -3
- data/bridge/sdk-core/protos/api_upstream/build/go.mod +7 -0
- data/bridge/sdk-core/protos/api_upstream/build/go.sum +5 -0
- data/bridge/sdk-core/protos/api_upstream/{temporal/api/enums/v1/cluster.proto → build/tools.go} +7 -18
- data/bridge/sdk-core/protos/api_upstream/go.mod +6 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +12 -9
- data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +15 -26
- data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +13 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +4 -9
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +10 -8
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +28 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +24 -19
- data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +62 -26
- data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +4 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +24 -61
- data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -21
- data/bridge/sdk-core/protos/api_upstream/temporal/api/protocol/v1/message.proto +57 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +110 -31
- data/bridge/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
- data/bridge/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +4 -4
- data/bridge/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +71 -6
- data/bridge/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +3 -2
- data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +111 -36
- data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +19 -5
- data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +1 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +9 -0
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +9 -1
- data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +6 -0
- data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
- data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
- data/bridge/sdk-core/sdk/Cargo.toml +4 -3
- data/bridge/sdk-core/sdk/src/interceptors.rs +36 -3
- data/bridge/sdk-core/sdk/src/lib.rs +94 -25
- data/bridge/sdk-core/sdk/src/workflow_context.rs +13 -2
- data/bridge/sdk-core/sdk/src/workflow_future.rs +10 -13
- data/bridge/sdk-core/sdk-core-protos/Cargo.toml +5 -2
- data/bridge/sdk-core/sdk-core-protos/build.rs +36 -2
- data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +164 -104
- data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +27 -23
- data/bridge/sdk-core/sdk-core-protos/src/lib.rs +252 -74
- data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
- data/bridge/sdk-core/test-utils/Cargo.toml +4 -1
- data/bridge/sdk-core/test-utils/src/canned_histories.rs +106 -296
- data/bridge/sdk-core/test-utils/src/histfetch.rs +1 -1
- data/bridge/sdk-core/test-utils/src/lib.rs +161 -50
- data/bridge/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
- data/bridge/sdk-core/test-utils/src/workflows.rs +29 -0
- data/bridge/sdk-core/tests/fuzzy_workflow.rs +130 -0
- data/bridge/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
- data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
- data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +10 -5
- data/bridge/sdk-core/tests/integ_tests/metrics_tests.rs +239 -0
- data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +4 -60
- data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
- data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +93 -69
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +151 -116
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +54 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +7 -28
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +115 -24
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
- data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -4
- data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +27 -18
- data/bridge/sdk-core/tests/main.rs +8 -16
- data/bridge/sdk-core/tests/runner.rs +75 -36
- data/bridge/sdk-core/tests/wf_input_replay.rs +32 -0
- data/bridge/src/connection.rs +117 -82
- data/bridge/src/lib.rs +356 -42
- data/bridge/src/runtime.rs +10 -3
- data/bridge/src/test_server.rs +153 -0
- data/bridge/src/worker.rs +133 -9
- data/lib/gen/temporal/api/batch/v1/message_pb.rb +8 -6
- data/lib/gen/temporal/api/command/v1/message_pb.rb +10 -16
- data/lib/gen/temporal/api/common/v1/message_pb.rb +5 -1
- data/lib/gen/temporal/api/enums/v1/batch_operation_pb.rb +2 -1
- data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +3 -3
- data/lib/gen/temporal/api/enums/v1/common_pb.rb +2 -1
- data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +5 -4
- data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +9 -1
- data/lib/gen/temporal/api/enums/v1/namespace_pb.rb +1 -1
- data/lib/gen/temporal/api/enums/v1/query_pb.rb +1 -1
- data/lib/gen/temporal/api/enums/v1/reset_pb.rb +1 -1
- data/lib/gen/temporal/api/enums/v1/schedule_pb.rb +1 -1
- data/lib/gen/temporal/api/enums/v1/task_queue_pb.rb +1 -1
- data/lib/gen/temporal/api/enums/v1/update_pb.rb +7 -10
- data/lib/gen/temporal/api/enums/v1/workflow_pb.rb +1 -1
- data/lib/gen/temporal/api/errordetails/v1/message_pb.rb +1 -1
- data/lib/gen/temporal/api/failure/v1/message_pb.rb +1 -1
- data/lib/gen/temporal/api/filter/v1/message_pb.rb +1 -1
- data/lib/gen/temporal/api/history/v1/message_pb.rb +34 -25
- data/lib/gen/temporal/api/namespace/v1/message_pb.rb +2 -1
- data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +14 -51
- data/lib/gen/temporal/api/operatorservice/v1/service_pb.rb +1 -1
- data/lib/gen/temporal/api/protocol/v1/message_pb.rb +30 -0
- data/lib/gen/temporal/api/query/v1/message_pb.rb +1 -1
- data/lib/gen/temporal/api/replication/v1/message_pb.rb +1 -1
- data/lib/gen/temporal/api/schedule/v1/message_pb.rb +22 -1
- data/lib/gen/temporal/api/sdk/v1/task_complete_metadata_pb.rb +23 -0
- data/lib/gen/temporal/api/taskqueue/v1/message_pb.rb +2 -2
- data/lib/gen/temporal/api/testservice/v1/request_response_pb.rb +49 -0
- data/lib/gen/temporal/api/testservice/v1/service_pb.rb +21 -0
- data/lib/gen/temporal/api/update/v1/message_pb.rb +49 -3
- data/lib/gen/temporal/api/version/v1/message_pb.rb +1 -1
- data/lib/gen/temporal/api/workflow/v1/message_pb.rb +2 -1
- data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +47 -20
- data/lib/gen/temporal/api/workflowservice/v1/service_pb.rb +1 -1
- data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +13 -9
- data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +10 -6
- data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +13 -9
- data/lib/gen/temporal/sdk/core/common/common_pb.rb +7 -3
- data/lib/gen/temporal/sdk/core/core_interface_pb.rb +9 -3
- data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +7 -3
- data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +28 -21
- data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +32 -24
- data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +12 -5
- data/lib/temporalio/activity/context.rb +102 -0
- data/lib/temporalio/activity/info.rb +67 -0
- data/lib/temporalio/activity.rb +85 -0
- data/lib/temporalio/bridge/connect_options.rb +15 -0
- data/lib/temporalio/bridge/error.rb +8 -0
- data/lib/temporalio/bridge/retry_config.rb +24 -0
- data/lib/temporalio/bridge/tls_options.rb +19 -0
- data/lib/temporalio/bridge.rb +14 -0
- data/lib/{temporal → temporalio}/client/implementation.rb +57 -56
- data/lib/{temporal → temporalio}/client/workflow_handle.rb +35 -35
- data/lib/{temporal → temporalio}/client.rb +19 -32
- data/lib/temporalio/connection/retry_config.rb +44 -0
- data/lib/temporalio/connection/service.rb +20 -0
- data/lib/temporalio/connection/test_service.rb +92 -0
- data/lib/temporalio/connection/tls_options.rb +51 -0
- data/lib/temporalio/connection/workflow_service.rb +731 -0
- data/lib/temporalio/connection.rb +86 -0
- data/lib/{temporal → temporalio}/data_converter.rb +76 -35
- data/lib/{temporal → temporalio}/error/failure.rb +6 -6
- data/lib/{temporal → temporalio}/error/workflow_failure.rb +4 -2
- data/lib/{temporal → temporalio}/errors.rb +19 -1
- data/lib/{temporal → temporalio}/failure_converter/base.rb +5 -5
- data/lib/{temporal → temporalio}/failure_converter/basic.rb +58 -52
- data/lib/temporalio/failure_converter.rb +7 -0
- data/lib/temporalio/interceptor/activity_inbound.rb +22 -0
- data/lib/temporalio/interceptor/activity_outbound.rb +24 -0
- data/lib/{temporal → temporalio}/interceptor/chain.rb +7 -6
- data/lib/{temporal → temporalio}/interceptor/client.rb +27 -2
- data/lib/temporalio/interceptor.rb +22 -0
- data/lib/{temporal → temporalio}/payload_codec/base.rb +5 -5
- data/lib/{temporal → temporalio}/payload_converter/base.rb +3 -3
- data/lib/{temporal → temporalio}/payload_converter/bytes.rb +4 -3
- data/lib/{temporal → temporalio}/payload_converter/composite.rb +7 -5
- data/lib/{temporal → temporalio}/payload_converter/encoding_base.rb +4 -4
- data/lib/{temporal → temporalio}/payload_converter/json.rb +4 -3
- data/lib/{temporal → temporalio}/payload_converter/nil.rb +4 -3
- data/lib/temporalio/payload_converter.rb +14 -0
- data/lib/{temporal → temporalio}/retry_policy.rb +17 -7
- data/lib/{temporal → temporalio}/retry_state.rb +1 -1
- data/lib/temporalio/runtime.rb +25 -0
- data/lib/temporalio/testing/time_skipping_handle.rb +32 -0
- data/lib/temporalio/testing/time_skipping_interceptor.rb +23 -0
- data/lib/temporalio/testing/workflow_environment.rb +112 -0
- data/lib/temporalio/testing.rb +175 -0
- data/lib/{temporal → temporalio}/timeout_type.rb +2 -2
- data/lib/temporalio/version.rb +3 -0
- data/lib/temporalio/worker/activity_runner.rb +114 -0
- data/lib/temporalio/worker/activity_worker.rb +164 -0
- data/lib/temporalio/worker/reactor.rb +46 -0
- data/lib/temporalio/worker/runner.rb +63 -0
- data/lib/temporalio/worker/sync_worker.rb +124 -0
- data/lib/temporalio/worker/thread_pool_executor.rb +51 -0
- data/lib/temporalio/worker.rb +204 -0
- data/lib/temporalio/workflow/async.rb +46 -0
- data/lib/{temporal → temporalio}/workflow/execution_info.rb +4 -4
- data/lib/{temporal → temporalio}/workflow/execution_status.rb +1 -1
- data/lib/temporalio/workflow/future.rb +138 -0
- data/lib/{temporal → temporalio}/workflow/id_reuse_policy.rb +6 -6
- data/lib/temporalio/workflow/info.rb +76 -0
- data/lib/{temporal → temporalio}/workflow/query_reject_condition.rb +5 -5
- data/lib/temporalio.rb +12 -3
- data/temporalio.gemspec +11 -6
- metadata +137 -64
- data/bridge/sdk-core/Cargo.lock +0 -2606
- data/bridge/sdk-core/bridge-ffi/Cargo.toml +0 -24
- data/bridge/sdk-core/bridge-ffi/LICENSE.txt +0 -23
- data/bridge/sdk-core/bridge-ffi/build.rs +0 -25
- data/bridge/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -249
- data/bridge/sdk-core/bridge-ffi/src/lib.rs +0 -825
- data/bridge/sdk-core/bridge-ffi/src/wrappers.rs +0 -211
- data/bridge/sdk-core/core/src/log_export.rs +0 -62
- data/bridge/sdk-core/core/src/worker/workflow/machines/mutable_side_effect_state_machine.rs +0 -127
- data/bridge/sdk-core/core/src/worker/workflow/machines/side_effect_state_machine.rs +0 -71
- data/bridge/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +0 -83
- data/bridge/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
- data/bridge/sdk-core/sdk/src/conversions.rs +0 -8
- data/lib/bridge.so +0 -0
- data/lib/gen/temporal/api/cluster/v1/message_pb.rb +0 -67
- data/lib/gen/temporal/api/enums/v1/cluster_pb.rb +0 -26
- data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +0 -222
- data/lib/temporal/bridge.rb +0 -14
- data/lib/temporal/connection.rb +0 -736
- data/lib/temporal/failure_converter.rb +0 -8
- data/lib/temporal/payload_converter.rb +0 -14
- data/lib/temporal/runtime.rb +0 -22
- data/lib/temporal/version.rb +0 -3
- data/lib/temporal.rb +0 -8
@@ -1,13 +1,18 @@
|
|
1
1
|
use crate::{
|
2
|
-
abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
|
2
|
+
abstractions::{dbg_panic, MeteredSemaphore, OwnedMeteredSemPermit, UsedMeteredSemPermit},
|
3
3
|
protosext::ValidScheduleLA,
|
4
4
|
retry_logic::RetryPolicyExt,
|
5
|
+
worker::workflow::HeartbeatTimeoutMsg,
|
5
6
|
MetricsContext, TaskToken,
|
6
7
|
};
|
7
|
-
use
|
8
|
+
use futures::{stream::BoxStream, Stream};
|
9
|
+
use futures_util::{future, future::AbortRegistration, stream, StreamExt};
|
10
|
+
use parking_lot::{Mutex, MutexGuard};
|
8
11
|
use std::{
|
9
|
-
collections::HashMap,
|
12
|
+
collections::{hash_map::Entry, HashMap},
|
10
13
|
fmt::{Debug, Formatter},
|
14
|
+
pin::Pin,
|
15
|
+
task::{Context, Poll},
|
11
16
|
time::{Duration, Instant, SystemTime},
|
12
17
|
};
|
13
18
|
use temporal_sdk_core_protos::{
|
@@ -25,6 +30,7 @@ use tokio::{
|
|
25
30
|
task::JoinHandle,
|
26
31
|
time::sleep,
|
27
32
|
};
|
33
|
+
use tokio_stream::wrappers::UnboundedReceiverStream;
|
28
34
|
use tokio_util::sync::CancellationToken;
|
29
35
|
|
30
36
|
#[allow(clippy::large_enum_variant)] // Timeouts are relatively rare
|
@@ -45,10 +51,14 @@ pub(crate) struct LocalInFlightActInfo {
|
|
45
51
|
pub la_info: NewLocalAct,
|
46
52
|
pub dispatch_time: Instant,
|
47
53
|
pub attempt: u32,
|
48
|
-
_permit:
|
54
|
+
_permit: UsedMeteredSemPermit,
|
49
55
|
}
|
50
56
|
|
51
57
|
#[derive(Debug, Clone)]
|
58
|
+
#[cfg_attr(
|
59
|
+
feature = "save_wf_inputs",
|
60
|
+
derive(serde::Serialize, serde::Deserialize)
|
61
|
+
)]
|
52
62
|
pub(crate) enum LocalActivityExecutionResult {
|
53
63
|
Completed(Success),
|
54
64
|
Failed(ActFail),
|
@@ -65,6 +75,10 @@ impl LocalActivityExecutionResult {
|
|
65
75
|
}
|
66
76
|
|
67
77
|
#[derive(Debug, Clone)]
|
78
|
+
#[cfg_attr(
|
79
|
+
feature = "save_wf_inputs",
|
80
|
+
derive(serde::Serialize, serde::Deserialize)
|
81
|
+
)]
|
68
82
|
pub(crate) struct LocalActivityResolution {
|
69
83
|
pub seq: u32,
|
70
84
|
pub result: LocalActivityExecutionResult,
|
@@ -96,6 +110,17 @@ impl Debug for NewLocalAct {
|
|
96
110
|
pub(crate) enum LocalActRequest {
|
97
111
|
New(NewLocalAct),
|
98
112
|
Cancel(ExecutingLAId),
|
113
|
+
#[from(ignore)]
|
114
|
+
CancelAllInRun(String),
|
115
|
+
StartHeartbeatTimeout {
|
116
|
+
send_on_elapse: HeartbeatTimeoutMsg,
|
117
|
+
deadline: Instant,
|
118
|
+
abort_reg: AbortRegistration,
|
119
|
+
},
|
120
|
+
/// Tell the LA manager that a workflow task was responded to (completed or failed) for a
|
121
|
+
/// certain run id
|
122
|
+
#[from(ignore)]
|
123
|
+
IndicateWorkflowTaskCompleted(String),
|
99
124
|
}
|
100
125
|
|
101
126
|
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
|
@@ -107,28 +132,43 @@ pub(crate) struct ExecutingLAId {
|
|
107
132
|
pub(crate) struct LocalActivityManager {
|
108
133
|
/// Just so we can provide activity tasks the same namespace as the worker
|
109
134
|
namespace: String,
|
110
|
-
/// Constrains number of currently executing local activities
|
111
|
-
semaphore: MeteredSemaphore,
|
112
135
|
/// Sink for new activity execution requests
|
113
136
|
act_req_tx: UnboundedSender<NewOrRetry>,
|
114
137
|
/// Cancels need a different queue since they should be taken first, and don't take a permit
|
115
138
|
cancels_req_tx: UnboundedSender<CancelOrTimeout>,
|
139
|
+
/// For the emission of heartbeat timeouts, back into the workflow machines. This channel
|
140
|
+
/// needs to come in from above us, because we cannot rely on callers getting the next
|
141
|
+
/// activation as a way to deliver heartbeats.
|
142
|
+
heartbeat_timeout_tx: UnboundedSender<HeartbeatTimeoutMsg>,
|
116
143
|
/// Wakes every time a complete is processed
|
117
144
|
complete_notify: Notify,
|
145
|
+
/// Set once workflows have finished shutting down, and thus we know we will no longer receive
|
146
|
+
/// any requests to spawn new LAs
|
147
|
+
workflows_have_shut_down: CancellationToken,
|
118
148
|
|
119
149
|
rcvs: tokio::sync::Mutex<RcvChans>,
|
120
150
|
shutdown_complete_tok: CancellationToken,
|
121
151
|
dat: Mutex<LAMData>,
|
122
152
|
}
|
123
153
|
|
154
|
+
struct LocalActivityInfo {
|
155
|
+
task_token: TaskToken,
|
156
|
+
/// Tasks for the current backoff until the next retry, if any.
|
157
|
+
backing_off_task: Option<JoinHandle<()>>,
|
158
|
+
/// Tasks / info about timeouts associated with this LA. May be empty for very brief periods
|
159
|
+
/// while the LA id has been generated, but it has not yet been scheduled.
|
160
|
+
timeout_bag: Option<TimeoutBag>,
|
161
|
+
/// True once the first workflow task this LA started in has elapsed
|
162
|
+
first_wft_has_ended: bool,
|
163
|
+
/// Attempts at executing this LA during the current WFT
|
164
|
+
attempts_in_wft: usize,
|
165
|
+
}
|
166
|
+
|
124
167
|
struct LAMData {
|
168
|
+
/// Maps local activity identifiers to information about them
|
169
|
+
la_info: HashMap<ExecutingLAId, LocalActivityInfo>,
|
125
170
|
/// Activities that have been issued to lang but not yet completed
|
126
171
|
outstanding_activity_tasks: HashMap<TaskToken, LocalInFlightActInfo>,
|
127
|
-
id_to_tt: HashMap<ExecutingLAId, TaskToken>,
|
128
|
-
/// Tasks for activities which are currently backing off. May be used to cancel retrying them.
|
129
|
-
backing_off_tasks: HashMap<ExecutingLAId, JoinHandle<()>>,
|
130
|
-
/// Tasks for timing out activities which are currently in the queue or dispatched.
|
131
|
-
timeout_tasks: HashMap<ExecutingLAId, TimeoutBag>,
|
132
172
|
next_tt_num: u32,
|
133
173
|
}
|
134
174
|
|
@@ -143,43 +183,47 @@ impl LocalActivityManager {
|
|
143
183
|
pub(crate) fn new(
|
144
184
|
max_concurrent: usize,
|
145
185
|
namespace: String,
|
186
|
+
heartbeat_timeout_tx: UnboundedSender<HeartbeatTimeoutMsg>,
|
146
187
|
metrics_context: MetricsContext,
|
147
188
|
) -> Self {
|
148
189
|
let (act_req_tx, act_req_rx) = unbounded_channel();
|
149
190
|
let (cancels_req_tx, cancels_req_rx) = unbounded_channel();
|
150
191
|
let shutdown_complete_tok = CancellationToken::new();
|
192
|
+
let semaphore = MeteredSemaphore::new(
|
193
|
+
max_concurrent,
|
194
|
+
metrics_context,
|
195
|
+
MetricsContext::available_task_slots,
|
196
|
+
);
|
151
197
|
Self {
|
152
198
|
namespace,
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
199
|
+
rcvs: tokio::sync::Mutex::new(RcvChans::new(
|
200
|
+
act_req_rx,
|
201
|
+
semaphore,
|
202
|
+
cancels_req_rx,
|
203
|
+
shutdown_complete_tok.clone(),
|
204
|
+
)),
|
158
205
|
act_req_tx,
|
159
206
|
cancels_req_tx,
|
207
|
+
heartbeat_timeout_tx,
|
160
208
|
complete_notify: Notify::new(),
|
161
|
-
rcvs: tokio::sync::Mutex::new(RcvChans {
|
162
|
-
act_req_rx,
|
163
|
-
cancels_req_rx,
|
164
|
-
shutdown: shutdown_complete_tok.clone(),
|
165
|
-
}),
|
166
209
|
shutdown_complete_tok,
|
167
210
|
dat: Mutex::new(LAMData {
|
168
211
|
outstanding_activity_tasks: Default::default(),
|
169
|
-
|
170
|
-
backing_off_tasks: Default::default(),
|
171
|
-
timeout_tasks: Default::default(),
|
212
|
+
la_info: Default::default(),
|
172
213
|
next_tt_num: 0,
|
173
214
|
}),
|
215
|
+
workflows_have_shut_down: Default::default(),
|
174
216
|
}
|
175
217
|
}
|
176
218
|
|
177
219
|
#[cfg(test)]
|
178
220
|
fn test(max_concurrent: usize) -> Self {
|
221
|
+
let (hb_tx, _hb_rx) = unbounded_channel();
|
179
222
|
Self::new(
|
180
223
|
max_concurrent,
|
181
224
|
"fake_ns".to_string(),
|
182
|
-
|
225
|
+
hb_tx,
|
226
|
+
MetricsContext::no_op(),
|
183
227
|
)
|
184
228
|
}
|
185
229
|
|
@@ -190,76 +234,116 @@ impl LocalActivityManager {
|
|
190
234
|
|
191
235
|
#[cfg(test)]
|
192
236
|
fn num_in_backoff(&self) -> usize {
|
193
|
-
self.dat
|
237
|
+
self.dat
|
238
|
+
.lock()
|
239
|
+
.la_info
|
240
|
+
.values()
|
241
|
+
.filter(|lai| lai.backing_off_task.is_some())
|
242
|
+
.count()
|
194
243
|
}
|
195
244
|
|
196
245
|
pub(crate) fn enqueue(
|
197
246
|
&self,
|
198
247
|
reqs: impl IntoIterator<Item = LocalActRequest>,
|
199
248
|
) -> Vec<LocalActivityResolution> {
|
249
|
+
if self.workflows_have_shut_down.is_cancelled() {
|
250
|
+
dbg_panic!("Tried to enqueue local activity after workflows were shut down");
|
251
|
+
return vec![];
|
252
|
+
}
|
200
253
|
let mut immediate_resolutions = vec![];
|
201
254
|
for req in reqs {
|
202
|
-
debug!(local_activity = ?req, "Queuing local activity");
|
203
255
|
match req {
|
204
256
|
LocalActRequest::New(act) => {
|
257
|
+
debug!(local_activity=?act, "Queuing local activity");
|
205
258
|
let id = ExecutingLAId {
|
206
259
|
run_id: act.workflow_exec_info.run_id.clone(),
|
207
260
|
seq_num: act.schedule_cmd.seq,
|
208
261
|
};
|
209
262
|
let mut dlock = self.dat.lock();
|
210
|
-
if dlock.id_to_tt.contains_key(&id) {
|
211
|
-
// Do not queue local activities which are in fact already executing.
|
212
|
-
// This can happen during evictions.
|
213
|
-
debug!("Tried to queue already-executing local activity {:?}", &id);
|
214
|
-
continue;
|
215
|
-
}
|
216
|
-
// Pre-generate and insert the task token now, before we may or may not dispatch
|
217
|
-
// the activity, so we can enforce idempotency. Prevents two identical LAs
|
218
|
-
// ending up in the queue at once.
|
219
263
|
let tt = dlock.gen_next_token();
|
220
|
-
dlock.
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
264
|
+
match dlock.la_info.entry(id) {
|
265
|
+
Entry::Occupied(o) => {
|
266
|
+
// Do not queue local activities which are in fact already executing.
|
267
|
+
// This can happen during evictions.
|
268
|
+
debug!(
|
269
|
+
"Tried to queue already-executing local activity {:?}",
|
270
|
+
o.key()
|
271
|
+
);
|
272
|
+
continue;
|
273
|
+
}
|
274
|
+
Entry::Vacant(ve) => {
|
275
|
+
// Insert the task token now, before we may or may not dispatch the
|
276
|
+
// activity, so we can enforce idempotency. Prevents two identical LAs
|
277
|
+
// ending up in the queue at once.
|
278
|
+
let lai = ve.insert(LocalActivityInfo {
|
279
|
+
task_token: tt,
|
280
|
+
backing_off_task: None,
|
281
|
+
timeout_bag: None,
|
282
|
+
first_wft_has_ended: false,
|
283
|
+
attempts_in_wft: 0,
|
284
|
+
});
|
285
|
+
|
286
|
+
// Set up timeouts for the new activity
|
287
|
+
match TimeoutBag::new(&act, self.cancels_req_tx.clone()) {
|
288
|
+
Ok(tb) => {
|
289
|
+
lai.timeout_bag = Some(tb);
|
290
|
+
|
291
|
+
self.act_req_tx.send(NewOrRetry::New(act)).expect(
|
292
|
+
"Receive half of LA request channel cannot be dropped",
|
293
|
+
);
|
294
|
+
}
|
295
|
+
Err(res) => immediate_resolutions.push(res),
|
296
|
+
}
|
230
297
|
}
|
231
|
-
Err(res) => immediate_resolutions.push(res),
|
232
298
|
}
|
233
299
|
}
|
300
|
+
LocalActRequest::StartHeartbeatTimeout {
|
301
|
+
send_on_elapse,
|
302
|
+
deadline,
|
303
|
+
abort_reg,
|
304
|
+
} => {
|
305
|
+
let chan = self.heartbeat_timeout_tx.clone();
|
306
|
+
tokio::spawn(future::Abortable::new(
|
307
|
+
async move {
|
308
|
+
tokio::time::sleep_until(deadline.into()).await;
|
309
|
+
let _ = chan.send(send_on_elapse);
|
310
|
+
},
|
311
|
+
abort_reg,
|
312
|
+
));
|
313
|
+
}
|
234
314
|
LocalActRequest::Cancel(id) => {
|
315
|
+
debug!(id=?id, "Cancelling local activity");
|
235
316
|
let mut dlock = self.dat.lock();
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
t.abort();
|
241
|
-
immediate_resolutions.push(LocalActivityResolution {
|
242
|
-
seq: id.seq_num,
|
243
|
-
result: LocalActivityExecutionResult::Cancelled(
|
244
|
-
Cancellation::from_details(None),
|
245
|
-
),
|
246
|
-
runtime: Duration::from_secs(0),
|
247
|
-
attempt: 0,
|
248
|
-
backoff: None,
|
249
|
-
original_schedule_time: None,
|
250
|
-
});
|
251
|
-
continue;
|
317
|
+
if let Some(lai) = dlock.la_info.get_mut(&id) {
|
318
|
+
if let Some(immediate_res) = self.cancel_one_la(id.seq_num, lai) {
|
319
|
+
immediate_resolutions.push(immediate_res);
|
320
|
+
}
|
252
321
|
}
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
322
|
+
}
|
323
|
+
LocalActRequest::CancelAllInRun(run_id) => {
|
324
|
+
debug!(run_id=%run_id, "Cancelling all local activities for run");
|
325
|
+
let mut dlock = self.dat.lock();
|
326
|
+
// Even if we've got 100k+ LAs this should only take a ms or two. Not worth
|
327
|
+
// adding another map to keep in sync.
|
328
|
+
let las_for_run = dlock
|
329
|
+
.la_info
|
330
|
+
.iter_mut()
|
331
|
+
.filter(|(id, _)| id.run_id == run_id);
|
332
|
+
for (laid, lainf) in las_for_run {
|
333
|
+
if let Some(immediate_res) = self.cancel_one_la(laid.seq_num, lainf) {
|
334
|
+
immediate_resolutions.push(immediate_res);
|
335
|
+
}
|
336
|
+
}
|
337
|
+
}
|
338
|
+
LocalActRequest::IndicateWorkflowTaskCompleted(run_id) => {
|
339
|
+
let mut dlock = self.dat.lock();
|
340
|
+
let las_for_run = dlock
|
341
|
+
.la_info
|
342
|
+
.iter_mut()
|
343
|
+
.filter(|(id, _)| id.run_id == run_id);
|
344
|
+
for (_, lainf) in las_for_run {
|
345
|
+
lainf.first_wft_has_ended = true;
|
346
|
+
lainf.attempts_in_wft = 0;
|
263
347
|
}
|
264
348
|
}
|
265
349
|
}
|
@@ -270,7 +354,7 @@ impl LocalActivityManager {
|
|
270
354
|
/// Returns the next pending local-activity related action, or None if shutdown has initiated
|
271
355
|
/// and there are no more remaining actions to take.
|
272
356
|
pub(crate) async fn next_pending(&self) -> Option<DispatchOrTimeoutLA> {
|
273
|
-
let (new_or_retry, permit) = match self.rcvs.lock().await.next(
|
357
|
+
let (new_or_retry, permit) = match self.rcvs.lock().await.next().await? {
|
274
358
|
NewOrCancel::Cancel(c) => {
|
275
359
|
return match c {
|
276
360
|
CancelOrTimeout::Cancel(c) => Some(DispatchOrTimeoutLA::Dispatch(c)),
|
@@ -283,12 +367,13 @@ impl LocalActivityManager {
|
|
283
367
|
let tt = self
|
284
368
|
.dat
|
285
369
|
.lock()
|
286
|
-
.
|
370
|
+
.la_info
|
287
371
|
.get(&ExecutingLAId {
|
288
372
|
run_id: run_id.clone(),
|
289
373
|
seq_num: resolution.seq,
|
290
374
|
})
|
291
|
-
.
|
375
|
+
.as_ref()
|
376
|
+
.map(|lai| lai.task_token.clone());
|
292
377
|
if let Some(task_token) = tt {
|
293
378
|
self.complete(&task_token, &resolution.result);
|
294
379
|
Some(ActivityTask {
|
@@ -323,18 +408,21 @@ impl LocalActivityManager {
|
|
323
408
|
}
|
324
409
|
NewOrRetry::Retry { in_flight, attempt } => (in_flight, attempt),
|
325
410
|
};
|
326
|
-
let
|
411
|
+
let la_info_for_in_flight_map = new_la.clone();
|
327
412
|
let id = ExecutingLAId {
|
328
413
|
run_id: new_la.workflow_exec_info.run_id.clone(),
|
329
414
|
seq_num: new_la.schedule_cmd.seq,
|
330
415
|
};
|
416
|
+
let orig_sched_time = new_la.schedule_cmd.original_schedule_time;
|
331
417
|
let sa = new_la.schedule_cmd;
|
332
418
|
|
333
419
|
let mut dat = self.dat.lock();
|
334
420
|
// If this request originated from a local backoff task, clear the entry for it. We
|
335
421
|
// don't await the handle because we know it must already be done, and there's no
|
336
422
|
// meaningful value.
|
337
|
-
dat.
|
423
|
+
dat.la_info
|
424
|
+
.get_mut(&id)
|
425
|
+
.map(|lai| lai.backing_off_task.take());
|
338
426
|
|
339
427
|
// If this task sat in the queue for too long, return a timeout for it instead
|
340
428
|
if let Some(s2s) = sa.schedule_to_start_timeout.as_ref() {
|
@@ -348,30 +436,27 @@ impl LocalActivityManager {
|
|
348
436
|
runtime: sat_for,
|
349
437
|
attempt,
|
350
438
|
backoff: None,
|
351
|
-
original_schedule_time:
|
439
|
+
original_schedule_time: orig_sched_time,
|
352
440
|
},
|
353
441
|
task: None,
|
354
442
|
});
|
355
443
|
}
|
356
444
|
}
|
357
445
|
|
358
|
-
let
|
359
|
-
|
360
|
-
|
361
|
-
.
|
362
|
-
|
446
|
+
let la_info = dat.la_info.get_mut(&id).expect("Activity must exist");
|
447
|
+
let tt = la_info.task_token.clone();
|
448
|
+
if let Some(to) = la_info.timeout_bag.as_mut() {
|
449
|
+
to.mark_started();
|
450
|
+
}
|
363
451
|
dat.outstanding_activity_tasks.insert(
|
364
452
|
tt.clone(),
|
365
453
|
LocalInFlightActInfo {
|
366
|
-
la_info:
|
454
|
+
la_info: la_info_for_in_flight_map,
|
367
455
|
dispatch_time: Instant::now(),
|
368
456
|
attempt,
|
369
|
-
_permit: permit,
|
457
|
+
_permit: permit.into_used(),
|
370
458
|
},
|
371
459
|
);
|
372
|
-
if let Some(to) = dat.timeout_tasks.get_mut(&id) {
|
373
|
-
to.mark_started();
|
374
|
-
}
|
375
460
|
|
376
461
|
let (schedule_to_close, start_to_close) = sa.close_timeouts.into_sched_and_start();
|
377
462
|
Some(DispatchOrTimeoutLA::Dispatch(ActivityTask {
|
@@ -406,11 +491,23 @@ impl LocalActivityManager {
|
|
406
491
|
) -> LACompleteAction {
|
407
492
|
let mut dlock = self.dat.lock();
|
408
493
|
if let Some(info) = dlock.outstanding_activity_tasks.remove(task_token) {
|
494
|
+
if self.workflows_have_shut_down.is_cancelled() {
|
495
|
+
// If workflows are already shut down, the results of all this don't matter.
|
496
|
+
// Just say we're done if there's nothing outstanding any more.
|
497
|
+
self.set_shutdown_complete_if_ready(&mut dlock);
|
498
|
+
}
|
499
|
+
|
409
500
|
let exec_id = ExecutingLAId {
|
410
501
|
run_id: info.la_info.workflow_exec_info.run_id.clone(),
|
411
502
|
seq_num: info.la_info.schedule_cmd.seq,
|
412
503
|
};
|
413
|
-
dlock.
|
504
|
+
let maybe_old_lai = dlock.la_info.remove(&exec_id);
|
505
|
+
if let Some(ref oldlai) = maybe_old_lai {
|
506
|
+
if let Some(ref bot) = oldlai.backing_off_task {
|
507
|
+
dbg_panic!("Just-resolved LA should not have backoff task");
|
508
|
+
bot.abort();
|
509
|
+
}
|
510
|
+
}
|
414
511
|
|
415
512
|
match status {
|
416
513
|
LocalActivityExecutionResult::Completed(_)
|
@@ -446,8 +543,6 @@ impl LocalActivityManager {
|
|
446
543
|
}
|
447
544
|
// Immediately create a new task token for the to-be-retried LA
|
448
545
|
let tt = dlock.gen_next_token();
|
449
|
-
dlock.id_to_tt.insert(exec_id.clone(), tt);
|
450
|
-
|
451
546
|
// Send the retry request after waiting the backoff duration
|
452
547
|
let send_chan = self.act_req_tx.clone();
|
453
548
|
let jh = tokio::spawn(async move {
|
@@ -460,7 +555,22 @@ impl LocalActivityManager {
|
|
460
555
|
})
|
461
556
|
.expect("Receive half of LA request channel cannot be dropped");
|
462
557
|
});
|
463
|
-
dlock.
|
558
|
+
dlock.la_info.insert(
|
559
|
+
exec_id,
|
560
|
+
LocalActivityInfo {
|
561
|
+
task_token: tt,
|
562
|
+
backing_off_task: Some(jh),
|
563
|
+
first_wft_has_ended: maybe_old_lai
|
564
|
+
.as_ref()
|
565
|
+
.map(|old| old.first_wft_has_ended)
|
566
|
+
.unwrap_or_default(),
|
567
|
+
attempts_in_wft: maybe_old_lai
|
568
|
+
.as_ref()
|
569
|
+
.map(|old| old.attempts_in_wft + 1)
|
570
|
+
.unwrap_or(1),
|
571
|
+
timeout_bag: maybe_old_lai.and_then(|old| old.timeout_bag),
|
572
|
+
},
|
573
|
+
);
|
464
574
|
|
465
575
|
LACompleteAction::WillBeRetried
|
466
576
|
} else {
|
@@ -473,11 +583,70 @@ impl LocalActivityManager {
|
|
473
583
|
}
|
474
584
|
}
|
475
585
|
|
476
|
-
pub(crate)
|
477
|
-
|
586
|
+
pub(crate) fn workflows_have_shutdown(&self) {
|
587
|
+
self.workflows_have_shut_down.cancel();
|
588
|
+
self.set_shutdown_complete_if_ready(&mut self.dat.lock());
|
589
|
+
}
|
590
|
+
|
591
|
+
pub(crate) async fn wait_all_outstanding_tasks_finished(&self) {
|
592
|
+
while !self.set_shutdown_complete_if_ready(&mut self.dat.lock()) {
|
478
593
|
self.complete_notify.notified().await;
|
479
594
|
}
|
480
|
-
|
595
|
+
}
|
596
|
+
|
597
|
+
/// Try to close the activity stream as soon as worker shutdown is initiated.
|
598
|
+
/// This is required for activity-only workers where since workflows are not polled and the activity poller might
|
599
|
+
/// get "stuck".
|
600
|
+
pub(crate) fn shutdown_initiated(&self) {
|
601
|
+
self.set_shutdown_complete_if_ready(&mut self.dat.lock());
|
602
|
+
}
|
603
|
+
|
604
|
+
pub(crate) fn get_nonfirst_attempt_count(&self, for_run_id: &str) -> usize {
|
605
|
+
let dlock = self.dat.lock();
|
606
|
+
dlock
|
607
|
+
.la_info
|
608
|
+
.iter()
|
609
|
+
.filter(|(id, info)| id.run_id == for_run_id && info.first_wft_has_ended)
|
610
|
+
.map(|(_, info)| info.attempts_in_wft)
|
611
|
+
.sum()
|
612
|
+
}
|
613
|
+
|
614
|
+
fn set_shutdown_complete_if_ready(&self, dlock: &mut MutexGuard<LAMData>) -> bool {
|
615
|
+
let nothing_outstanding = dlock.outstanding_activity_tasks.is_empty();
|
616
|
+
if nothing_outstanding {
|
617
|
+
self.shutdown_complete_tok.cancel();
|
618
|
+
}
|
619
|
+
nothing_outstanding
|
620
|
+
}
|
621
|
+
|
622
|
+
fn cancel_one_la(
|
623
|
+
&self,
|
624
|
+
seq: u32,
|
625
|
+
lai: &mut LocalActivityInfo,
|
626
|
+
) -> Option<LocalActivityResolution> {
|
627
|
+
// First check if this ID is currently backing off, if so abort the backoff
|
628
|
+
// task
|
629
|
+
if let Some(t) = lai.backing_off_task.take() {
|
630
|
+
t.abort();
|
631
|
+
return Some(LocalActivityResolution {
|
632
|
+
seq,
|
633
|
+
result: LocalActivityExecutionResult::Cancelled(Cancellation::from_details(None)),
|
634
|
+
runtime: Duration::from_secs(0),
|
635
|
+
attempt: 0,
|
636
|
+
backoff: None,
|
637
|
+
original_schedule_time: None,
|
638
|
+
});
|
639
|
+
}
|
640
|
+
|
641
|
+
self.cancels_req_tx
|
642
|
+
.send(CancelOrTimeout::Cancel(ActivityTask {
|
643
|
+
task_token: lai.task_token.0.clone(),
|
644
|
+
variant: Some(activity_task::Variant::Cancel(Cancel {
|
645
|
+
reason: ActivityCancelReason::Cancelled as i32,
|
646
|
+
})),
|
647
|
+
}))
|
648
|
+
.expect("Receive half of LA cancel channel cannot be dropped");
|
649
|
+
None
|
481
650
|
}
|
482
651
|
}
|
483
652
|
|
@@ -515,37 +684,51 @@ enum CancelOrTimeout {
|
|
515
684
|
},
|
516
685
|
}
|
517
686
|
|
687
|
+
#[allow(clippy::large_enum_variant)]
|
518
688
|
enum NewOrCancel {
|
519
689
|
New(NewOrRetry, OwnedMeteredSemPermit),
|
520
690
|
Cancel(CancelOrTimeout),
|
521
691
|
}
|
522
692
|
|
693
|
+
#[pin_project::pin_project]
|
523
694
|
struct RcvChans {
|
524
|
-
|
525
|
-
|
526
|
-
/// Cancels to send to lang or apply internally
|
527
|
-
cancels_req_rx: UnboundedReceiver<CancelOrTimeout>,
|
528
|
-
shutdown: CancellationToken,
|
695
|
+
#[pin]
|
696
|
+
inner: BoxStream<'static, NewOrCancel>,
|
529
697
|
}
|
530
698
|
|
531
699
|
impl RcvChans {
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
700
|
+
fn new(
|
701
|
+
new_reqs: UnboundedReceiver<NewOrRetry>,
|
702
|
+
new_sem: MeteredSemaphore,
|
703
|
+
cancels: UnboundedReceiver<CancelOrTimeout>,
|
704
|
+
shutdown_completed: CancellationToken,
|
705
|
+
) -> Self {
|
706
|
+
let cancel_stream = UnboundedReceiverStream::new(cancels).map(NewOrCancel::Cancel);
|
707
|
+
let new_stream = UnboundedReceiverStream::new(new_reqs)
|
708
|
+
// Get a permit for each new activity request
|
709
|
+
.zip(stream::unfold(new_sem, |new_sem| async move {
|
710
|
+
let permit = new_sem
|
711
|
+
.acquire_owned()
|
712
|
+
.await
|
713
|
+
.expect("Local activity semaphore is never closed");
|
714
|
+
Some((permit, new_sem))
|
715
|
+
}))
|
716
|
+
.map(|(req, permit)| NewOrCancel::New(req, permit));
|
717
|
+
Self {
|
718
|
+
inner: tokio_stream::StreamExt::merge(cancel_stream, new_stream)
|
719
|
+
.take_until(async move { shutdown_completed.cancelled().await })
|
720
|
+
.boxed(),
|
546
721
|
}
|
547
722
|
}
|
548
723
|
}
|
724
|
+
impl Stream for RcvChans {
|
725
|
+
type Item = NewOrCancel;
|
726
|
+
|
727
|
+
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
728
|
+
let this = self.project();
|
729
|
+
this.inner.poll_next(cx)
|
730
|
+
}
|
731
|
+
}
|
549
732
|
|
550
733
|
struct TimeoutBag {
|
551
734
|
sched_to_close_handle: JoinHandle<()>,
|
@@ -566,17 +749,21 @@ impl TimeoutBag {
|
|
566
749
|
let (schedule_to_close, start_to_close) =
|
567
750
|
new_la.schedule_cmd.close_timeouts.into_sched_and_start();
|
568
751
|
|
752
|
+
let sched_time = new_la
|
753
|
+
.schedule_cmd
|
754
|
+
.original_schedule_time
|
755
|
+
.unwrap_or(new_la.schedule_time);
|
569
756
|
let resolution = LocalActivityResolution {
|
570
757
|
seq: new_la.schedule_cmd.seq,
|
571
758
|
result: LocalActivityExecutionResult::timeout(TimeoutType::ScheduleToClose),
|
572
759
|
runtime: Default::default(),
|
573
760
|
attempt: new_la.schedule_cmd.attempt,
|
574
761
|
backoff: None,
|
575
|
-
original_schedule_time:
|
762
|
+
original_schedule_time: new_la.schedule_cmd.original_schedule_time,
|
576
763
|
};
|
577
764
|
// Remove any time already elapsed since the scheduling time
|
578
765
|
let schedule_to_close = schedule_to_close
|
579
|
-
.map(|s2c| s2c.saturating_sub(
|
766
|
+
.map(|s2c| s2c.saturating_sub(sched_time.elapsed().unwrap_or_default()));
|
580
767
|
if let Some(ref s2c) = schedule_to_close {
|
581
768
|
if s2c.is_zero() {
|
582
769
|
return Err(resolution);
|
@@ -639,18 +826,19 @@ impl Drop for TimeoutBag {
|
|
639
826
|
mod tests {
|
640
827
|
use super::*;
|
641
828
|
use crate::{prost_dur, protosext::LACloseTimeouts};
|
829
|
+
use futures_util::FutureExt;
|
642
830
|
use temporal_sdk_core_protos::temporal::api::{
|
643
831
|
common::v1::RetryPolicy,
|
644
832
|
failure::v1::{failure::FailureInfo, ApplicationFailureInfo, Failure},
|
645
833
|
};
|
646
|
-
use tokio::
|
834
|
+
use tokio::task::yield_now;
|
647
835
|
|
648
836
|
impl DispatchOrTimeoutLA {
|
649
837
|
fn unwrap(self) -> ActivityTask {
|
650
838
|
match self {
|
651
839
|
DispatchOrTimeoutLA::Dispatch(t) => t,
|
652
|
-
|
653
|
-
panic!("
|
840
|
+
_ => {
|
841
|
+
panic!("Non-dispatched action returned")
|
654
842
|
}
|
655
843
|
}
|
656
844
|
}
|
@@ -1025,18 +1213,66 @@ mod tests {
|
|
1025
1213
|
lam.next_pending().await.unwrap().unwrap();
|
1026
1214
|
assert_eq!(lam.num_outstanding(), 1);
|
1027
1215
|
// There should be nothing else in the queue
|
1028
|
-
|
1029
|
-
lam.rcvs.lock().await.act_req_rx.try_recv().unwrap_err(),
|
1030
|
-
TryRecvError::Empty
|
1031
|
-
);
|
1216
|
+
assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
|
1032
1217
|
|
1033
1218
|
// Verify that if we now enqueue the same act again, after the task is outstanding, we still
|
1034
1219
|
// don't add it.
|
1035
1220
|
lam.enqueue([new_la.into()]);
|
1036
1221
|
assert_eq!(lam.num_outstanding(), 1);
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1222
|
+
assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
|
1223
|
+
}
|
1224
|
+
|
1225
|
+
#[tokio::test]
|
1226
|
+
async fn nonfirst_la_attempt_count_is_accurate() {
|
1227
|
+
let run_id = "run_id";
|
1228
|
+
let lam = LocalActivityManager::test(10);
|
1229
|
+
let new_la = NewLocalAct {
|
1230
|
+
schedule_cmd: ValidScheduleLA {
|
1231
|
+
seq: 1,
|
1232
|
+
activity_id: 1.to_string(),
|
1233
|
+
retry_policy: RetryPolicy {
|
1234
|
+
initial_interval: Some(prost_dur!(from_millis(1))),
|
1235
|
+
backoff_coefficient: 1.0,
|
1236
|
+
..Default::default()
|
1237
|
+
},
|
1238
|
+
local_retry_threshold: Duration::from_secs(500),
|
1239
|
+
..Default::default()
|
1240
|
+
},
|
1241
|
+
workflow_type: "".to_string(),
|
1242
|
+
workflow_exec_info: WorkflowExecution {
|
1243
|
+
workflow_id: "".to_string(),
|
1244
|
+
run_id: run_id.to_string(),
|
1245
|
+
},
|
1246
|
+
schedule_time: SystemTime::now(),
|
1247
|
+
};
|
1248
|
+
lam.enqueue([new_la.clone().into()]);
|
1249
|
+
let spinfail = || async {
|
1250
|
+
for _ in 1..=10 {
|
1251
|
+
let next = lam.next_pending().await.unwrap().unwrap();
|
1252
|
+
let tt = TaskToken(next.task_token);
|
1253
|
+
lam.complete(
|
1254
|
+
&tt,
|
1255
|
+
&LocalActivityExecutionResult::Failed(Default::default()),
|
1256
|
+
);
|
1257
|
+
}
|
1258
|
+
};
|
1259
|
+
|
1260
|
+
// Fail a bunch of times
|
1261
|
+
spinfail().await;
|
1262
|
+
// Nonfirst attempt count should still be zero
|
1263
|
+
let count = lam.get_nonfirst_attempt_count(run_id);
|
1264
|
+
assert_eq!(count, 0);
|
1265
|
+
|
1266
|
+
for _ in 1..=2 {
|
1267
|
+
// This should work over multiple WFTs
|
1268
|
+
// say the first wft was completed
|
1269
|
+
lam.enqueue([LocalActRequest::IndicateWorkflowTaskCompleted(
|
1270
|
+
run_id.to_string(),
|
1271
|
+
)]);
|
1272
|
+
// Do some more attempts
|
1273
|
+
spinfail().await;
|
1274
|
+
let count = lam.get_nonfirst_attempt_count(run_id);
|
1275
|
+
assert_eq!(count, 10);
|
1276
|
+
}
|
1041
1277
|
}
|
1042
1278
|
}
|