temporalio 0.0.0 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (327) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +301 -0
  3. data/bridge/Cargo.lock +2888 -0
  4. data/bridge/Cargo.toml +27 -0
  5. data/bridge/sdk-core/ARCHITECTURE.md +76 -0
  6. data/bridge/sdk-core/Cargo.lock +2606 -0
  7. data/bridge/sdk-core/Cargo.toml +2 -0
  8. data/bridge/sdk-core/LICENSE.txt +23 -0
  9. data/bridge/sdk-core/README.md +104 -0
  10. data/bridge/sdk-core/arch_docs/diagrams/README.md +10 -0
  11. data/bridge/sdk-core/arch_docs/diagrams/sticky_queues.puml +40 -0
  12. data/bridge/sdk-core/arch_docs/diagrams/workflow_internals.svg +1 -0
  13. data/bridge/sdk-core/arch_docs/sticky_queues.md +51 -0
  14. data/bridge/sdk-core/client/Cargo.toml +40 -0
  15. data/bridge/sdk-core/client/LICENSE.txt +23 -0
  16. data/bridge/sdk-core/client/src/lib.rs +1286 -0
  17. data/bridge/sdk-core/client/src/metrics.rs +165 -0
  18. data/bridge/sdk-core/client/src/raw.rs +932 -0
  19. data/bridge/sdk-core/client/src/retry.rs +751 -0
  20. data/bridge/sdk-core/client/src/workflow_handle/mod.rs +185 -0
  21. data/bridge/sdk-core/core/Cargo.toml +116 -0
  22. data/bridge/sdk-core/core/LICENSE.txt +23 -0
  23. data/bridge/sdk-core/core/benches/workflow_replay.rs +76 -0
  24. data/bridge/sdk-core/core/src/abstractions.rs +166 -0
  25. data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +1014 -0
  26. data/bridge/sdk-core/core/src/core_tests/child_workflows.rs +221 -0
  27. data/bridge/sdk-core/core/src/core_tests/determinism.rs +107 -0
  28. data/bridge/sdk-core/core/src/core_tests/local_activities.rs +925 -0
  29. data/bridge/sdk-core/core/src/core_tests/mod.rs +100 -0
  30. data/bridge/sdk-core/core/src/core_tests/queries.rs +894 -0
  31. data/bridge/sdk-core/core/src/core_tests/replay_flag.rs +65 -0
  32. data/bridge/sdk-core/core/src/core_tests/workers.rs +259 -0
  33. data/bridge/sdk-core/core/src/core_tests/workflow_cancels.rs +124 -0
  34. data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +2090 -0
  35. data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +515 -0
  36. data/bridge/sdk-core/core/src/lib.rs +282 -0
  37. data/bridge/sdk-core/core/src/pollers/mod.rs +54 -0
  38. data/bridge/sdk-core/core/src/pollers/poll_buffer.rs +297 -0
  39. data/bridge/sdk-core/core/src/protosext/mod.rs +428 -0
  40. data/bridge/sdk-core/core/src/replay/mod.rs +215 -0
  41. data/bridge/sdk-core/core/src/retry_logic.rs +202 -0
  42. data/bridge/sdk-core/core/src/telemetry/log_export.rs +190 -0
  43. data/bridge/sdk-core/core/src/telemetry/metrics.rs +428 -0
  44. data/bridge/sdk-core/core/src/telemetry/mod.rs +407 -0
  45. data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +78 -0
  46. data/bridge/sdk-core/core/src/test_help/mod.rs +889 -0
  47. data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +580 -0
  48. data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +1048 -0
  49. data/bridge/sdk-core/core/src/worker/activities.rs +481 -0
  50. data/bridge/sdk-core/core/src/worker/client/mocks.rs +87 -0
  51. data/bridge/sdk-core/core/src/worker/client.rs +373 -0
  52. data/bridge/sdk-core/core/src/worker/mod.rs +570 -0
  53. data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +37 -0
  54. data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +101 -0
  55. data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +532 -0
  56. data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +907 -0
  57. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +294 -0
  58. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +167 -0
  59. data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +858 -0
  60. data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +136 -0
  61. data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +157 -0
  62. data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +129 -0
  63. data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +1450 -0
  64. data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +316 -0
  65. data/bridge/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +178 -0
  66. data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +708 -0
  67. data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +439 -0
  68. data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +435 -0
  69. data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +175 -0
  70. data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +242 -0
  71. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +96 -0
  72. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +1200 -0
  73. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +272 -0
  74. data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +198 -0
  75. data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +655 -0
  76. data/bridge/sdk-core/core/src/worker/workflow/mod.rs +1200 -0
  77. data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +145 -0
  78. data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +88 -0
  79. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +985 -0
  80. data/bridge/sdk-core/core-api/Cargo.toml +32 -0
  81. data/bridge/sdk-core/core-api/LICENSE.txt +23 -0
  82. data/bridge/sdk-core/core-api/src/errors.rs +95 -0
  83. data/bridge/sdk-core/core-api/src/lib.rs +109 -0
  84. data/bridge/sdk-core/core-api/src/telemetry.rs +147 -0
  85. data/bridge/sdk-core/core-api/src/worker.rs +148 -0
  86. data/bridge/sdk-core/etc/deps.svg +162 -0
  87. data/bridge/sdk-core/etc/dynamic-config.yaml +2 -0
  88. data/bridge/sdk-core/etc/otel-collector-config.yaml +36 -0
  89. data/bridge/sdk-core/etc/prometheus.yaml +6 -0
  90. data/bridge/sdk-core/etc/regen-depgraph.sh +5 -0
  91. data/bridge/sdk-core/fsm/Cargo.toml +18 -0
  92. data/bridge/sdk-core/fsm/LICENSE.txt +23 -0
  93. data/bridge/sdk-core/fsm/README.md +3 -0
  94. data/bridge/sdk-core/fsm/rustfsm_procmacro/Cargo.toml +27 -0
  95. data/bridge/sdk-core/fsm/rustfsm_procmacro/LICENSE.txt +23 -0
  96. data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +647 -0
  97. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/progress.rs +8 -0
  98. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/dupe_transitions_fail.rs +18 -0
  99. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/dupe_transitions_fail.stderr +12 -0
  100. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/dynamic_dest_pass.rs +41 -0
  101. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/forgot_name_fail.rs +14 -0
  102. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/forgot_name_fail.stderr +11 -0
  103. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/handler_arg_pass.rs +32 -0
  104. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/handler_pass.rs +31 -0
  105. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/medium_complex_pass.rs +46 -0
  106. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.rs +29 -0
  107. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +12 -0
  108. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/simple_pass.rs +32 -0
  109. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/struct_event_variant_fail.rs +18 -0
  110. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/struct_event_variant_fail.stderr +5 -0
  111. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_more_item_event_variant_fail.rs +11 -0
  112. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_more_item_event_variant_fail.stderr +5 -0
  113. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_zero_item_event_variant_fail.rs +11 -0
  114. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_zero_item_event_variant_fail.stderr +5 -0
  115. data/bridge/sdk-core/fsm/rustfsm_trait/Cargo.toml +14 -0
  116. data/bridge/sdk-core/fsm/rustfsm_trait/LICENSE.txt +23 -0
  117. data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +249 -0
  118. data/bridge/sdk-core/fsm/src/lib.rs +2 -0
  119. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-23_history.bin +0 -0
  120. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-85_history.bin +0 -0
  121. data/bridge/sdk-core/histories/fail_wf_task.bin +0 -0
  122. data/bridge/sdk-core/histories/timer_workflow_history.bin +0 -0
  123. data/bridge/sdk-core/integ-with-otel.sh +7 -0
  124. data/bridge/sdk-core/protos/api_upstream/README.md +9 -0
  125. data/bridge/sdk-core/protos/api_upstream/api-linter.yaml +40 -0
  126. data/bridge/sdk-core/protos/api_upstream/buf.yaml +9 -0
  127. data/bridge/sdk-core/protos/api_upstream/build/go.mod +7 -0
  128. data/bridge/sdk-core/protos/api_upstream/build/go.sum +5 -0
  129. data/bridge/sdk-core/protos/api_upstream/build/tools.go +29 -0
  130. data/bridge/sdk-core/protos/api_upstream/dependencies/gogoproto/gogo.proto +141 -0
  131. data/bridge/sdk-core/protos/api_upstream/go.mod +6 -0
  132. data/bridge/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +89 -0
  133. data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +260 -0
  134. data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +112 -0
  135. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +47 -0
  136. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +57 -0
  137. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +56 -0
  138. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +170 -0
  139. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +118 -0
  140. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/interaction_type.proto +39 -0
  141. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +51 -0
  142. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +50 -0
  143. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +41 -0
  144. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +60 -0
  145. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +59 -0
  146. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +40 -0
  147. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +122 -0
  148. data/bridge/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +108 -0
  149. data/bridge/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +114 -0
  150. data/bridge/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +56 -0
  151. data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +758 -0
  152. data/bridge/sdk-core/protos/api_upstream/temporal/api/interaction/v1/message.proto +87 -0
  153. data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +97 -0
  154. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +121 -0
  155. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +80 -0
  156. data/bridge/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +61 -0
  157. data/bridge/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +55 -0
  158. data/bridge/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +379 -0
  159. data/bridge/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +108 -0
  160. data/bridge/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +59 -0
  161. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +146 -0
  162. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +1168 -0
  163. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +415 -0
  164. data/bridge/sdk-core/protos/grpc/health/v1/health.proto +63 -0
  165. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +78 -0
  166. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +79 -0
  167. data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +77 -0
  168. data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +15 -0
  169. data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +30 -0
  170. data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +30 -0
  171. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +263 -0
  172. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +304 -0
  173. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +29 -0
  174. data/bridge/sdk-core/protos/testsrv_upstream/api-linter.yaml +38 -0
  175. data/bridge/sdk-core/protos/testsrv_upstream/buf.yaml +13 -0
  176. data/bridge/sdk-core/protos/testsrv_upstream/dependencies/gogoproto/gogo.proto +141 -0
  177. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +63 -0
  178. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +90 -0
  179. data/bridge/sdk-core/rustfmt.toml +1 -0
  180. data/bridge/sdk-core/sdk/Cargo.toml +47 -0
  181. data/bridge/sdk-core/sdk/LICENSE.txt +23 -0
  182. data/bridge/sdk-core/sdk/src/activity_context.rs +230 -0
  183. data/bridge/sdk-core/sdk/src/app_data.rs +37 -0
  184. data/bridge/sdk-core/sdk/src/interceptors.rs +50 -0
  185. data/bridge/sdk-core/sdk/src/lib.rs +794 -0
  186. data/bridge/sdk-core/sdk/src/payload_converter.rs +11 -0
  187. data/bridge/sdk-core/sdk/src/workflow_context/options.rs +295 -0
  188. data/bridge/sdk-core/sdk/src/workflow_context.rs +694 -0
  189. data/bridge/sdk-core/sdk/src/workflow_future.rs +499 -0
  190. data/bridge/sdk-core/sdk-core-protos/Cargo.toml +30 -0
  191. data/bridge/sdk-core/sdk-core-protos/LICENSE.txt +23 -0
  192. data/bridge/sdk-core/sdk-core-protos/build.rs +107 -0
  193. data/bridge/sdk-core/sdk-core-protos/src/constants.rs +7 -0
  194. data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +544 -0
  195. data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +230 -0
  196. data/bridge/sdk-core/sdk-core-protos/src/lib.rs +1970 -0
  197. data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +38 -0
  198. data/bridge/sdk-core/sdk-core-protos/src/utilities.rs +14 -0
  199. data/bridge/sdk-core/test-utils/Cargo.toml +36 -0
  200. data/bridge/sdk-core/test-utils/src/canned_histories.rs +1579 -0
  201. data/bridge/sdk-core/test-utils/src/histfetch.rs +28 -0
  202. data/bridge/sdk-core/test-utils/src/lib.rs +650 -0
  203. data/bridge/sdk-core/tests/integ_tests/client_tests.rs +36 -0
  204. data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +128 -0
  205. data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +221 -0
  206. data/bridge/sdk-core/tests/integ_tests/metrics_tests.rs +37 -0
  207. data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +133 -0
  208. data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +437 -0
  209. data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +93 -0
  210. data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +878 -0
  211. data/bridge/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +61 -0
  212. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +59 -0
  213. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +58 -0
  214. data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +50 -0
  215. data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +60 -0
  216. data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +54 -0
  217. data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +788 -0
  218. data/bridge/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +53 -0
  219. data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +113 -0
  220. data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +223 -0
  221. data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +93 -0
  222. data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +167 -0
  223. data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +99 -0
  224. data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +131 -0
  225. data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +75 -0
  226. data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +597 -0
  227. data/bridge/sdk-core/tests/load_tests.rs +191 -0
  228. data/bridge/sdk-core/tests/main.rs +113 -0
  229. data/bridge/sdk-core/tests/runner.rs +93 -0
  230. data/bridge/src/connection.rs +186 -0
  231. data/bridge/src/lib.rs +239 -0
  232. data/bridge/src/runtime.rs +54 -0
  233. data/bridge/src/worker.rs +124 -0
  234. data/ext/Rakefile +9 -0
  235. data/lib/bridge.so +0 -0
  236. data/lib/gen/dependencies/gogoproto/gogo_pb.rb +14 -0
  237. data/lib/gen/temporal/api/batch/v1/message_pb.rb +50 -0
  238. data/lib/gen/temporal/api/command/v1/message_pb.rb +174 -0
  239. data/lib/gen/temporal/api/common/v1/message_pb.rb +69 -0
  240. data/lib/gen/temporal/api/enums/v1/batch_operation_pb.rb +33 -0
  241. data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +39 -0
  242. data/lib/gen/temporal/api/enums/v1/common_pb.rb +42 -0
  243. data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +68 -0
  244. data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +77 -0
  245. data/lib/gen/temporal/api/enums/v1/interaction_type_pb.rb +25 -0
  246. data/lib/gen/temporal/api/enums/v1/namespace_pb.rb +37 -0
  247. data/lib/gen/temporal/api/enums/v1/query_pb.rb +31 -0
  248. data/lib/gen/temporal/api/enums/v1/reset_pb.rb +24 -0
  249. data/lib/gen/temporal/api/enums/v1/schedule_pb.rb +28 -0
  250. data/lib/gen/temporal/api/enums/v1/task_queue_pb.rb +30 -0
  251. data/lib/gen/temporal/api/enums/v1/update_pb.rb +23 -0
  252. data/lib/gen/temporal/api/enums/v1/workflow_pb.rb +89 -0
  253. data/lib/gen/temporal/api/errordetails/v1/message_pb.rb +84 -0
  254. data/lib/gen/temporal/api/failure/v1/message_pb.rb +83 -0
  255. data/lib/gen/temporal/api/filter/v1/message_pb.rb +40 -0
  256. data/lib/gen/temporal/api/history/v1/message_pb.rb +490 -0
  257. data/lib/gen/temporal/api/interaction/v1/message_pb.rb +49 -0
  258. data/lib/gen/temporal/api/namespace/v1/message_pb.rb +63 -0
  259. data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +85 -0
  260. data/lib/gen/temporal/api/operatorservice/v1/service_pb.rb +20 -0
  261. data/lib/gen/temporal/api/query/v1/message_pb.rb +38 -0
  262. data/lib/gen/temporal/api/replication/v1/message_pb.rb +37 -0
  263. data/lib/gen/temporal/api/schedule/v1/message_pb.rb +149 -0
  264. data/lib/gen/temporal/api/taskqueue/v1/message_pb.rb +73 -0
  265. data/lib/gen/temporal/api/version/v1/message_pb.rb +41 -0
  266. data/lib/gen/temporal/api/workflow/v1/message_pb.rb +111 -0
  267. data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +788 -0
  268. data/lib/gen/temporal/api/workflowservice/v1/service_pb.rb +20 -0
  269. data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +58 -0
  270. data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +57 -0
  271. data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +222 -0
  272. data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +57 -0
  273. data/lib/gen/temporal/sdk/core/common/common_pb.rb +22 -0
  274. data/lib/gen/temporal/sdk/core/core_interface_pb.rb +34 -0
  275. data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +27 -0
  276. data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +165 -0
  277. data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +196 -0
  278. data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +34 -0
  279. data/lib/temporalio/activity/context.rb +97 -0
  280. data/lib/temporalio/activity/info.rb +67 -0
  281. data/lib/temporalio/activity.rb +85 -0
  282. data/lib/temporalio/bridge/error.rb +8 -0
  283. data/lib/temporalio/bridge.rb +14 -0
  284. data/lib/temporalio/client/implementation.rb +340 -0
  285. data/lib/temporalio/client/workflow_handle.rb +243 -0
  286. data/lib/temporalio/client.rb +131 -0
  287. data/lib/temporalio/connection.rb +751 -0
  288. data/lib/temporalio/data_converter.rb +191 -0
  289. data/lib/temporalio/error/failure.rb +194 -0
  290. data/lib/temporalio/error/workflow_failure.rb +19 -0
  291. data/lib/temporalio/errors.rb +40 -0
  292. data/lib/temporalio/failure_converter/base.rb +26 -0
  293. data/lib/temporalio/failure_converter/basic.rb +319 -0
  294. data/lib/temporalio/failure_converter.rb +7 -0
  295. data/lib/temporalio/interceptor/chain.rb +28 -0
  296. data/lib/temporalio/interceptor/client.rb +123 -0
  297. data/lib/temporalio/payload_codec/base.rb +32 -0
  298. data/lib/temporalio/payload_converter/base.rb +24 -0
  299. data/lib/temporalio/payload_converter/bytes.rb +27 -0
  300. data/lib/temporalio/payload_converter/composite.rb +49 -0
  301. data/lib/temporalio/payload_converter/encoding_base.rb +35 -0
  302. data/lib/temporalio/payload_converter/json.rb +26 -0
  303. data/lib/temporalio/payload_converter/nil.rb +26 -0
  304. data/lib/temporalio/payload_converter.rb +14 -0
  305. data/lib/temporalio/retry_policy.rb +82 -0
  306. data/lib/temporalio/retry_state.rb +35 -0
  307. data/lib/temporalio/runtime.rb +25 -0
  308. data/lib/temporalio/timeout_type.rb +29 -0
  309. data/lib/temporalio/version.rb +3 -0
  310. data/lib/temporalio/worker/activity_runner.rb +92 -0
  311. data/lib/temporalio/worker/activity_worker.rb +138 -0
  312. data/lib/temporalio/worker/reactor.rb +46 -0
  313. data/lib/temporalio/worker/runner.rb +63 -0
  314. data/lib/temporalio/worker/sync_worker.rb +88 -0
  315. data/lib/temporalio/worker/thread_pool_executor.rb +51 -0
  316. data/lib/temporalio/worker.rb +198 -0
  317. data/lib/temporalio/workflow/execution_info.rb +54 -0
  318. data/lib/temporalio/workflow/execution_status.rb +36 -0
  319. data/lib/temporalio/workflow/id_reuse_policy.rb +36 -0
  320. data/lib/temporalio/workflow/query_reject_condition.rb +33 -0
  321. data/lib/temporalio.rb +12 -1
  322. data/lib/thermite_patch.rb +23 -0
  323. data/temporalio.gemspec +45 -0
  324. metadata +566 -9
  325. data/lib/temporal/version.rb +0 -3
  326. data/lib/temporal.rb +0 -4
  327. data/temporal.gemspec +0 -20
@@ -0,0 +1,985 @@
1
+ use crate::{
2
+ abstractions::{dbg_panic, stream_when_allowed, MeteredSemaphore},
3
+ protosext::ValidPollWFTQResponse,
4
+ telemetry::metrics::workflow_worker_type,
5
+ worker::{
6
+ workflow::{history_update::NextPageToken, run_cache::RunCache, *},
7
+ LocalActRequest, LocalActivityResolution, LEGACY_QUERY_ID,
8
+ },
9
+ MetricsContext,
10
+ };
11
+ use futures::{stream, stream::PollNext, Stream, StreamExt};
12
+ use std::{collections::VecDeque, fmt::Debug, future, sync::Arc, time::Instant};
13
+ use temporal_sdk_core_api::errors::{PollWfError, WFMachinesError};
14
+ use temporal_sdk_core_protos::{
15
+ coresdk::{
16
+ workflow_activation::{
17
+ create_evict_activation, query_to_job, remove_from_cache::EvictionReason,
18
+ workflow_activation_job,
19
+ },
20
+ workflow_completion::Failure,
21
+ },
22
+ temporal::api::{enums::v1::WorkflowTaskFailedCause, failure::v1::Failure as TFailure},
23
+ };
24
+ use tokio::sync::{mpsc::unbounded_channel, oneshot};
25
+ use tokio_stream::wrappers::UnboundedReceiverStream;
26
+ use tokio_util::sync::CancellationToken;
27
+ use tracing::{Level, Span};
28
+
29
+ /// This struct holds all the state needed for tracking what workflow runs are currently cached
30
+ /// and how WFTs should be dispatched to them, etc.
31
+ ///
32
+ /// See [WFStream::build] for more
33
+ pub(crate) struct WFStream {
34
+ runs: RunCache,
35
+ /// Buffered polls for new runs which need a cache slot to open up before we can handle them
36
+ buffered_polls_need_cache_slot: VecDeque<PermittedWFT>,
37
+
38
+ /// Client for accessing server for history pagination etc.
39
+ client: Arc<dyn WorkerClient>,
40
+
41
+ /// Ensures we stay at or below this worker's maximum concurrent workflow task limit
42
+ wft_semaphore: MeteredSemaphore,
43
+ shutdown_token: CancellationToken,
44
+ ignore_evicts_on_shutdown: bool,
45
+
46
+ metrics: MetricsContext,
47
+ }
48
+ impl WFStream {
49
+ fn record_span_fields(&mut self, run_id: &str, span: &Span) {
50
+ if let Some(run_handle) = self.runs.get_mut(run_id) {
51
+ if let Some(spid) = span.id() {
52
+ if run_handle.recorded_span_ids.contains(&spid) {
53
+ return;
54
+ }
55
+ run_handle.recorded_span_ids.insert(spid);
56
+
57
+ if let Some(wid) = run_handle.wft.as_ref().map(|wft| &wft.info.wf_id) {
58
+ span.record("workflow_id", wid.as_str());
59
+ }
60
+ }
61
+ }
62
+ }
63
+ }
64
+
65
+ /// All possible inputs to the [WFStream]
66
+ #[derive(derive_more::From, Debug)]
67
+ enum WFStreamInput {
68
+ NewWft(PermittedWFT),
69
+ Local(LocalInput),
70
+ /// The stream given to us which represents the poller (or a mock) terminated.
71
+ PollerDead,
72
+ /// The stream given to us which represents the poller (or a mock) encountered a non-retryable
73
+ /// error while polling
74
+ PollerError(tonic::Status),
75
+ }
76
+ impl From<RunUpdateResponse> for WFStreamInput {
77
+ fn from(r: RunUpdateResponse) -> Self {
78
+ WFStreamInput::Local(LocalInput {
79
+ input: LocalInputs::RunUpdateResponse(r.kind),
80
+ span: r.span,
81
+ })
82
+ }
83
+ }
84
+ /// A non-poller-received input to the [WFStream]
85
+ #[derive(derive_more::DebugCustom)]
86
+ #[debug(fmt = "LocalInput {{ {:?} }}", input)]
87
+ pub(super) struct LocalInput {
88
+ pub input: LocalInputs,
89
+ pub span: Span,
90
+ }
91
+ /// Everything that _isn't_ a poll which may affect workflow state. Always higher priority than
92
+ /// new polls.
93
+ #[derive(Debug, derive_more::From)]
94
+ pub(super) enum LocalInputs {
95
+ Completion(WFActCompleteMsg),
96
+ LocalResolution(LocalResolutionMsg),
97
+ PostActivation(PostActivationMsg),
98
+ RunUpdateResponse(RunUpdateResponseKind),
99
+ RequestEviction(RequestEvictMsg),
100
+ GetStateInfo(GetStateInfoMsg),
101
+ }
102
+ impl LocalInputs {
103
+ fn run_id(&self) -> Option<&str> {
104
+ Some(match self {
105
+ LocalInputs::Completion(c) => c.completion.run_id(),
106
+ LocalInputs::LocalResolution(lr) => &lr.run_id,
107
+ LocalInputs::PostActivation(pa) => &pa.run_id,
108
+ LocalInputs::RunUpdateResponse(rur) => rur.run_id(),
109
+ LocalInputs::RequestEviction(re) => &re.run_id,
110
+ LocalInputs::GetStateInfo(_) => return None,
111
+ })
112
+ }
113
+ }
114
+ #[derive(Debug, derive_more::From)]
115
+ #[allow(clippy::large_enum_variant)] // PollerDead only ever gets used once, so not important.
116
+ enum ExternalPollerInputs {
117
+ NewWft(PermittedWFT),
118
+ PollerDead,
119
+ PollerError(tonic::Status),
120
+ }
121
+ impl From<ExternalPollerInputs> for WFStreamInput {
122
+ fn from(l: ExternalPollerInputs) -> Self {
123
+ match l {
124
+ ExternalPollerInputs::NewWft(v) => WFStreamInput::NewWft(v),
125
+ ExternalPollerInputs::PollerDead => WFStreamInput::PollerDead,
126
+ ExternalPollerInputs::PollerError(e) => WFStreamInput::PollerError(e),
127
+ }
128
+ }
129
+ }
130
+
131
+ impl WFStream {
132
+ /// Constructs workflow state management and returns a stream which outputs activations.
133
+ ///
134
+ /// * `external_wfts` is a stream of validated poll responses as returned by a poller (or mock)
135
+ /// * `wfts_from_complete` is the recv side of a channel that new WFTs from completions should
136
+ /// come down.
137
+ /// * `local_rx` is a stream of actions that workflow state needs to see. Things like
138
+ /// completions, local activities finishing, etc. See [LocalInputs].
139
+ ///
140
+ /// These inputs are combined, along with an internal feedback channel for run-specific updates,
141
+ /// to form the inputs to a stream of [WFStreamInput]s. The stream processor then takes
142
+ /// action on those inputs, and then may yield activations.
143
+ ///
144
+ /// Updating runs may need to do async work like fetching additional history. In order to
145
+ /// facilitate this, each run lives in its own task which is communicated with by sending
146
+ /// [RunAction]s and receiving [RunUpdateResponse]s via its [ManagedRunHandle].
147
+ pub(super) fn build(
148
+ basics: WorkflowBasics,
149
+ external_wfts: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
150
+ local_rx: impl Stream<Item = LocalInput> + Send + 'static,
151
+ client: Arc<dyn WorkerClient>,
152
+ local_activity_request_sink: impl Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution>
153
+ + Send
154
+ + Sync
155
+ + 'static,
156
+ ) -> impl Stream<Item = Result<ActivationOrAuto, PollWfError>> {
157
+ let wft_semaphore = MeteredSemaphore::new(
158
+ basics.max_outstanding_wfts,
159
+ basics.metrics.with_new_attrs([workflow_worker_type()]),
160
+ MetricsContext::available_task_slots,
161
+ );
162
+ let wft_sem_clone = wft_semaphore.clone();
163
+ let proceeder = stream::unfold(wft_sem_clone, |sem| async move {
164
+ Some((sem.acquire_owned().await.unwrap(), sem))
165
+ });
166
+ let poller_wfts = stream_when_allowed(external_wfts, proceeder);
167
+ let (run_update_tx, run_update_rx) = unbounded_channel();
168
+ let local_rx = stream::select(
169
+ local_rx.map(Into::into),
170
+ UnboundedReceiverStream::new(run_update_rx).map(Into::into),
171
+ );
172
+ let all_inputs = stream::select_with_strategy(
173
+ local_rx,
174
+ poller_wfts
175
+ .map(|(wft, permit)| match wft {
176
+ Ok(wft) => ExternalPollerInputs::NewWft(PermittedWFT { wft, permit }),
177
+ Err(e) => ExternalPollerInputs::PollerError(e),
178
+ })
179
+ .chain(stream::once(async { ExternalPollerInputs::PollerDead }))
180
+ .map(Into::into)
181
+ .boxed(),
182
+ // Priority always goes to the local stream
183
+ |_: &mut ()| PollNext::Left,
184
+ );
185
+ let mut state = WFStream {
186
+ buffered_polls_need_cache_slot: Default::default(),
187
+ runs: RunCache::new(
188
+ basics.max_cached_workflows,
189
+ basics.namespace.clone(),
190
+ run_update_tx,
191
+ Arc::new(local_activity_request_sink),
192
+ basics.metrics.clone(),
193
+ ),
194
+ client,
195
+ wft_semaphore,
196
+ shutdown_token: basics.shutdown_token,
197
+ ignore_evicts_on_shutdown: basics.ignore_evicts_on_shutdown,
198
+ metrics: basics.metrics,
199
+ };
200
+ all_inputs
201
+ .map(move |action| {
202
+ let span = span!(Level::DEBUG, "new_stream_input", action=?action);
203
+ let _span_g = span.enter();
204
+
205
+ let maybe_activation = match action {
206
+ WFStreamInput::NewWft(pwft) => {
207
+ debug!(run_id=%pwft.wft.workflow_execution.run_id, "New WFT");
208
+ state.instantiate_or_update(pwft);
209
+ None
210
+ }
211
+ WFStreamInput::Local(local_input) => {
212
+ let _span_g = local_input.span.enter();
213
+ if let Some(rid) = local_input.input.run_id() {
214
+ state.record_span_fields(rid, &local_input.span);
215
+ }
216
+ match local_input.input {
217
+ LocalInputs::RunUpdateResponse(resp) => {
218
+ state.process_run_update_response(resp)
219
+ }
220
+ LocalInputs::Completion(completion) => {
221
+ state.process_completion(completion);
222
+ None
223
+ }
224
+ LocalInputs::PostActivation(report) => {
225
+ state.process_post_activation(report);
226
+ None
227
+ }
228
+ LocalInputs::LocalResolution(res) => {
229
+ state.local_resolution(res);
230
+ None
231
+ }
232
+ LocalInputs::RequestEviction(evict) => {
233
+ state.request_eviction(evict);
234
+ None
235
+ }
236
+ LocalInputs::GetStateInfo(gsi) => {
237
+ let _ = gsi.response_tx.send(WorkflowStateInfo {
238
+ cached_workflows: state.runs.len(),
239
+ outstanding_wft: state.outstanding_wfts(),
240
+ available_wft_permits: state.wft_semaphore.available_permits(),
241
+ });
242
+ None
243
+ }
244
+ }
245
+ }
246
+ WFStreamInput::PollerDead => {
247
+ debug!("WFT poller died, shutting down");
248
+ state.shutdown_token.cancel();
249
+ None
250
+ }
251
+ WFStreamInput::PollerError(e) => {
252
+ warn!("WFT poller errored, shutting down");
253
+ return Err(PollWfError::TonicError(e));
254
+ }
255
+ };
256
+
257
+ if let Some(ref act) = maybe_activation {
258
+ if let Some(run_handle) = state.runs.get_mut(act.run_id()) {
259
+ run_handle.insert_outstanding_activation(act);
260
+ } else {
261
+ dbg_panic!("Tried to insert activation for missing run!");
262
+ }
263
+ }
264
+ state.reconcile_buffered();
265
+ if state.shutdown_done() {
266
+ return Err(PollWfError::ShutDown);
267
+ }
268
+
269
+ Ok(maybe_activation)
270
+ })
271
+ .filter_map(|o| {
272
+ future::ready(match o {
273
+ Ok(None) => None,
274
+ Ok(Some(v)) => Some(Ok(v)),
275
+ Err(e) => {
276
+ if !matches!(e, PollWfError::ShutDown) {
277
+ error!(
278
+ "Workflow processing encountered fatal error and must shut down {:?}",
279
+ e
280
+ );
281
+ }
282
+ Some(Err(e))
283
+ }
284
+ })
285
+ })
286
+ // Stop the stream once we have shut down
287
+ .take_while(|o| future::ready(!matches!(o, Err(PollWfError::ShutDown))))
288
+ }
289
+
290
+ fn process_run_update_response(
291
+ &mut self,
292
+ resp: RunUpdateResponseKind,
293
+ ) -> Option<ActivationOrAuto> {
294
+ debug!(resp=%resp, "Processing run update response from machines");
295
+ match resp {
296
+ RunUpdateResponseKind::Good(mut resp) => {
297
+ let run_handle = self
298
+ .runs
299
+ .get_mut(&resp.run_id)
300
+ .expect("Workflow must exist, it just sent us an update response");
301
+ run_handle.have_seen_terminal_event = resp.have_seen_terminal_event;
302
+ run_handle.more_pending_work = resp.more_pending_work;
303
+ run_handle.last_action_acked = true;
304
+ run_handle.most_recently_processed_event_number =
305
+ resp.most_recently_processed_event_number;
306
+
307
+ let r = match resp.outgoing_activation {
308
+ Some(ActivationOrAuto::LangActivation(mut activation)) => {
309
+ if resp.in_response_to_wft {
310
+ let wft = run_handle
311
+ .wft
312
+ .as_mut()
313
+ .expect("WFT must exist for run just updated with one");
314
+ // If there are in-poll queries, insert jobs for those queries into the
315
+ // activation, but only if we hit the cache. If we didn't, those queries
316
+ // will need to be dealt with once replay is over
317
+ if wft.hit_cache {
318
+ put_queries_in_act(&mut activation, wft);
319
+ }
320
+ }
321
+
322
+ if activation.jobs.is_empty() {
323
+ dbg_panic!("Should not send lang activation with no jobs");
324
+ }
325
+ Some(ActivationOrAuto::LangActivation(activation))
326
+ }
327
+ Some(ActivationOrAuto::ReadyForQueries(mut act)) => {
328
+ if let Some(wft) = run_handle.wft.as_mut() {
329
+ put_queries_in_act(&mut act, wft);
330
+ Some(ActivationOrAuto::LangActivation(act))
331
+ } else {
332
+ dbg_panic!("Ready for queries but no WFT!");
333
+ None
334
+ }
335
+ }
336
+ a @ Some(ActivationOrAuto::Autocomplete { .. }) => a,
337
+ None => {
338
+ // If the response indicates there is no activation to send yet but there
339
+ // is more pending work, we should check again.
340
+ if run_handle.more_pending_work {
341
+ run_handle.check_more_activations();
342
+ None
343
+ } else if let Some(reason) = run_handle.trying_to_evict.as_ref() {
344
+ // If a run update came back and had nothing to do, but we're trying to
345
+ // evict, just do that now as long as there's no other outstanding work.
346
+ if run_handle.activation.is_none() && !run_handle.more_pending_work {
347
+ let mut evict_act = create_evict_activation(
348
+ resp.run_id,
349
+ reason.message.clone(),
350
+ reason.reason,
351
+ );
352
+ evict_act.history_length =
353
+ run_handle.most_recently_processed_event_number as u32;
354
+ Some(ActivationOrAuto::LangActivation(evict_act))
355
+ } else {
356
+ None
357
+ }
358
+ } else {
359
+ None
360
+ }
361
+ }
362
+ };
363
+ if let Some(f) = resp.fulfillable_complete.take() {
364
+ f.fulfill();
365
+ }
366
+
367
+ // After each run update, check if it's ready to handle any buffered poll
368
+ if matches!(&r, Some(ActivationOrAuto::Autocomplete { .. }) | None)
369
+ && !run_handle.has_any_pending_work(false, true)
370
+ {
371
+ if let Some(bufft) = run_handle.buffered_resp.take() {
372
+ self.instantiate_or_update(bufft);
373
+ }
374
+ }
375
+ r
376
+ }
377
+ RunUpdateResponseKind::Fail(fail) => {
378
+ if let Some(r) = self.runs.get_mut(&fail.run_id) {
379
+ r.last_action_acked = true;
380
+ }
381
+
382
+ if let Some(resp_chan) = fail.completion_resp {
383
+ // Automatically fail the workflow task in the event we couldn't update machines
384
+ let fail_cause = if matches!(&fail.err, WFMachinesError::Nondeterminism(_)) {
385
+ WorkflowTaskFailedCause::NonDeterministicError
386
+ } else {
387
+ WorkflowTaskFailedCause::Unspecified
388
+ };
389
+ let wft_fail_str = format!("{:?}", fail.err);
390
+ self.failed_completion(
391
+ fail.run_id,
392
+ fail_cause,
393
+ fail.err.evict_reason(),
394
+ TFailure::application_failure(wft_fail_str, false).into(),
395
+ resp_chan,
396
+ );
397
+ } else {
398
+ // TODO: This should probably also fail workflow tasks, but that wasn't
399
+ // implemented pre-refactor either.
400
+ warn!(error=?fail.err, run_id=%fail.run_id, "Error while updating workflow");
401
+ self.request_eviction(RequestEvictMsg {
402
+ run_id: fail.run_id,
403
+ message: format!("Error while updating workflow: {:?}", fail.err),
404
+ reason: fail.err.evict_reason(),
405
+ });
406
+ }
407
+ None
408
+ }
409
+ }
410
+ }
411
+
412
+ #[instrument(skip(self, pwft),
413
+ fields(run_id=%pwft.wft.workflow_execution.run_id,
414
+ workflow_id=%pwft.wft.workflow_execution.workflow_id))]
415
+ fn instantiate_or_update(&mut self, pwft: PermittedWFT) {
416
+ let (mut work, permit) = if let Some(w) = self.buffer_resp_if_outstanding_work(pwft) {
417
+ (w.wft, w.permit)
418
+ } else {
419
+ return;
420
+ };
421
+
422
+ let run_id = work.workflow_execution.run_id.clone();
423
+ // If our cache is full and this WFT is for an unseen run we must first evict a run before
424
+ // we can deal with this task. So, buffer the task in that case.
425
+ if !self.runs.has_run(&run_id) && self.runs.is_full() {
426
+ self.buffer_resp_on_full_cache(PermittedWFT { wft: work, permit });
427
+ return;
428
+ }
429
+
430
+ let start_event_id = work.history.events.first().map(|e| e.event_id);
431
+ debug!(
432
+ run_id = %run_id,
433
+ task_token = %&work.task_token,
434
+ history_length = %work.history.events.len(),
435
+ start_event_id = ?start_event_id,
436
+ has_legacy_query = %work.legacy_query.is_some(),
437
+ attempt = %work.attempt,
438
+ "Applying new workflow task from server"
439
+ );
440
+
441
+ let wft_info = WorkflowTaskInfo {
442
+ attempt: work.attempt,
443
+ task_token: work.task_token,
444
+ wf_id: work.workflow_execution.workflow_id.clone(),
445
+ };
446
+ let poll_resp_is_incremental = work
447
+ .history
448
+ .events
449
+ .get(0)
450
+ .map(|ev| ev.event_id > 1)
451
+ .unwrap_or_default();
452
+ let poll_resp_is_incremental = poll_resp_is_incremental || work.history.events.is_empty();
453
+
454
+ let mut did_miss_cache = !poll_resp_is_incremental;
455
+
456
+ let page_token = if !self.runs.has_run(&run_id) && poll_resp_is_incremental {
457
+ debug!(run_id=?run_id, "Workflow task has partial history, but workflow is not in \
458
+ cache. Will fetch history");
459
+ self.metrics.sticky_cache_miss();
460
+ did_miss_cache = true;
461
+ NextPageToken::FetchFromStart
462
+ } else {
463
+ work.next_page_token.into()
464
+ };
465
+ let history_update = HistoryUpdate::new(
466
+ HistoryPaginator::new(
467
+ work.history,
468
+ work.workflow_execution.workflow_id.clone(),
469
+ run_id.clone(),
470
+ page_token,
471
+ self.client.clone(),
472
+ ),
473
+ work.previous_started_event_id,
474
+ );
475
+ let legacy_query_from_poll = work
476
+ .legacy_query
477
+ .take()
478
+ .map(|q| query_to_job(LEGACY_QUERY_ID.to_string(), q));
479
+
480
+ let mut pending_queries = work.query_requests.into_iter().collect::<Vec<_>>();
481
+ if !pending_queries.is_empty() && legacy_query_from_poll.is_some() {
482
+ error!(
483
+ "Server issued both normal and legacy queries. This should not happen. Please \
484
+ file a bug report."
485
+ );
486
+ self.request_eviction(RequestEvictMsg {
487
+ run_id,
488
+ message: "Server issued both normal and legacy query".to_string(),
489
+ reason: EvictionReason::Fatal,
490
+ });
491
+ return;
492
+ }
493
+ if let Some(lq) = legacy_query_from_poll {
494
+ pending_queries.push(lq);
495
+ }
496
+
497
+ let start_time = Instant::now();
498
+ let run_handle = self.runs.instantiate_or_update(
499
+ &run_id,
500
+ &work.workflow_execution.workflow_id,
501
+ &work.workflow_type,
502
+ history_update,
503
+ start_time,
504
+ );
505
+ run_handle.wft = Some(OutstandingTask {
506
+ info: wft_info,
507
+ hit_cache: !did_miss_cache,
508
+ pending_queries,
509
+ start_time,
510
+ permit,
511
+ })
512
+ }
513
+
514
+ fn process_completion(&mut self, complete: WFActCompleteMsg) {
515
+ match complete.completion {
516
+ ValidatedCompletion::Success { run_id, commands } => {
517
+ self.successful_completion(run_id, commands, complete.response_tx);
518
+ }
519
+ ValidatedCompletion::Fail { run_id, failure } => {
520
+ self.failed_completion(
521
+ run_id,
522
+ WorkflowTaskFailedCause::Unspecified,
523
+ EvictionReason::LangFail,
524
+ failure,
525
+ complete.response_tx,
526
+ );
527
+ }
528
+ }
529
+ // Always queue evictions after completion when we have a zero-size cache
530
+ if self.runs.cache_capacity() == 0 {
531
+ self.request_eviction_of_lru_run();
532
+ }
533
+ }
534
+
535
+ fn successful_completion(
536
+ &mut self,
537
+ run_id: String,
538
+ mut commands: Vec<WFCommand>,
539
+ resp_chan: oneshot::Sender<ActivationCompleteResult>,
540
+ ) {
541
+ let activation_was_only_eviction = self.activation_has_only_eviction(&run_id);
542
+ let (task_token, has_pending_query, start_time) =
543
+ if let Some(entry) = self.get_task(&run_id) {
544
+ (
545
+ entry.info.task_token.clone(),
546
+ !entry.pending_queries.is_empty(),
547
+ entry.start_time,
548
+ )
549
+ } else {
550
+ if !activation_was_only_eviction {
551
+ // Not an error if this was an eviction, since it's normal to issue eviction
552
+ // activations without an associated workflow task in that case.
553
+ dbg_panic!(
554
+ "Attempted to complete activation for run {} without associated workflow task",
555
+ run_id
556
+ );
557
+ }
558
+ self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
559
+ return;
560
+ };
561
+
562
+ // If the only command from the activation is a legacy query response, that means we need
563
+ // to respond differently than a typical activation.
564
+ if matches!(&commands.as_slice(),
565
+ &[WFCommand::QueryResponse(qr)] if qr.query_id == LEGACY_QUERY_ID)
566
+ {
567
+ let qr = match commands.remove(0) {
568
+ WFCommand::QueryResponse(qr) => qr,
569
+ _ => unreachable!("We just verified this is the only command"),
570
+ };
571
+ self.reply_to_complete(
572
+ &run_id,
573
+ ActivationCompleteOutcome::ReportWFTSuccess(ServerCommandsWithWorkflowInfo {
574
+ task_token,
575
+ action: ActivationAction::RespondLegacyQuery {
576
+ result: Box::new(qr),
577
+ },
578
+ }),
579
+ resp_chan,
580
+ );
581
+ } else {
582
+ // First strip out query responses from other commands that actually affect machines
583
+ // Would be prettier with `drain_filter`
584
+ let mut i = 0;
585
+ let mut query_responses = vec![];
586
+ while i < commands.len() {
587
+ if matches!(commands[i], WFCommand::QueryResponse(_)) {
588
+ if let WFCommand::QueryResponse(qr) = commands.remove(i) {
589
+ query_responses.push(qr);
590
+ }
591
+ } else {
592
+ i += 1;
593
+ }
594
+ }
595
+
596
+ let activation_was_eviction = self.activation_has_eviction(&run_id);
597
+ if let Some(rh) = self.runs.get_mut(&run_id) {
598
+ rh.send_completion(RunActivationCompletion {
599
+ task_token,
600
+ start_time,
601
+ commands,
602
+ activation_was_eviction,
603
+ activation_was_only_eviction,
604
+ has_pending_query,
605
+ query_responses,
606
+ resp_chan: Some(resp_chan),
607
+ });
608
+ } else {
609
+ dbg_panic!("Run {} missing during completion", run_id);
610
+ }
611
+ };
612
+ }
613
+
614
+ fn failed_completion(
615
+ &mut self,
616
+ run_id: String,
617
+ cause: WorkflowTaskFailedCause,
618
+ reason: EvictionReason,
619
+ failure: Failure,
620
+ resp_chan: oneshot::Sender<ActivationCompleteResult>,
621
+ ) {
622
+ let tt = if let Some(tt) = self.get_task(&run_id).map(|t| t.info.task_token.clone()) {
623
+ tt
624
+ } else {
625
+ dbg_panic!(
626
+ "No workflow task for run id {} found when trying to fail activation",
627
+ run_id
628
+ );
629
+ self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
630
+ return;
631
+ };
632
+
633
+ if let Some(m) = self.run_metrics(&run_id) {
634
+ m.wf_task_failed();
635
+ }
636
+ let message = format!("Workflow activation completion failed: {:?}", &failure);
637
+ // Blow up any cached data associated with the workflow
638
+ let should_report = match self.request_eviction(RequestEvictMsg {
639
+ run_id: run_id.clone(),
640
+ message,
641
+ reason,
642
+ }) {
643
+ EvictionRequestResult::EvictionRequested(Some(attempt))
644
+ | EvictionRequestResult::EvictionAlreadyRequested(Some(attempt)) => attempt <= 1,
645
+ _ => false,
646
+ };
647
+ // If the outstanding WFT is a legacy query task, report that we need to fail it
648
+ let outcome = if self
649
+ .runs
650
+ .get(&run_id)
651
+ .map(|rh| rh.pending_work_is_legacy_query())
652
+ .unwrap_or_default()
653
+ {
654
+ ActivationCompleteOutcome::ReportWFTFail(
655
+ FailedActivationWFTReport::ReportLegacyQueryFailure(tt, failure),
656
+ )
657
+ } else if should_report {
658
+ ActivationCompleteOutcome::ReportWFTFail(FailedActivationWFTReport::Report(
659
+ tt, cause, failure,
660
+ ))
661
+ } else {
662
+ ActivationCompleteOutcome::DoNothing
663
+ };
664
+ self.reply_to_complete(&run_id, outcome, resp_chan);
665
+ }
666
+
667
+ fn process_post_activation(&mut self, report: PostActivationMsg) {
668
+ let run_id = &report.run_id;
669
+
670
+ // If we reported to server, we always want to mark it complete.
671
+ let maybe_t = self.complete_wft(run_id, report.reported_wft_to_server);
672
+
673
+ if self
674
+ .get_activation(run_id)
675
+ .map(|a| a.has_eviction())
676
+ .unwrap_or_default()
677
+ {
678
+ self.evict_run(run_id);
679
+ };
680
+
681
+ if let Some(wft) = report.wft_from_complete {
682
+ debug!(run_id=%wft.workflow_execution.run_id, "New WFT from completion");
683
+ if let Some(t) = maybe_t {
684
+ self.instantiate_or_update(PermittedWFT {
685
+ wft,
686
+ permit: t.permit,
687
+ })
688
+ }
689
+ }
690
+
691
+ if let Some(rh) = self.runs.get_mut(run_id) {
692
+ // Delete the activation
693
+ rh.activation.take();
694
+ // Attempt to produce the next activation if needed
695
+ rh.check_more_activations();
696
+ }
697
+ }
698
+
699
+ fn local_resolution(&mut self, msg: LocalResolutionMsg) {
700
+ let run_id = msg.run_id;
701
+ if let Some(rh) = self.runs.get_mut(&run_id) {
702
+ rh.send_local_resolution(msg.res)
703
+ } else {
704
+ // It isn't an explicit error if the machine is missing when a local activity resolves.
705
+ // This can happen if an activity reports a timeout after we stopped caring about it.
706
+ debug!(run_id = %run_id,
707
+ "Tried to resolve a local activity for a run we are no longer tracking");
708
+ }
709
+ }
710
+
711
+ /// Request a workflow eviction. This will (eventually, after replay is done) queue up an
712
+ /// activation to evict the workflow from the lang side. Workflow will not *actually* be evicted
713
+ /// until lang replies to that activation
714
+ fn request_eviction(&mut self, info: RequestEvictMsg) -> EvictionRequestResult {
715
+ let activation_has_eviction = self.activation_has_eviction(&info.run_id);
716
+ if let Some(rh) = self.runs.get_mut(&info.run_id) {
717
+ let attempts = rh.wft.as_ref().map(|wt| wt.info.attempt);
718
+ if !activation_has_eviction && rh.trying_to_evict.is_none() {
719
+ debug!(run_id=%info.run_id, reason=%info.message, "Eviction requested");
720
+ rh.trying_to_evict = Some(info);
721
+ rh.check_more_activations();
722
+ EvictionRequestResult::EvictionRequested(attempts)
723
+ } else {
724
+ EvictionRequestResult::EvictionAlreadyRequested(attempts)
725
+ }
726
+ } else {
727
+ debug!(run_id=%info.run_id, "Eviction requested for unknown run");
728
+ EvictionRequestResult::NotFound
729
+ }
730
+ }
731
+
732
+ fn request_eviction_of_lru_run(&mut self) -> EvictionRequestResult {
733
+ if let Some(lru_run_id) = self.runs.current_lru_run() {
734
+ let run_id = lru_run_id.to_string();
735
+ self.request_eviction(RequestEvictMsg {
736
+ run_id,
737
+ message: "Workflow cache full".to_string(),
738
+ reason: EvictionReason::CacheFull,
739
+ })
740
+ } else {
741
+ // This branch shouldn't really be possible
742
+ EvictionRequestResult::NotFound
743
+ }
744
+ }
745
+
746
+ /// Evict a workflow from the cache by its run id. Any existing pending activations will be
747
+ /// destroyed, and any outstanding activations invalidated.
748
+ fn evict_run(&mut self, run_id: &str) {
749
+ debug!(run_id=%run_id, "Evicting run");
750
+
751
+ let mut did_take_buff = false;
752
+ // Now it can safely be deleted, it'll get recreated once the un-buffered poll is handled if
753
+ // there was one.
754
+ if let Some(mut rh) = self.runs.remove(run_id) {
755
+ rh.handle.abort();
756
+
757
+ if let Some(buff) = rh.buffered_resp.take() {
758
+ self.instantiate_or_update(buff);
759
+ did_take_buff = true;
760
+ }
761
+ }
762
+
763
+ if !did_take_buff {
764
+ // If there wasn't a buffered poll, there might be one for a different run which needs
765
+ // a free cache slot, and now there is.
766
+ if let Some(buff) = self.buffered_polls_need_cache_slot.pop_front() {
767
+ self.instantiate_or_update(buff);
768
+ }
769
+ }
770
+ }
771
+
772
+ fn complete_wft(
773
+ &mut self,
774
+ run_id: &str,
775
+ reported_wft_to_server: bool,
776
+ ) -> Option<OutstandingTask> {
777
+ // If the WFT completion wasn't sent to the server, but we did see the final event, we still
778
+ // want to clear the workflow task. This can really only happen in replay testing, where we
779
+ // will generate poll responses with complete history but no attached query, and such a WFT
780
+ // would never really exist. The server wouldn't send a workflow task with nothing to do,
781
+ // but they are very useful for testing complete replay.
782
+ let saw_final = self
783
+ .runs
784
+ .get(run_id)
785
+ .map(|r| r.have_seen_terminal_event)
786
+ .unwrap_or_default();
787
+ if !saw_final && !reported_wft_to_server {
788
+ return None;
789
+ }
790
+
791
+ if let Some(rh) = self.runs.get_mut(run_id) {
792
+ // Can't mark the WFT complete if there are pending queries, as doing so would destroy
793
+ // them.
794
+ if rh
795
+ .wft
796
+ .as_ref()
797
+ .map(|wft| !wft.pending_queries.is_empty())
798
+ .unwrap_or_default()
799
+ {
800
+ return None;
801
+ }
802
+
803
+ debug!("Marking WFT completed");
804
+ let retme = rh.wft.take();
805
+ if let Some(ot) = &retme {
806
+ if let Some(m) = self.run_metrics(run_id) {
807
+ m.wf_task_latency(ot.start_time.elapsed());
808
+ }
809
+ }
810
+ retme
811
+ } else {
812
+ None
813
+ }
814
+ }
815
+
816
+ /// Stores some work if there is any outstanding WFT or activation for the run. If there was
817
+ /// not, returns the work back out inside the option.
818
+ fn buffer_resp_if_outstanding_work(&mut self, work: PermittedWFT) -> Option<PermittedWFT> {
819
+ let run_id = &work.wft.workflow_execution.run_id;
820
+ if let Some(mut run) = self.runs.get_mut(run_id) {
821
+ let about_to_issue_evict = run.trying_to_evict.is_some() && !run.last_action_acked;
822
+ let has_wft = run.wft.is_some();
823
+ let has_activation = run.activation.is_some();
824
+ if has_wft
825
+ || has_activation
826
+ || about_to_issue_evict
827
+ || run.more_pending_work
828
+ || !run.last_action_acked
829
+ {
830
+ debug!(run_id = %run_id, run = ?run,
831
+ "Got new WFT for a run with outstanding work, buffering it");
832
+ run.buffered_resp = Some(work);
833
+ None
834
+ } else {
835
+ Some(work)
836
+ }
837
+ } else {
838
+ Some(work)
839
+ }
840
+ }
841
+
842
+ fn buffer_resp_on_full_cache(&mut self, work: PermittedWFT) {
843
+ debug!(run_id=%work.wft.workflow_execution.run_id, "Buffering WFT because cache is full");
844
+ // If there's already a buffered poll for the run, replace it.
845
+ if let Some(rh) = self
846
+ .buffered_polls_need_cache_slot
847
+ .iter_mut()
848
+ .find(|w| w.wft.workflow_execution.run_id == work.wft.workflow_execution.run_id)
849
+ {
850
+ *rh = work;
851
+ } else {
852
+ // Otherwise push it to the back
853
+ self.buffered_polls_need_cache_slot.push_back(work);
854
+ }
855
+ }
856
+
857
+ /// Makes sure we have enough pending evictions to fulfill the needs of buffered WFTs who are
858
+ /// waiting on a cache slot
859
+ fn reconcile_buffered(&mut self) {
860
+ // We must ensure that there are at least as many pending evictions as there are tasks
861
+ // that we might need to un-buffer (skipping runs which already have buffered tasks for
862
+ // themselves)
863
+ let num_in_buff = self.buffered_polls_need_cache_slot.len();
864
+ let mut evict_these = vec![];
865
+ let num_existing_evictions = self
866
+ .runs
867
+ .runs_lru_order()
868
+ .filter(|(_, h)| h.trying_to_evict.is_some())
869
+ .count();
870
+ let mut num_evicts_needed = num_in_buff.saturating_sub(num_existing_evictions);
871
+ for (rid, handle) in self.runs.runs_lru_order() {
872
+ if num_evicts_needed == 0 {
873
+ break;
874
+ }
875
+ if handle.buffered_resp.is_none() {
876
+ num_evicts_needed -= 1;
877
+ evict_these.push(rid.to_string());
878
+ }
879
+ }
880
+ for run_id in evict_these {
881
+ self.request_eviction(RequestEvictMsg {
882
+ run_id,
883
+ message: "Workflow cache full".to_string(),
884
+ reason: EvictionReason::CacheFull,
885
+ });
886
+ }
887
+ }
888
+
889
+ fn reply_to_complete(
890
+ &self,
891
+ run_id: &str,
892
+ outcome: ActivationCompleteOutcome,
893
+ chan: oneshot::Sender<ActivationCompleteResult>,
894
+ ) {
895
+ let most_recently_processed_event = self
896
+ .runs
897
+ .peek(run_id)
898
+ .map(|rh| rh.most_recently_processed_event_number)
899
+ .unwrap_or_default();
900
+ chan.send(ActivationCompleteResult {
901
+ most_recently_processed_event,
902
+ outcome,
903
+ })
904
+ .expect("Rcv half of activation reply not dropped");
905
+ }
906
+
907
+ fn shutdown_done(&self) -> bool {
908
+ let all_runs_ready = self
909
+ .runs
910
+ .handles()
911
+ .all(|r| !r.has_any_pending_work(self.ignore_evicts_on_shutdown, false));
912
+ if self.shutdown_token.is_cancelled() && all_runs_ready {
913
+ info!("Workflow shutdown is done");
914
+ true
915
+ } else {
916
+ false
917
+ }
918
+ }
919
+
920
+ fn get_task(&mut self, run_id: &str) -> Option<&OutstandingTask> {
921
+ self.runs.get(run_id).and_then(|rh| rh.wft.as_ref())
922
+ }
923
+
924
+ fn get_activation(&mut self, run_id: &str) -> Option<&OutstandingActivation> {
925
+ self.runs.get(run_id).and_then(|rh| rh.activation.as_ref())
926
+ }
927
+
928
+ fn run_metrics(&mut self, run_id: &str) -> Option<&MetricsContext> {
929
+ self.runs.get(run_id).map(|r| &r.metrics)
930
+ }
931
+
932
+ fn activation_has_only_eviction(&mut self, run_id: &str) -> bool {
933
+ self.runs
934
+ .get(run_id)
935
+ .and_then(|rh| rh.activation)
936
+ .map(OutstandingActivation::has_only_eviction)
937
+ .unwrap_or_default()
938
+ }
939
+
940
+ fn activation_has_eviction(&mut self, run_id: &str) -> bool {
941
+ self.runs
942
+ .get(run_id)
943
+ .and_then(|rh| rh.activation)
944
+ .map(OutstandingActivation::has_eviction)
945
+ .unwrap_or_default()
946
+ }
947
+
948
+ fn outstanding_wfts(&self) -> usize {
949
+ self.runs.handles().filter(|r| r.wft.is_some()).count()
950
+ }
951
+
952
+ // Useful when debugging
953
+ #[allow(dead_code)]
954
+ fn info_dump(&self, run_id: &str) {
955
+ if let Some(r) = self.runs.peek(run_id) {
956
+ info!(run_id, wft=?r.wft, activation=?r.activation, buffered=r.buffered_resp.is_some(),
957
+ trying_to_evict=r.trying_to_evict.is_some(), more_work=r.more_pending_work,
958
+ last_action_acked=r.last_action_acked);
959
+ } else {
960
+ info!(run_id, "Run not found");
961
+ }
962
+ }
963
+ }
964
+
965
+ /// Drains pending queries from the workflow task and appends them to the activation's jobs
966
+ fn put_queries_in_act(act: &mut WorkflowActivation, wft: &mut OutstandingTask) {
967
+ // Nothing to do if there are no pending queries
968
+ if wft.pending_queries.is_empty() {
969
+ return;
970
+ }
971
+
972
+ let has_legacy = wft.has_pending_legacy_query();
973
+ // Cannot dispatch legacy query if there are any other jobs - which can happen if, ex, a local
974
+ // activity resolves while we've gotten a legacy query after heartbeating.
975
+ if has_legacy && !act.jobs.is_empty() {
976
+ return;
977
+ }
978
+
979
+ debug!(queries=?wft.pending_queries, "Dispatching queries");
980
+ let query_jobs = wft
981
+ .pending_queries
982
+ .drain(..)
983
+ .map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
984
+ act.jobs.extend(query_jobs);
985
+ }