temporalio 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (327)
  1. checksums.yaml +4 -4
  2. data/README.md +301 -0
  3. data/bridge/Cargo.lock +2888 -0
  4. data/bridge/Cargo.toml +27 -0
  5. data/bridge/sdk-core/ARCHITECTURE.md +76 -0
  6. data/bridge/sdk-core/Cargo.lock +2606 -0
  7. data/bridge/sdk-core/Cargo.toml +2 -0
  8. data/bridge/sdk-core/LICENSE.txt +23 -0
  9. data/bridge/sdk-core/README.md +104 -0
  10. data/bridge/sdk-core/arch_docs/diagrams/README.md +10 -0
  11. data/bridge/sdk-core/arch_docs/diagrams/sticky_queues.puml +40 -0
  12. data/bridge/sdk-core/arch_docs/diagrams/workflow_internals.svg +1 -0
  13. data/bridge/sdk-core/arch_docs/sticky_queues.md +51 -0
  14. data/bridge/sdk-core/client/Cargo.toml +40 -0
  15. data/bridge/sdk-core/client/LICENSE.txt +23 -0
  16. data/bridge/sdk-core/client/src/lib.rs +1286 -0
  17. data/bridge/sdk-core/client/src/metrics.rs +165 -0
  18. data/bridge/sdk-core/client/src/raw.rs +932 -0
  19. data/bridge/sdk-core/client/src/retry.rs +751 -0
  20. data/bridge/sdk-core/client/src/workflow_handle/mod.rs +185 -0
  21. data/bridge/sdk-core/core/Cargo.toml +116 -0
  22. data/bridge/sdk-core/core/LICENSE.txt +23 -0
  23. data/bridge/sdk-core/core/benches/workflow_replay.rs +76 -0
  24. data/bridge/sdk-core/core/src/abstractions.rs +166 -0
  25. data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +1014 -0
  26. data/bridge/sdk-core/core/src/core_tests/child_workflows.rs +221 -0
  27. data/bridge/sdk-core/core/src/core_tests/determinism.rs +107 -0
  28. data/bridge/sdk-core/core/src/core_tests/local_activities.rs +925 -0
  29. data/bridge/sdk-core/core/src/core_tests/mod.rs +100 -0
  30. data/bridge/sdk-core/core/src/core_tests/queries.rs +894 -0
  31. data/bridge/sdk-core/core/src/core_tests/replay_flag.rs +65 -0
  32. data/bridge/sdk-core/core/src/core_tests/workers.rs +259 -0
  33. data/bridge/sdk-core/core/src/core_tests/workflow_cancels.rs +124 -0
  34. data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +2090 -0
  35. data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +515 -0
  36. data/bridge/sdk-core/core/src/lib.rs +282 -0
  37. data/bridge/sdk-core/core/src/pollers/mod.rs +54 -0
  38. data/bridge/sdk-core/core/src/pollers/poll_buffer.rs +297 -0
  39. data/bridge/sdk-core/core/src/protosext/mod.rs +428 -0
  40. data/bridge/sdk-core/core/src/replay/mod.rs +215 -0
  41. data/bridge/sdk-core/core/src/retry_logic.rs +202 -0
  42. data/bridge/sdk-core/core/src/telemetry/log_export.rs +190 -0
  43. data/bridge/sdk-core/core/src/telemetry/metrics.rs +428 -0
  44. data/bridge/sdk-core/core/src/telemetry/mod.rs +407 -0
  45. data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +78 -0
  46. data/bridge/sdk-core/core/src/test_help/mod.rs +889 -0
  47. data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +580 -0
  48. data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +1048 -0
  49. data/bridge/sdk-core/core/src/worker/activities.rs +481 -0
  50. data/bridge/sdk-core/core/src/worker/client/mocks.rs +87 -0
  51. data/bridge/sdk-core/core/src/worker/client.rs +373 -0
  52. data/bridge/sdk-core/core/src/worker/mod.rs +570 -0
  53. data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +37 -0
  54. data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +101 -0
  55. data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +532 -0
  56. data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +907 -0
  57. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +294 -0
  58. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +167 -0
  59. data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +858 -0
  60. data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +136 -0
  61. data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +157 -0
  62. data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +129 -0
  63. data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +1450 -0
  64. data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +316 -0
  65. data/bridge/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +178 -0
  66. data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +708 -0
  67. data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +439 -0
  68. data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +435 -0
  69. data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +175 -0
  70. data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +242 -0
  71. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +96 -0
  72. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +1200 -0
  73. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +272 -0
  74. data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +198 -0
  75. data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +655 -0
  76. data/bridge/sdk-core/core/src/worker/workflow/mod.rs +1200 -0
  77. data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +145 -0
  78. data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +88 -0
  79. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +985 -0
  80. data/bridge/sdk-core/core-api/Cargo.toml +32 -0
  81. data/bridge/sdk-core/core-api/LICENSE.txt +23 -0
  82. data/bridge/sdk-core/core-api/src/errors.rs +95 -0
  83. data/bridge/sdk-core/core-api/src/lib.rs +109 -0
  84. data/bridge/sdk-core/core-api/src/telemetry.rs +147 -0
  85. data/bridge/sdk-core/core-api/src/worker.rs +148 -0
  86. data/bridge/sdk-core/etc/deps.svg +162 -0
  87. data/bridge/sdk-core/etc/dynamic-config.yaml +2 -0
  88. data/bridge/sdk-core/etc/otel-collector-config.yaml +36 -0
  89. data/bridge/sdk-core/etc/prometheus.yaml +6 -0
  90. data/bridge/sdk-core/etc/regen-depgraph.sh +5 -0
  91. data/bridge/sdk-core/fsm/Cargo.toml +18 -0
  92. data/bridge/sdk-core/fsm/LICENSE.txt +23 -0
  93. data/bridge/sdk-core/fsm/README.md +3 -0
  94. data/bridge/sdk-core/fsm/rustfsm_procmacro/Cargo.toml +27 -0
  95. data/bridge/sdk-core/fsm/rustfsm_procmacro/LICENSE.txt +23 -0
  96. data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +647 -0
  97. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/progress.rs +8 -0
  98. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/dupe_transitions_fail.rs +18 -0
  99. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/dupe_transitions_fail.stderr +12 -0
  100. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/dynamic_dest_pass.rs +41 -0
  101. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/forgot_name_fail.rs +14 -0
  102. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/forgot_name_fail.stderr +11 -0
  103. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/handler_arg_pass.rs +32 -0
  104. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/handler_pass.rs +31 -0
  105. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/medium_complex_pass.rs +46 -0
  106. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.rs +29 -0
  107. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +12 -0
  108. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/simple_pass.rs +32 -0
  109. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/struct_event_variant_fail.rs +18 -0
  110. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/struct_event_variant_fail.stderr +5 -0
  111. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_more_item_event_variant_fail.rs +11 -0
  112. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_more_item_event_variant_fail.stderr +5 -0
  113. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_zero_item_event_variant_fail.rs +11 -0
  114. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_zero_item_event_variant_fail.stderr +5 -0
  115. data/bridge/sdk-core/fsm/rustfsm_trait/Cargo.toml +14 -0
  116. data/bridge/sdk-core/fsm/rustfsm_trait/LICENSE.txt +23 -0
  117. data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +249 -0
  118. data/bridge/sdk-core/fsm/src/lib.rs +2 -0
  119. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-23_history.bin +0 -0
  120. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-85_history.bin +0 -0
  121. data/bridge/sdk-core/histories/fail_wf_task.bin +0 -0
  122. data/bridge/sdk-core/histories/timer_workflow_history.bin +0 -0
  123. data/bridge/sdk-core/integ-with-otel.sh +7 -0
  124. data/bridge/sdk-core/protos/api_upstream/README.md +9 -0
  125. data/bridge/sdk-core/protos/api_upstream/api-linter.yaml +40 -0
  126. data/bridge/sdk-core/protos/api_upstream/buf.yaml +9 -0
  127. data/bridge/sdk-core/protos/api_upstream/build/go.mod +7 -0
  128. data/bridge/sdk-core/protos/api_upstream/build/go.sum +5 -0
  129. data/bridge/sdk-core/protos/api_upstream/build/tools.go +29 -0
  130. data/bridge/sdk-core/protos/api_upstream/dependencies/gogoproto/gogo.proto +141 -0
  131. data/bridge/sdk-core/protos/api_upstream/go.mod +6 -0
  132. data/bridge/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +89 -0
  133. data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +260 -0
  134. data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +112 -0
  135. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +47 -0
  136. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +57 -0
  137. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +56 -0
  138. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +170 -0
  139. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +118 -0
  140. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/interaction_type.proto +39 -0
  141. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +51 -0
  142. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +50 -0
  143. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +41 -0
  144. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +60 -0
  145. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +59 -0
  146. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +40 -0
  147. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +122 -0
  148. data/bridge/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +108 -0
  149. data/bridge/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +114 -0
  150. data/bridge/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +56 -0
  151. data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +758 -0
  152. data/bridge/sdk-core/protos/api_upstream/temporal/api/interaction/v1/message.proto +87 -0
  153. data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +97 -0
  154. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +121 -0
  155. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +80 -0
  156. data/bridge/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +61 -0
  157. data/bridge/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +55 -0
  158. data/bridge/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +379 -0
  159. data/bridge/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +108 -0
  160. data/bridge/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +59 -0
  161. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +146 -0
  162. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +1168 -0
  163. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +415 -0
  164. data/bridge/sdk-core/protos/grpc/health/v1/health.proto +63 -0
  165. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +78 -0
  166. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +79 -0
  167. data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +77 -0
  168. data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +15 -0
  169. data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +30 -0
  170. data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +30 -0
  171. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +263 -0
  172. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +304 -0
  173. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +29 -0
  174. data/bridge/sdk-core/protos/testsrv_upstream/api-linter.yaml +38 -0
  175. data/bridge/sdk-core/protos/testsrv_upstream/buf.yaml +13 -0
  176. data/bridge/sdk-core/protos/testsrv_upstream/dependencies/gogoproto/gogo.proto +141 -0
  177. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +63 -0
  178. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +90 -0
  179. data/bridge/sdk-core/rustfmt.toml +1 -0
  180. data/bridge/sdk-core/sdk/Cargo.toml +47 -0
  181. data/bridge/sdk-core/sdk/LICENSE.txt +23 -0
  182. data/bridge/sdk-core/sdk/src/activity_context.rs +230 -0
  183. data/bridge/sdk-core/sdk/src/app_data.rs +37 -0
  184. data/bridge/sdk-core/sdk/src/interceptors.rs +50 -0
  185. data/bridge/sdk-core/sdk/src/lib.rs +794 -0
  186. data/bridge/sdk-core/sdk/src/payload_converter.rs +11 -0
  187. data/bridge/sdk-core/sdk/src/workflow_context/options.rs +295 -0
  188. data/bridge/sdk-core/sdk/src/workflow_context.rs +694 -0
  189. data/bridge/sdk-core/sdk/src/workflow_future.rs +499 -0
  190. data/bridge/sdk-core/sdk-core-protos/Cargo.toml +30 -0
  191. data/bridge/sdk-core/sdk-core-protos/LICENSE.txt +23 -0
  192. data/bridge/sdk-core/sdk-core-protos/build.rs +107 -0
  193. data/bridge/sdk-core/sdk-core-protos/src/constants.rs +7 -0
  194. data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +544 -0
  195. data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +230 -0
  196. data/bridge/sdk-core/sdk-core-protos/src/lib.rs +1970 -0
  197. data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +38 -0
  198. data/bridge/sdk-core/sdk-core-protos/src/utilities.rs +14 -0
  199. data/bridge/sdk-core/test-utils/Cargo.toml +36 -0
  200. data/bridge/sdk-core/test-utils/src/canned_histories.rs +1579 -0
  201. data/bridge/sdk-core/test-utils/src/histfetch.rs +28 -0
  202. data/bridge/sdk-core/test-utils/src/lib.rs +650 -0
  203. data/bridge/sdk-core/tests/integ_tests/client_tests.rs +36 -0
  204. data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +128 -0
  205. data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +221 -0
  206. data/bridge/sdk-core/tests/integ_tests/metrics_tests.rs +37 -0
  207. data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +133 -0
  208. data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +437 -0
  209. data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +93 -0
  210. data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +878 -0
  211. data/bridge/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +61 -0
  212. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +59 -0
  213. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +58 -0
  214. data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +50 -0
  215. data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +60 -0
  216. data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +54 -0
  217. data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +788 -0
  218. data/bridge/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +53 -0
  219. data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +113 -0
  220. data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +223 -0
  221. data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +93 -0
  222. data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +167 -0
  223. data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +99 -0
  224. data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +131 -0
  225. data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +75 -0
  226. data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +597 -0
  227. data/bridge/sdk-core/tests/load_tests.rs +191 -0
  228. data/bridge/sdk-core/tests/main.rs +113 -0
  229. data/bridge/sdk-core/tests/runner.rs +93 -0
  230. data/bridge/src/connection.rs +186 -0
  231. data/bridge/src/lib.rs +239 -0
  232. data/bridge/src/runtime.rs +54 -0
  233. data/bridge/src/worker.rs +124 -0
  234. data/ext/Rakefile +9 -0
  235. data/lib/bridge.so +0 -0
  236. data/lib/gen/dependencies/gogoproto/gogo_pb.rb +14 -0
  237. data/lib/gen/temporal/api/batch/v1/message_pb.rb +50 -0
  238. data/lib/gen/temporal/api/command/v1/message_pb.rb +174 -0
  239. data/lib/gen/temporal/api/common/v1/message_pb.rb +69 -0
  240. data/lib/gen/temporal/api/enums/v1/batch_operation_pb.rb +33 -0
  241. data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +39 -0
  242. data/lib/gen/temporal/api/enums/v1/common_pb.rb +42 -0
  243. data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +68 -0
  244. data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +77 -0
  245. data/lib/gen/temporal/api/enums/v1/interaction_type_pb.rb +25 -0
  246. data/lib/gen/temporal/api/enums/v1/namespace_pb.rb +37 -0
  247. data/lib/gen/temporal/api/enums/v1/query_pb.rb +31 -0
  248. data/lib/gen/temporal/api/enums/v1/reset_pb.rb +24 -0
  249. data/lib/gen/temporal/api/enums/v1/schedule_pb.rb +28 -0
  250. data/lib/gen/temporal/api/enums/v1/task_queue_pb.rb +30 -0
  251. data/lib/gen/temporal/api/enums/v1/update_pb.rb +23 -0
  252. data/lib/gen/temporal/api/enums/v1/workflow_pb.rb +89 -0
  253. data/lib/gen/temporal/api/errordetails/v1/message_pb.rb +84 -0
  254. data/lib/gen/temporal/api/failure/v1/message_pb.rb +83 -0
  255. data/lib/gen/temporal/api/filter/v1/message_pb.rb +40 -0
  256. data/lib/gen/temporal/api/history/v1/message_pb.rb +490 -0
  257. data/lib/gen/temporal/api/interaction/v1/message_pb.rb +49 -0
  258. data/lib/gen/temporal/api/namespace/v1/message_pb.rb +63 -0
  259. data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +85 -0
  260. data/lib/gen/temporal/api/operatorservice/v1/service_pb.rb +20 -0
  261. data/lib/gen/temporal/api/query/v1/message_pb.rb +38 -0
  262. data/lib/gen/temporal/api/replication/v1/message_pb.rb +37 -0
  263. data/lib/gen/temporal/api/schedule/v1/message_pb.rb +149 -0
  264. data/lib/gen/temporal/api/taskqueue/v1/message_pb.rb +73 -0
  265. data/lib/gen/temporal/api/version/v1/message_pb.rb +41 -0
  266. data/lib/gen/temporal/api/workflow/v1/message_pb.rb +111 -0
  267. data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +788 -0
  268. data/lib/gen/temporal/api/workflowservice/v1/service_pb.rb +20 -0
  269. data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +58 -0
  270. data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +57 -0
  271. data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +222 -0
  272. data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +57 -0
  273. data/lib/gen/temporal/sdk/core/common/common_pb.rb +22 -0
  274. data/lib/gen/temporal/sdk/core/core_interface_pb.rb +34 -0
  275. data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +27 -0
  276. data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +165 -0
  277. data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +196 -0
  278. data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +34 -0
  279. data/lib/temporalio/activity/context.rb +97 -0
  280. data/lib/temporalio/activity/info.rb +67 -0
  281. data/lib/temporalio/activity.rb +85 -0
  282. data/lib/temporalio/bridge/error.rb +8 -0
  283. data/lib/temporalio/bridge.rb +14 -0
  284. data/lib/temporalio/client/implementation.rb +340 -0
  285. data/lib/temporalio/client/workflow_handle.rb +243 -0
  286. data/lib/temporalio/client.rb +131 -0
  287. data/lib/temporalio/connection.rb +751 -0
  288. data/lib/temporalio/data_converter.rb +191 -0
  289. data/lib/temporalio/error/failure.rb +194 -0
  290. data/lib/temporalio/error/workflow_failure.rb +19 -0
  291. data/lib/temporalio/errors.rb +40 -0
  292. data/lib/temporalio/failure_converter/base.rb +26 -0
  293. data/lib/temporalio/failure_converter/basic.rb +319 -0
  294. data/lib/temporalio/failure_converter.rb +7 -0
  295. data/lib/temporalio/interceptor/chain.rb +28 -0
  296. data/lib/temporalio/interceptor/client.rb +123 -0
  297. data/lib/temporalio/payload_codec/base.rb +32 -0
  298. data/lib/temporalio/payload_converter/base.rb +24 -0
  299. data/lib/temporalio/payload_converter/bytes.rb +27 -0
  300. data/lib/temporalio/payload_converter/composite.rb +49 -0
  301. data/lib/temporalio/payload_converter/encoding_base.rb +35 -0
  302. data/lib/temporalio/payload_converter/json.rb +26 -0
  303. data/lib/temporalio/payload_converter/nil.rb +26 -0
  304. data/lib/temporalio/payload_converter.rb +14 -0
  305. data/lib/temporalio/retry_policy.rb +82 -0
  306. data/lib/temporalio/retry_state.rb +35 -0
  307. data/lib/temporalio/runtime.rb +25 -0
  308. data/lib/temporalio/timeout_type.rb +29 -0
  309. data/lib/temporalio/version.rb +3 -0
  310. data/lib/temporalio/worker/activity_runner.rb +92 -0
  311. data/lib/temporalio/worker/activity_worker.rb +138 -0
  312. data/lib/temporalio/worker/reactor.rb +46 -0
  313. data/lib/temporalio/worker/runner.rb +63 -0
  314. data/lib/temporalio/worker/sync_worker.rb +88 -0
  315. data/lib/temporalio/worker/thread_pool_executor.rb +51 -0
  316. data/lib/temporalio/worker.rb +198 -0
  317. data/lib/temporalio/workflow/execution_info.rb +54 -0
  318. data/lib/temporalio/workflow/execution_status.rb +36 -0
  319. data/lib/temporalio/workflow/id_reuse_policy.rb +36 -0
  320. data/lib/temporalio/workflow/query_reject_condition.rb +33 -0
  321. data/lib/temporalio.rb +12 -1
  322. data/lib/thermite_patch.rb +23 -0
  323. data/temporalio.gemspec +45 -0
  324. metadata +566 -9
  325. data/lib/temporal/version.rb +0 -3
  326. data/lib/temporal.rb +0 -4
  327. data/temporal.gemspec +0 -20
@@ -0,0 +1,985 @@
1
+ use crate::{
2
+ abstractions::{dbg_panic, stream_when_allowed, MeteredSemaphore},
3
+ protosext::ValidPollWFTQResponse,
4
+ telemetry::metrics::workflow_worker_type,
5
+ worker::{
6
+ workflow::{history_update::NextPageToken, run_cache::RunCache, *},
7
+ LocalActRequest, LocalActivityResolution, LEGACY_QUERY_ID,
8
+ },
9
+ MetricsContext,
10
+ };
11
+ use futures::{stream, stream::PollNext, Stream, StreamExt};
12
+ use std::{collections::VecDeque, fmt::Debug, future, sync::Arc, time::Instant};
13
+ use temporal_sdk_core_api::errors::{PollWfError, WFMachinesError};
14
+ use temporal_sdk_core_protos::{
15
+ coresdk::{
16
+ workflow_activation::{
17
+ create_evict_activation, query_to_job, remove_from_cache::EvictionReason,
18
+ workflow_activation_job,
19
+ },
20
+ workflow_completion::Failure,
21
+ },
22
+ temporal::api::{enums::v1::WorkflowTaskFailedCause, failure::v1::Failure as TFailure},
23
+ };
24
+ use tokio::sync::{mpsc::unbounded_channel, oneshot};
25
+ use tokio_stream::wrappers::UnboundedReceiverStream;
26
+ use tokio_util::sync::CancellationToken;
27
+ use tracing::{Level, Span};
28
+
29
+ /// This struct holds all the state needed for tracking what workflow runs are currently cached
30
+ /// and how WFTs should be dispatched to them, etc.
31
+ ///
32
+ /// See [WFStream::build] for more
33
+ pub(crate) struct WFStream {
34
+ runs: RunCache,
35
+ /// Buffered polls for new runs which need a cache slot to open up before we can handle them
36
+ buffered_polls_need_cache_slot: VecDeque<PermittedWFT>,
37
+
38
+ /// Client for accessing server for history pagination etc.
39
+ client: Arc<dyn WorkerClient>,
40
+
41
+ /// Ensures we stay at or below this worker's maximum concurrent workflow task limit
42
+ wft_semaphore: MeteredSemaphore,
43
+ shutdown_token: CancellationToken,
44
+ ignore_evicts_on_shutdown: bool,
45
+
46
+ metrics: MetricsContext,
47
+ }
48
+ impl WFStream {
49
+ fn record_span_fields(&mut self, run_id: &str, span: &Span) {
50
+ if let Some(run_handle) = self.runs.get_mut(run_id) {
51
+ if let Some(spid) = span.id() {
52
+ if run_handle.recorded_span_ids.contains(&spid) {
53
+ return;
54
+ }
55
+ run_handle.recorded_span_ids.insert(spid);
56
+
57
+ if let Some(wid) = run_handle.wft.as_ref().map(|wft| &wft.info.wf_id) {
58
+ span.record("workflow_id", wid.as_str());
59
+ }
60
+ }
61
+ }
62
+ }
63
+ }
64
+
65
+ /// All possible inputs to the [WFStream]
66
+ #[derive(derive_more::From, Debug)]
67
+ enum WFStreamInput {
68
+ NewWft(PermittedWFT),
69
+ Local(LocalInput),
70
+ /// The stream given to us which represents the poller (or a mock) terminated.
71
+ PollerDead,
72
+ /// The stream given to us which represents the poller (or a mock) encountered a non-retryable
73
+ /// error while polling
74
+ PollerError(tonic::Status),
75
+ }
76
+ impl From<RunUpdateResponse> for WFStreamInput {
77
+ fn from(r: RunUpdateResponse) -> Self {
78
+ WFStreamInput::Local(LocalInput {
79
+ input: LocalInputs::RunUpdateResponse(r.kind),
80
+ span: r.span,
81
+ })
82
+ }
83
+ }
84
+ /// A non-poller-received input to the [WFStream]
85
+ #[derive(derive_more::DebugCustom)]
86
+ #[debug(fmt = "LocalInput {{ {:?} }}", input)]
87
+ pub(super) struct LocalInput {
88
+ pub input: LocalInputs,
89
+ pub span: Span,
90
+ }
91
+ /// Everything that _isn't_ a poll which may affect workflow state. Always higher priority than
92
+ /// new polls.
93
+ #[derive(Debug, derive_more::From)]
94
+ pub(super) enum LocalInputs {
95
+ Completion(WFActCompleteMsg),
96
+ LocalResolution(LocalResolutionMsg),
97
+ PostActivation(PostActivationMsg),
98
+ RunUpdateResponse(RunUpdateResponseKind),
99
+ RequestEviction(RequestEvictMsg),
100
+ GetStateInfo(GetStateInfoMsg),
101
+ }
102
+ impl LocalInputs {
103
+ fn run_id(&self) -> Option<&str> {
104
+ Some(match self {
105
+ LocalInputs::Completion(c) => c.completion.run_id(),
106
+ LocalInputs::LocalResolution(lr) => &lr.run_id,
107
+ LocalInputs::PostActivation(pa) => &pa.run_id,
108
+ LocalInputs::RunUpdateResponse(rur) => rur.run_id(),
109
+ LocalInputs::RequestEviction(re) => &re.run_id,
110
+ LocalInputs::GetStateInfo(_) => return None,
111
+ })
112
+ }
113
+ }
114
+ #[derive(Debug, derive_more::From)]
115
+ #[allow(clippy::large_enum_variant)] // PollerDead only ever gets used once, so not important.
116
+ enum ExternalPollerInputs {
117
+ NewWft(PermittedWFT),
118
+ PollerDead,
119
+ PollerError(tonic::Status),
120
+ }
121
+ impl From<ExternalPollerInputs> for WFStreamInput {
122
+ fn from(l: ExternalPollerInputs) -> Self {
123
+ match l {
124
+ ExternalPollerInputs::NewWft(v) => WFStreamInput::NewWft(v),
125
+ ExternalPollerInputs::PollerDead => WFStreamInput::PollerDead,
126
+ ExternalPollerInputs::PollerError(e) => WFStreamInput::PollerError(e),
127
+ }
128
+ }
129
+ }
130
+
131
+ impl WFStream {
132
+ /// Constructs workflow state management and returns a stream which outputs activations.
133
+ ///
134
+ /// * `external_wfts` is a stream of validated poll responses as returned by a poller (or mock)
135
+ /// * `wfts_from_complete` is the recv side of a channel that new WFTs from completions should
136
+ /// come down.
137
+ /// * `local_rx` is a stream of actions that workflow state needs to see. Things like
138
+ /// completions, local activities finishing, etc. See [LocalInputs].
139
+ ///
140
+ /// These inputs are combined, along with an internal feedback channel for run-specific updates,
141
+ /// to form the inputs to a stream of [WFStreamInput]s. The stream processor then takes
142
+ /// action on those inputs, and then may yield activations.
143
+ ///
144
+ /// Updating runs may need to do async work like fetching additional history. In order to
145
+ /// facilitate this, each run lives in its own task which is communicated with by sending
146
+ /// [RunAction]s and receiving [RunUpdateResponse]s via its [ManagedRunHandle].
147
+ pub(super) fn build(
148
+ basics: WorkflowBasics,
149
+ external_wfts: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
150
+ local_rx: impl Stream<Item = LocalInput> + Send + 'static,
151
+ client: Arc<dyn WorkerClient>,
152
+ local_activity_request_sink: impl Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution>
153
+ + Send
154
+ + Sync
155
+ + 'static,
156
+ ) -> impl Stream<Item = Result<ActivationOrAuto, PollWfError>> {
157
+ let wft_semaphore = MeteredSemaphore::new(
158
+ basics.max_outstanding_wfts,
159
+ basics.metrics.with_new_attrs([workflow_worker_type()]),
160
+ MetricsContext::available_task_slots,
161
+ );
162
+ let wft_sem_clone = wft_semaphore.clone();
163
+ let proceeder = stream::unfold(wft_sem_clone, |sem| async move {
164
+ Some((sem.acquire_owned().await.unwrap(), sem))
165
+ });
166
+ let poller_wfts = stream_when_allowed(external_wfts, proceeder);
167
+ let (run_update_tx, run_update_rx) = unbounded_channel();
168
+ let local_rx = stream::select(
169
+ local_rx.map(Into::into),
170
+ UnboundedReceiverStream::new(run_update_rx).map(Into::into),
171
+ );
172
+ let all_inputs = stream::select_with_strategy(
173
+ local_rx,
174
+ poller_wfts
175
+ .map(|(wft, permit)| match wft {
176
+ Ok(wft) => ExternalPollerInputs::NewWft(PermittedWFT { wft, permit }),
177
+ Err(e) => ExternalPollerInputs::PollerError(e),
178
+ })
179
+ .chain(stream::once(async { ExternalPollerInputs::PollerDead }))
180
+ .map(Into::into)
181
+ .boxed(),
182
+ // Priority always goes to the local stream
183
+ |_: &mut ()| PollNext::Left,
184
+ );
185
+ let mut state = WFStream {
186
+ buffered_polls_need_cache_slot: Default::default(),
187
+ runs: RunCache::new(
188
+ basics.max_cached_workflows,
189
+ basics.namespace.clone(),
190
+ run_update_tx,
191
+ Arc::new(local_activity_request_sink),
192
+ basics.metrics.clone(),
193
+ ),
194
+ client,
195
+ wft_semaphore,
196
+ shutdown_token: basics.shutdown_token,
197
+ ignore_evicts_on_shutdown: basics.ignore_evicts_on_shutdown,
198
+ metrics: basics.metrics,
199
+ };
200
+ all_inputs
201
+ .map(move |action| {
202
+ let span = span!(Level::DEBUG, "new_stream_input", action=?action);
203
+ let _span_g = span.enter();
204
+
205
+ let maybe_activation = match action {
206
+ WFStreamInput::NewWft(pwft) => {
207
+ debug!(run_id=%pwft.wft.workflow_execution.run_id, "New WFT");
208
+ state.instantiate_or_update(pwft);
209
+ None
210
+ }
211
+ WFStreamInput::Local(local_input) => {
212
+ let _span_g = local_input.span.enter();
213
+ if let Some(rid) = local_input.input.run_id() {
214
+ state.record_span_fields(rid, &local_input.span);
215
+ }
216
+ match local_input.input {
217
+ LocalInputs::RunUpdateResponse(resp) => {
218
+ state.process_run_update_response(resp)
219
+ }
220
+ LocalInputs::Completion(completion) => {
221
+ state.process_completion(completion);
222
+ None
223
+ }
224
+ LocalInputs::PostActivation(report) => {
225
+ state.process_post_activation(report);
226
+ None
227
+ }
228
+ LocalInputs::LocalResolution(res) => {
229
+ state.local_resolution(res);
230
+ None
231
+ }
232
+ LocalInputs::RequestEviction(evict) => {
233
+ state.request_eviction(evict);
234
+ None
235
+ }
236
+ LocalInputs::GetStateInfo(gsi) => {
237
+ let _ = gsi.response_tx.send(WorkflowStateInfo {
238
+ cached_workflows: state.runs.len(),
239
+ outstanding_wft: state.outstanding_wfts(),
240
+ available_wft_permits: state.wft_semaphore.available_permits(),
241
+ });
242
+ None
243
+ }
244
+ }
245
+ }
246
+ WFStreamInput::PollerDead => {
247
+ debug!("WFT poller died, shutting down");
248
+ state.shutdown_token.cancel();
249
+ None
250
+ }
251
+ WFStreamInput::PollerError(e) => {
252
+ warn!("WFT poller errored, shutting down");
253
+ return Err(PollWfError::TonicError(e));
254
+ }
255
+ };
256
+
257
+ if let Some(ref act) = maybe_activation {
258
+ if let Some(run_handle) = state.runs.get_mut(act.run_id()) {
259
+ run_handle.insert_outstanding_activation(act);
260
+ } else {
261
+ dbg_panic!("Tried to insert activation for missing run!");
262
+ }
263
+ }
264
+ state.reconcile_buffered();
265
+ if state.shutdown_done() {
266
+ return Err(PollWfError::ShutDown);
267
+ }
268
+
269
+ Ok(maybe_activation)
270
+ })
271
+ .filter_map(|o| {
272
+ future::ready(match o {
273
+ Ok(None) => None,
274
+ Ok(Some(v)) => Some(Ok(v)),
275
+ Err(e) => {
276
+ if !matches!(e, PollWfError::ShutDown) {
277
+ error!(
278
+ "Workflow processing encountered fatal error and must shut down {:?}",
279
+ e
280
+ );
281
+ }
282
+ Some(Err(e))
283
+ }
284
+ })
285
+ })
286
+ // Stop the stream once we have shut down
287
+ .take_while(|o| future::ready(!matches!(o, Err(PollWfError::ShutDown))))
288
+ }
289
+
290
+ fn process_run_update_response(
291
+ &mut self,
292
+ resp: RunUpdateResponseKind,
293
+ ) -> Option<ActivationOrAuto> {
294
+ debug!(resp=%resp, "Processing run update response from machines");
295
+ match resp {
296
+ RunUpdateResponseKind::Good(mut resp) => {
297
+ let run_handle = self
298
+ .runs
299
+ .get_mut(&resp.run_id)
300
+ .expect("Workflow must exist, it just sent us an update response");
301
+ run_handle.have_seen_terminal_event = resp.have_seen_terminal_event;
302
+ run_handle.more_pending_work = resp.more_pending_work;
303
+ run_handle.last_action_acked = true;
304
+ run_handle.most_recently_processed_event_number =
305
+ resp.most_recently_processed_event_number;
306
+
307
+ let r = match resp.outgoing_activation {
308
+ Some(ActivationOrAuto::LangActivation(mut activation)) => {
309
+ if resp.in_response_to_wft {
310
+ let wft = run_handle
311
+ .wft
312
+ .as_mut()
313
+ .expect("WFT must exist for run just updated with one");
314
+ // If there are in-poll queries, insert jobs for those queries into the
315
+ // activation, but only if we hit the cache. If we didn't, those queries
316
+ // will need to be dealt with once replay is over
317
+ if wft.hit_cache {
318
+ put_queries_in_act(&mut activation, wft);
319
+ }
320
+ }
321
+
322
+ if activation.jobs.is_empty() {
323
+ dbg_panic!("Should not send lang activation with no jobs");
324
+ }
325
+ Some(ActivationOrAuto::LangActivation(activation))
326
+ }
327
+ Some(ActivationOrAuto::ReadyForQueries(mut act)) => {
328
+ if let Some(wft) = run_handle.wft.as_mut() {
329
+ put_queries_in_act(&mut act, wft);
330
+ Some(ActivationOrAuto::LangActivation(act))
331
+ } else {
332
+ dbg_panic!("Ready for queries but no WFT!");
333
+ None
334
+ }
335
+ }
336
+ a @ Some(ActivationOrAuto::Autocomplete { .. }) => a,
337
+ None => {
338
+ // If the response indicates there is no activation to send yet but there
339
+ // is more pending work, we should check again.
340
+ if run_handle.more_pending_work {
341
+ run_handle.check_more_activations();
342
+ None
343
+ } else if let Some(reason) = run_handle.trying_to_evict.as_ref() {
344
+ // If a run update came back and had nothing to do, but we're trying to
345
+ // evict, just do that now as long as there's no other outstanding work.
346
+ if run_handle.activation.is_none() && !run_handle.more_pending_work {
347
+ let mut evict_act = create_evict_activation(
348
+ resp.run_id,
349
+ reason.message.clone(),
350
+ reason.reason,
351
+ );
352
+ evict_act.history_length =
353
+ run_handle.most_recently_processed_event_number as u32;
354
+ Some(ActivationOrAuto::LangActivation(evict_act))
355
+ } else {
356
+ None
357
+ }
358
+ } else {
359
+ None
360
+ }
361
+ }
362
+ };
363
+ if let Some(f) = resp.fulfillable_complete.take() {
364
+ f.fulfill();
365
+ }
366
+
367
+ // After each run update, check if it's ready to handle any buffered poll
368
+ if matches!(&r, Some(ActivationOrAuto::Autocomplete { .. }) | None)
369
+ && !run_handle.has_any_pending_work(false, true)
370
+ {
371
+ if let Some(bufft) = run_handle.buffered_resp.take() {
372
+ self.instantiate_or_update(bufft);
373
+ }
374
+ }
375
+ r
376
+ }
377
+ RunUpdateResponseKind::Fail(fail) => {
378
+ if let Some(r) = self.runs.get_mut(&fail.run_id) {
379
+ r.last_action_acked = true;
380
+ }
381
+
382
+ if let Some(resp_chan) = fail.completion_resp {
383
+ // Automatically fail the workflow task in the event we couldn't update machines
384
+ let fail_cause = if matches!(&fail.err, WFMachinesError::Nondeterminism(_)) {
385
+ WorkflowTaskFailedCause::NonDeterministicError
386
+ } else {
387
+ WorkflowTaskFailedCause::Unspecified
388
+ };
389
+ let wft_fail_str = format!("{:?}", fail.err);
390
+ self.failed_completion(
391
+ fail.run_id,
392
+ fail_cause,
393
+ fail.err.evict_reason(),
394
+ TFailure::application_failure(wft_fail_str, false).into(),
395
+ resp_chan,
396
+ );
397
+ } else {
398
+ // TODO: This should probably also fail workflow tasks, but that wasn't
399
+ // implemented pre-refactor either.
400
+ warn!(error=?fail.err, run_id=%fail.run_id, "Error while updating workflow");
401
+ self.request_eviction(RequestEvictMsg {
402
+ run_id: fail.run_id,
403
+ message: format!("Error while updating workflow: {:?}", fail.err),
404
+ reason: fail.err.evict_reason(),
405
+ });
406
+ }
407
+ None
408
+ }
409
+ }
410
+ }
411
+
412
+ #[instrument(skip(self, pwft),
413
+ fields(run_id=%pwft.wft.workflow_execution.run_id,
414
+ workflow_id=%pwft.wft.workflow_execution.workflow_id))]
415
+ fn instantiate_or_update(&mut self, pwft: PermittedWFT) {
416
+ let (mut work, permit) = if let Some(w) = self.buffer_resp_if_outstanding_work(pwft) {
417
+ (w.wft, w.permit)
418
+ } else {
419
+ return;
420
+ };
421
+
422
+ let run_id = work.workflow_execution.run_id.clone();
423
+ // If our cache is full and this WFT is for an unseen run we must first evict a run before
424
+ // we can deal with this task. So, buffer the task in that case.
425
+ if !self.runs.has_run(&run_id) && self.runs.is_full() {
426
+ self.buffer_resp_on_full_cache(PermittedWFT { wft: work, permit });
427
+ return;
428
+ }
429
+
430
+ let start_event_id = work.history.events.first().map(|e| e.event_id);
431
+ debug!(
432
+ run_id = %run_id,
433
+ task_token = %&work.task_token,
434
+ history_length = %work.history.events.len(),
435
+ start_event_id = ?start_event_id,
436
+ has_legacy_query = %work.legacy_query.is_some(),
437
+ attempt = %work.attempt,
438
+ "Applying new workflow task from server"
439
+ );
440
+
441
+ let wft_info = WorkflowTaskInfo {
442
+ attempt: work.attempt,
443
+ task_token: work.task_token,
444
+ wf_id: work.workflow_execution.workflow_id.clone(),
445
+ };
446
+ let poll_resp_is_incremental = work
447
+ .history
448
+ .events
449
+ .get(0)
450
+ .map(|ev| ev.event_id > 1)
451
+ .unwrap_or_default();
452
+ let poll_resp_is_incremental = poll_resp_is_incremental || work.history.events.is_empty();
453
+
454
+ let mut did_miss_cache = !poll_resp_is_incremental;
455
+
456
+ let page_token = if !self.runs.has_run(&run_id) && poll_resp_is_incremental {
457
+ debug!(run_id=?run_id, "Workflow task has partial history, but workflow is not in \
458
+ cache. Will fetch history");
459
+ self.metrics.sticky_cache_miss();
460
+ did_miss_cache = true;
461
+ NextPageToken::FetchFromStart
462
+ } else {
463
+ work.next_page_token.into()
464
+ };
465
+ let history_update = HistoryUpdate::new(
466
+ HistoryPaginator::new(
467
+ work.history,
468
+ work.workflow_execution.workflow_id.clone(),
469
+ run_id.clone(),
470
+ page_token,
471
+ self.client.clone(),
472
+ ),
473
+ work.previous_started_event_id,
474
+ );
475
+ let legacy_query_from_poll = work
476
+ .legacy_query
477
+ .take()
478
+ .map(|q| query_to_job(LEGACY_QUERY_ID.to_string(), q));
479
+
480
+ let mut pending_queries = work.query_requests.into_iter().collect::<Vec<_>>();
481
+ if !pending_queries.is_empty() && legacy_query_from_poll.is_some() {
482
+ error!(
483
+ "Server issued both normal and legacy queries. This should not happen. Please \
484
+ file a bug report."
485
+ );
486
+ self.request_eviction(RequestEvictMsg {
487
+ run_id,
488
+ message: "Server issued both normal and legacy query".to_string(),
489
+ reason: EvictionReason::Fatal,
490
+ });
491
+ return;
492
+ }
493
+ if let Some(lq) = legacy_query_from_poll {
494
+ pending_queries.push(lq);
495
+ }
496
+
497
+ let start_time = Instant::now();
498
+ let run_handle = self.runs.instantiate_or_update(
499
+ &run_id,
500
+ &work.workflow_execution.workflow_id,
501
+ &work.workflow_type,
502
+ history_update,
503
+ start_time,
504
+ );
505
+ run_handle.wft = Some(OutstandingTask {
506
+ info: wft_info,
507
+ hit_cache: !did_miss_cache,
508
+ pending_queries,
509
+ start_time,
510
+ permit,
511
+ })
512
+ }
513
+
514
+ fn process_completion(&mut self, complete: WFActCompleteMsg) {
515
+ match complete.completion {
516
+ ValidatedCompletion::Success { run_id, commands } => {
517
+ self.successful_completion(run_id, commands, complete.response_tx);
518
+ }
519
+ ValidatedCompletion::Fail { run_id, failure } => {
520
+ self.failed_completion(
521
+ run_id,
522
+ WorkflowTaskFailedCause::Unspecified,
523
+ EvictionReason::LangFail,
524
+ failure,
525
+ complete.response_tx,
526
+ );
527
+ }
528
+ }
529
+ // Always queue evictions after completion when we have a zero-size cache
530
+ if self.runs.cache_capacity() == 0 {
531
+ self.request_eviction_of_lru_run();
532
+ }
533
+ }
534
+
535
+ fn successful_completion(
536
+ &mut self,
537
+ run_id: String,
538
+ mut commands: Vec<WFCommand>,
539
+ resp_chan: oneshot::Sender<ActivationCompleteResult>,
540
+ ) {
541
+ let activation_was_only_eviction = self.activation_has_only_eviction(&run_id);
542
+ let (task_token, has_pending_query, start_time) =
543
+ if let Some(entry) = self.get_task(&run_id) {
544
+ (
545
+ entry.info.task_token.clone(),
546
+ !entry.pending_queries.is_empty(),
547
+ entry.start_time,
548
+ )
549
+ } else {
550
+ if !activation_was_only_eviction {
551
+ // Not an error if this was an eviction, since it's normal to issue eviction
552
+ // activations without an associated workflow task in that case.
553
+ dbg_panic!(
554
+ "Attempted to complete activation for run {} without associated workflow task",
555
+ run_id
556
+ );
557
+ }
558
+ self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
559
+ return;
560
+ };
561
+
562
+ // If the only command from the activation is a legacy query response, that means we need
563
+ // to respond differently than a typical activation.
564
+ if matches!(&commands.as_slice(),
565
+ &[WFCommand::QueryResponse(qr)] if qr.query_id == LEGACY_QUERY_ID)
566
+ {
567
+ let qr = match commands.remove(0) {
568
+ WFCommand::QueryResponse(qr) => qr,
569
+ _ => unreachable!("We just verified this is the only command"),
570
+ };
571
+ self.reply_to_complete(
572
+ &run_id,
573
+ ActivationCompleteOutcome::ReportWFTSuccess(ServerCommandsWithWorkflowInfo {
574
+ task_token,
575
+ action: ActivationAction::RespondLegacyQuery {
576
+ result: Box::new(qr),
577
+ },
578
+ }),
579
+ resp_chan,
580
+ );
581
+ } else {
582
+ // First strip out query responses from other commands that actually affect machines
583
+ // Would be prettier with `drain_filter`
584
+ let mut i = 0;
585
+ let mut query_responses = vec![];
586
+ while i < commands.len() {
587
+ if matches!(commands[i], WFCommand::QueryResponse(_)) {
588
+ if let WFCommand::QueryResponse(qr) = commands.remove(i) {
589
+ query_responses.push(qr);
590
+ }
591
+ } else {
592
+ i += 1;
593
+ }
594
+ }
595
+
596
+ let activation_was_eviction = self.activation_has_eviction(&run_id);
597
+ if let Some(rh) = self.runs.get_mut(&run_id) {
598
+ rh.send_completion(RunActivationCompletion {
599
+ task_token,
600
+ start_time,
601
+ commands,
602
+ activation_was_eviction,
603
+ activation_was_only_eviction,
604
+ has_pending_query,
605
+ query_responses,
606
+ resp_chan: Some(resp_chan),
607
+ });
608
+ } else {
609
+ dbg_panic!("Run {} missing during completion", run_id);
610
+ }
611
+ };
612
+ }
613
+
614
+ fn failed_completion(
615
+ &mut self,
616
+ run_id: String,
617
+ cause: WorkflowTaskFailedCause,
618
+ reason: EvictionReason,
619
+ failure: Failure,
620
+ resp_chan: oneshot::Sender<ActivationCompleteResult>,
621
+ ) {
622
+ let tt = if let Some(tt) = self.get_task(&run_id).map(|t| t.info.task_token.clone()) {
623
+ tt
624
+ } else {
625
+ dbg_panic!(
626
+ "No workflow task for run id {} found when trying to fail activation",
627
+ run_id
628
+ );
629
+ self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
630
+ return;
631
+ };
632
+
633
+ if let Some(m) = self.run_metrics(&run_id) {
634
+ m.wf_task_failed();
635
+ }
636
+ let message = format!("Workflow activation completion failed: {:?}", &failure);
637
+ // Blow up any cached data associated with the workflow
638
+ let should_report = match self.request_eviction(RequestEvictMsg {
639
+ run_id: run_id.clone(),
640
+ message,
641
+ reason,
642
+ }) {
643
+ EvictionRequestResult::EvictionRequested(Some(attempt))
644
+ | EvictionRequestResult::EvictionAlreadyRequested(Some(attempt)) => attempt <= 1,
645
+ _ => false,
646
+ };
647
+ // If the outstanding WFT is a legacy query task, report that we need to fail it
648
+ let outcome = if self
649
+ .runs
650
+ .get(&run_id)
651
+ .map(|rh| rh.pending_work_is_legacy_query())
652
+ .unwrap_or_default()
653
+ {
654
+ ActivationCompleteOutcome::ReportWFTFail(
655
+ FailedActivationWFTReport::ReportLegacyQueryFailure(tt, failure),
656
+ )
657
+ } else if should_report {
658
+ ActivationCompleteOutcome::ReportWFTFail(FailedActivationWFTReport::Report(
659
+ tt, cause, failure,
660
+ ))
661
+ } else {
662
+ ActivationCompleteOutcome::DoNothing
663
+ };
664
+ self.reply_to_complete(&run_id, outcome, resp_chan);
665
+ }
666
+
667
+ fn process_post_activation(&mut self, report: PostActivationMsg) {
668
+ let run_id = &report.run_id;
669
+
670
+ // If we reported to server, we always want to mark it complete.
671
+ let maybe_t = self.complete_wft(run_id, report.reported_wft_to_server);
672
+
673
+ if self
674
+ .get_activation(run_id)
675
+ .map(|a| a.has_eviction())
676
+ .unwrap_or_default()
677
+ {
678
+ self.evict_run(run_id);
679
+ };
680
+
681
+ if let Some(wft) = report.wft_from_complete {
682
+ debug!(run_id=%wft.workflow_execution.run_id, "New WFT from completion");
683
+ if let Some(t) = maybe_t {
684
+ self.instantiate_or_update(PermittedWFT {
685
+ wft,
686
+ permit: t.permit,
687
+ })
688
+ }
689
+ }
690
+
691
+ if let Some(rh) = self.runs.get_mut(run_id) {
692
+ // Delete the activation
693
+ rh.activation.take();
694
+ // Attempt to produce the next activation if needed
695
+ rh.check_more_activations();
696
+ }
697
+ }
698
+
699
+ fn local_resolution(&mut self, msg: LocalResolutionMsg) {
700
+ let run_id = msg.run_id;
701
+ if let Some(rh) = self.runs.get_mut(&run_id) {
702
+ rh.send_local_resolution(msg.res)
703
+ } else {
704
+ // It isn't an explicit error if the machine is missing when a local activity resolves.
705
+ // This can happen if an activity reports a timeout after we stopped caring about it.
706
+ debug!(run_id = %run_id,
707
+ "Tried to resolve a local activity for a run we are no longer tracking");
708
+ }
709
+ }
710
+
711
+ /// Request a workflow eviction. This will (eventually, after replay is done) queue up an
712
+ /// activation to evict the workflow from the lang side. Workflow will not *actually* be evicted
713
+ /// until lang replies to that activation
714
+ fn request_eviction(&mut self, info: RequestEvictMsg) -> EvictionRequestResult {
715
+ let activation_has_eviction = self.activation_has_eviction(&info.run_id);
716
+ if let Some(rh) = self.runs.get_mut(&info.run_id) {
717
+ let attempts = rh.wft.as_ref().map(|wt| wt.info.attempt);
718
+ if !activation_has_eviction && rh.trying_to_evict.is_none() {
719
+ debug!(run_id=%info.run_id, reason=%info.message, "Eviction requested");
720
+ rh.trying_to_evict = Some(info);
721
+ rh.check_more_activations();
722
+ EvictionRequestResult::EvictionRequested(attempts)
723
+ } else {
724
+ EvictionRequestResult::EvictionAlreadyRequested(attempts)
725
+ }
726
+ } else {
727
+ debug!(run_id=%info.run_id, "Eviction requested for unknown run");
728
+ EvictionRequestResult::NotFound
729
+ }
730
+ }
731
+
732
+ fn request_eviction_of_lru_run(&mut self) -> EvictionRequestResult {
733
+ if let Some(lru_run_id) = self.runs.current_lru_run() {
734
+ let run_id = lru_run_id.to_string();
735
+ self.request_eviction(RequestEvictMsg {
736
+ run_id,
737
+ message: "Workflow cache full".to_string(),
738
+ reason: EvictionReason::CacheFull,
739
+ })
740
+ } else {
741
+ // This branch shouldn't really be possible
742
+ EvictionRequestResult::NotFound
743
+ }
744
+ }
745
+
746
+ /// Evict a workflow from the cache by its run id. Any existing pending activations will be
747
+ /// destroyed, and any outstanding activations invalidated.
748
+ fn evict_run(&mut self, run_id: &str) {
749
+ debug!(run_id=%run_id, "Evicting run");
750
+
751
+ let mut did_take_buff = false;
752
+ // Now it can safely be deleted, it'll get recreated once the un-buffered poll is handled if
753
+ // there was one.
754
+ if let Some(mut rh) = self.runs.remove(run_id) {
755
+ rh.handle.abort();
756
+
757
+ if let Some(buff) = rh.buffered_resp.take() {
758
+ self.instantiate_or_update(buff);
759
+ did_take_buff = true;
760
+ }
761
+ }
762
+
763
+ if !did_take_buff {
764
+ // If there wasn't a buffered poll, there might be one for a different run which needs
765
+ // a free cache slot, and now there is.
766
+ if let Some(buff) = self.buffered_polls_need_cache_slot.pop_front() {
767
+ self.instantiate_or_update(buff);
768
+ }
769
+ }
770
+ }
771
+
772
+ fn complete_wft(
773
+ &mut self,
774
+ run_id: &str,
775
+ reported_wft_to_server: bool,
776
+ ) -> Option<OutstandingTask> {
777
+ // If the WFT completion wasn't sent to the server, but we did see the final event, we still
778
+ // want to clear the workflow task. This can really only happen in replay testing, where we
779
+ // will generate poll responses with complete history but no attached query, and such a WFT
780
+ // would never really exist. The server wouldn't send a workflow task with nothing to do,
781
+ // but they are very useful for testing complete replay.
782
+ let saw_final = self
783
+ .runs
784
+ .get(run_id)
785
+ .map(|r| r.have_seen_terminal_event)
786
+ .unwrap_or_default();
787
+ if !saw_final && !reported_wft_to_server {
788
+ return None;
789
+ }
790
+
791
+ if let Some(rh) = self.runs.get_mut(run_id) {
792
+ // Can't mark the WFT complete if there are pending queries, as doing so would destroy
793
+ // them.
794
+ if rh
795
+ .wft
796
+ .as_ref()
797
+ .map(|wft| !wft.pending_queries.is_empty())
798
+ .unwrap_or_default()
799
+ {
800
+ return None;
801
+ }
802
+
803
+ debug!("Marking WFT completed");
804
+ let retme = rh.wft.take();
805
+ if let Some(ot) = &retme {
806
+ if let Some(m) = self.run_metrics(run_id) {
807
+ m.wf_task_latency(ot.start_time.elapsed());
808
+ }
809
+ }
810
+ retme
811
+ } else {
812
+ None
813
+ }
814
+ }
815
+
816
+ /// Stores some work if there is any outstanding WFT or activation for the run. If there was
817
+ /// not, returns the work back out inside the option.
818
+ fn buffer_resp_if_outstanding_work(&mut self, work: PermittedWFT) -> Option<PermittedWFT> {
819
+ let run_id = &work.wft.workflow_execution.run_id;
820
+ if let Some(mut run) = self.runs.get_mut(run_id) {
821
+ let about_to_issue_evict = run.trying_to_evict.is_some() && !run.last_action_acked;
822
+ let has_wft = run.wft.is_some();
823
+ let has_activation = run.activation.is_some();
824
+ if has_wft
825
+ || has_activation
826
+ || about_to_issue_evict
827
+ || run.more_pending_work
828
+ || !run.last_action_acked
829
+ {
830
+ debug!(run_id = %run_id, run = ?run,
831
+ "Got new WFT for a run with outstanding work, buffering it");
832
+ run.buffered_resp = Some(work);
833
+ None
834
+ } else {
835
+ Some(work)
836
+ }
837
+ } else {
838
+ Some(work)
839
+ }
840
+ }
841
+
842
+ fn buffer_resp_on_full_cache(&mut self, work: PermittedWFT) {
843
+ debug!(run_id=%work.wft.workflow_execution.run_id, "Buffering WFT because cache is full");
844
+ // If there's already a buffered poll for the run, replace it.
845
+ if let Some(rh) = self
846
+ .buffered_polls_need_cache_slot
847
+ .iter_mut()
848
+ .find(|w| w.wft.workflow_execution.run_id == work.wft.workflow_execution.run_id)
849
+ {
850
+ *rh = work;
851
+ } else {
852
+ // Otherwise push it to the back
853
+ self.buffered_polls_need_cache_slot.push_back(work);
854
+ }
855
+ }
856
+
857
+ /// Makes sure we have enough pending evictions to fulfill the needs of buffered WFTs who are
858
+ /// waiting on a cache slot
859
+ fn reconcile_buffered(&mut self) {
860
+ // We must ensure that there are at least as many pending evictions as there are tasks
861
+ // that we might need to un-buffer (skipping runs which already have buffered tasks for
862
+ // themselves)
863
+ let num_in_buff = self.buffered_polls_need_cache_slot.len();
864
+ let mut evict_these = vec![];
865
+ let num_existing_evictions = self
866
+ .runs
867
+ .runs_lru_order()
868
+ .filter(|(_, h)| h.trying_to_evict.is_some())
869
+ .count();
870
+ let mut num_evicts_needed = num_in_buff.saturating_sub(num_existing_evictions);
871
+ for (rid, handle) in self.runs.runs_lru_order() {
872
+ if num_evicts_needed == 0 {
873
+ break;
874
+ }
875
+ if handle.buffered_resp.is_none() {
876
+ num_evicts_needed -= 1;
877
+ evict_these.push(rid.to_string());
878
+ }
879
+ }
880
+ for run_id in evict_these {
881
+ self.request_eviction(RequestEvictMsg {
882
+ run_id,
883
+ message: "Workflow cache full".to_string(),
884
+ reason: EvictionReason::CacheFull,
885
+ });
886
+ }
887
+ }
888
+
889
+ fn reply_to_complete(
890
+ &self,
891
+ run_id: &str,
892
+ outcome: ActivationCompleteOutcome,
893
+ chan: oneshot::Sender<ActivationCompleteResult>,
894
+ ) {
895
+ let most_recently_processed_event = self
896
+ .runs
897
+ .peek(run_id)
898
+ .map(|rh| rh.most_recently_processed_event_number)
899
+ .unwrap_or_default();
900
+ chan.send(ActivationCompleteResult {
901
+ most_recently_processed_event,
902
+ outcome,
903
+ })
904
+ .expect("Rcv half of activation reply not dropped");
905
+ }
906
+
907
+ fn shutdown_done(&self) -> bool {
908
+ let all_runs_ready = self
909
+ .runs
910
+ .handles()
911
+ .all(|r| !r.has_any_pending_work(self.ignore_evicts_on_shutdown, false));
912
+ if self.shutdown_token.is_cancelled() && all_runs_ready {
913
+ info!("Workflow shutdown is done");
914
+ true
915
+ } else {
916
+ false
917
+ }
918
+ }
919
+
920
+ fn get_task(&mut self, run_id: &str) -> Option<&OutstandingTask> {
921
+ self.runs.get(run_id).and_then(|rh| rh.wft.as_ref())
922
+ }
923
+
924
+ fn get_activation(&mut self, run_id: &str) -> Option<&OutstandingActivation> {
925
+ self.runs.get(run_id).and_then(|rh| rh.activation.as_ref())
926
+ }
927
+
928
+ fn run_metrics(&mut self, run_id: &str) -> Option<&MetricsContext> {
929
+ self.runs.get(run_id).map(|r| &r.metrics)
930
+ }
931
+
932
+ fn activation_has_only_eviction(&mut self, run_id: &str) -> bool {
933
+ self.runs
934
+ .get(run_id)
935
+ .and_then(|rh| rh.activation)
936
+ .map(OutstandingActivation::has_only_eviction)
937
+ .unwrap_or_default()
938
+ }
939
+
940
+ fn activation_has_eviction(&mut self, run_id: &str) -> bool {
941
+ self.runs
942
+ .get(run_id)
943
+ .and_then(|rh| rh.activation)
944
+ .map(OutstandingActivation::has_eviction)
945
+ .unwrap_or_default()
946
+ }
947
+
948
+ fn outstanding_wfts(&self) -> usize {
949
+ self.runs.handles().filter(|r| r.wft.is_some()).count()
950
+ }
951
+
952
+ // Useful when debugging
953
+ #[allow(dead_code)]
954
+ fn info_dump(&self, run_id: &str) {
955
+ if let Some(r) = self.runs.peek(run_id) {
956
+ info!(run_id, wft=?r.wft, activation=?r.activation, buffered=r.buffered_resp.is_some(),
957
+ trying_to_evict=r.trying_to_evict.is_some(), more_work=r.more_pending_work,
958
+ last_action_acked=r.last_action_acked);
959
+ } else {
960
+ info!(run_id, "Run not found");
961
+ }
962
+ }
963
+ }
964
+
965
+ /// Drains pending queries from the workflow task and appends them to the activation's jobs
966
+ fn put_queries_in_act(act: &mut WorkflowActivation, wft: &mut OutstandingTask) {
967
+ // Nothing to do if there are no pending queries
968
+ if wft.pending_queries.is_empty() {
969
+ return;
970
+ }
971
+
972
+ let has_legacy = wft.has_pending_legacy_query();
973
+ // Cannot dispatch legacy query if there are any other jobs - which can happen if, ex, a local
974
+ // activity resolves while we've gotten a legacy query after heartbeating.
975
+ if has_legacy && !act.jobs.is_empty() {
976
+ return;
977
+ }
978
+
979
+ debug!(queries=?wft.pending_queries, "Dispatching queries");
980
+ let query_jobs = wft
981
+ .pending_queries
982
+ .drain(..)
983
+ .map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
984
+ act.jobs.extend(query_jobs);
985
+ }