temporalio 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (317) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE +20 -0
  4. data/README.md +130 -0
  5. data/bridge/Cargo.lock +2865 -0
  6. data/bridge/Cargo.toml +26 -0
  7. data/bridge/sdk-core/ARCHITECTURE.md +76 -0
  8. data/bridge/sdk-core/Cargo.lock +2606 -0
  9. data/bridge/sdk-core/Cargo.toml +2 -0
  10. data/bridge/sdk-core/LICENSE.txt +23 -0
  11. data/bridge/sdk-core/README.md +107 -0
  12. data/bridge/sdk-core/arch_docs/diagrams/README.md +10 -0
  13. data/bridge/sdk-core/arch_docs/diagrams/sticky_queues.puml +40 -0
  14. data/bridge/sdk-core/arch_docs/diagrams/workflow_internals.svg +1 -0
  15. data/bridge/sdk-core/arch_docs/sticky_queues.md +51 -0
  16. data/bridge/sdk-core/bridge-ffi/Cargo.toml +24 -0
  17. data/bridge/sdk-core/bridge-ffi/LICENSE.txt +23 -0
  18. data/bridge/sdk-core/bridge-ffi/build.rs +25 -0
  19. data/bridge/sdk-core/bridge-ffi/include/sdk-core-bridge.h +249 -0
  20. data/bridge/sdk-core/bridge-ffi/src/lib.rs +825 -0
  21. data/bridge/sdk-core/bridge-ffi/src/wrappers.rs +211 -0
  22. data/bridge/sdk-core/client/Cargo.toml +40 -0
  23. data/bridge/sdk-core/client/LICENSE.txt +23 -0
  24. data/bridge/sdk-core/client/src/lib.rs +1294 -0
  25. data/bridge/sdk-core/client/src/metrics.rs +165 -0
  26. data/bridge/sdk-core/client/src/raw.rs +931 -0
  27. data/bridge/sdk-core/client/src/retry.rs +674 -0
  28. data/bridge/sdk-core/client/src/workflow_handle/mod.rs +185 -0
  29. data/bridge/sdk-core/core/Cargo.toml +116 -0
  30. data/bridge/sdk-core/core/LICENSE.txt +23 -0
  31. data/bridge/sdk-core/core/benches/workflow_replay.rs +73 -0
  32. data/bridge/sdk-core/core/src/abstractions.rs +166 -0
  33. data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +911 -0
  34. data/bridge/sdk-core/core/src/core_tests/child_workflows.rs +221 -0
  35. data/bridge/sdk-core/core/src/core_tests/determinism.rs +107 -0
  36. data/bridge/sdk-core/core/src/core_tests/local_activities.rs +515 -0
  37. data/bridge/sdk-core/core/src/core_tests/mod.rs +100 -0
  38. data/bridge/sdk-core/core/src/core_tests/queries.rs +736 -0
  39. data/bridge/sdk-core/core/src/core_tests/replay_flag.rs +65 -0
  40. data/bridge/sdk-core/core/src/core_tests/workers.rs +259 -0
  41. data/bridge/sdk-core/core/src/core_tests/workflow_cancels.rs +124 -0
  42. data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +2070 -0
  43. data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +515 -0
  44. data/bridge/sdk-core/core/src/lib.rs +175 -0
  45. data/bridge/sdk-core/core/src/log_export.rs +62 -0
  46. data/bridge/sdk-core/core/src/pollers/mod.rs +54 -0
  47. data/bridge/sdk-core/core/src/pollers/poll_buffer.rs +297 -0
  48. data/bridge/sdk-core/core/src/protosext/mod.rs +428 -0
  49. data/bridge/sdk-core/core/src/replay/mod.rs +71 -0
  50. data/bridge/sdk-core/core/src/retry_logic.rs +202 -0
  51. data/bridge/sdk-core/core/src/telemetry/metrics.rs +383 -0
  52. data/bridge/sdk-core/core/src/telemetry/mod.rs +412 -0
  53. data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +77 -0
  54. data/bridge/sdk-core/core/src/test_help/mod.rs +875 -0
  55. data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +580 -0
  56. data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +1042 -0
  57. data/bridge/sdk-core/core/src/worker/activities.rs +464 -0
  58. data/bridge/sdk-core/core/src/worker/client/mocks.rs +87 -0
  59. data/bridge/sdk-core/core/src/worker/client.rs +347 -0
  60. data/bridge/sdk-core/core/src/worker/mod.rs +566 -0
  61. data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +37 -0
  62. data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +110 -0
  63. data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +458 -0
  64. data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +911 -0
  65. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +298 -0
  66. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +171 -0
  67. data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +860 -0
  68. data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +140 -0
  69. data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +161 -0
  70. data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +133 -0
  71. data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +1448 -0
  72. data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +342 -0
  73. data/bridge/sdk-core/core/src/worker/workflow/machines/mutable_side_effect_state_machine.rs +127 -0
  74. data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +712 -0
  75. data/bridge/sdk-core/core/src/worker/workflow/machines/side_effect_state_machine.rs +71 -0
  76. data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +443 -0
  77. data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +439 -0
  78. data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +169 -0
  79. data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +246 -0
  80. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +96 -0
  81. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +1184 -0
  82. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +277 -0
  83. data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +198 -0
  84. data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +647 -0
  85. data/bridge/sdk-core/core/src/worker/workflow/mod.rs +1143 -0
  86. data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +145 -0
  87. data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +88 -0
  88. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +940 -0
  89. data/bridge/sdk-core/core-api/Cargo.toml +31 -0
  90. data/bridge/sdk-core/core-api/LICENSE.txt +23 -0
  91. data/bridge/sdk-core/core-api/src/errors.rs +95 -0
  92. data/bridge/sdk-core/core-api/src/lib.rs +151 -0
  93. data/bridge/sdk-core/core-api/src/worker.rs +135 -0
  94. data/bridge/sdk-core/etc/deps.svg +187 -0
  95. data/bridge/sdk-core/etc/dynamic-config.yaml +2 -0
  96. data/bridge/sdk-core/etc/otel-collector-config.yaml +36 -0
  97. data/bridge/sdk-core/etc/prometheus.yaml +6 -0
  98. data/bridge/sdk-core/fsm/Cargo.toml +18 -0
  99. data/bridge/sdk-core/fsm/LICENSE.txt +23 -0
  100. data/bridge/sdk-core/fsm/README.md +3 -0
  101. data/bridge/sdk-core/fsm/rustfsm_procmacro/Cargo.toml +27 -0
  102. data/bridge/sdk-core/fsm/rustfsm_procmacro/LICENSE.txt +23 -0
  103. data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +647 -0
  104. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/progress.rs +8 -0
  105. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/dupe_transitions_fail.rs +18 -0
  106. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/dupe_transitions_fail.stderr +12 -0
  107. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/dynamic_dest_pass.rs +41 -0
  108. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/forgot_name_fail.rs +14 -0
  109. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/forgot_name_fail.stderr +11 -0
  110. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/handler_arg_pass.rs +32 -0
  111. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/handler_pass.rs +31 -0
  112. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/medium_complex_pass.rs +46 -0
  113. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.rs +29 -0
  114. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +12 -0
  115. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/simple_pass.rs +32 -0
  116. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/struct_event_variant_fail.rs +18 -0
  117. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/struct_event_variant_fail.stderr +5 -0
  118. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_more_item_event_variant_fail.rs +11 -0
  119. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_more_item_event_variant_fail.stderr +5 -0
  120. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_zero_item_event_variant_fail.rs +11 -0
  121. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_zero_item_event_variant_fail.stderr +5 -0
  122. data/bridge/sdk-core/fsm/rustfsm_trait/Cargo.toml +14 -0
  123. data/bridge/sdk-core/fsm/rustfsm_trait/LICENSE.txt +23 -0
  124. data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +249 -0
  125. data/bridge/sdk-core/fsm/src/lib.rs +2 -0
  126. data/bridge/sdk-core/histories/fail_wf_task.bin +0 -0
  127. data/bridge/sdk-core/histories/timer_workflow_history.bin +0 -0
  128. data/bridge/sdk-core/integ-with-otel.sh +7 -0
  129. data/bridge/sdk-core/protos/api_upstream/README.md +9 -0
  130. data/bridge/sdk-core/protos/api_upstream/api-linter.yaml +40 -0
  131. data/bridge/sdk-core/protos/api_upstream/buf.yaml +12 -0
  132. data/bridge/sdk-core/protos/api_upstream/dependencies/gogoproto/gogo.proto +141 -0
  133. data/bridge/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +86 -0
  134. data/bridge/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +83 -0
  135. data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +259 -0
  136. data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +112 -0
  137. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +46 -0
  138. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/cluster.proto +40 -0
  139. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +57 -0
  140. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +55 -0
  141. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +168 -0
  142. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +97 -0
  143. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +51 -0
  144. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +50 -0
  145. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +41 -0
  146. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +60 -0
  147. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +59 -0
  148. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +51 -0
  149. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +122 -0
  150. data/bridge/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +108 -0
  151. data/bridge/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +114 -0
  152. data/bridge/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +56 -0
  153. data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +751 -0
  154. data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +97 -0
  155. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +161 -0
  156. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +99 -0
  157. data/bridge/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +61 -0
  158. data/bridge/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +55 -0
  159. data/bridge/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +300 -0
  160. data/bridge/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +108 -0
  161. data/bridge/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +46 -0
  162. data/bridge/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +59 -0
  163. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +145 -0
  164. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +1124 -0
  165. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +401 -0
  166. data/bridge/sdk-core/protos/grpc/health/v1/health.proto +63 -0
  167. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +78 -0
  168. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +79 -0
  169. data/bridge/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +210 -0
  170. data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +77 -0
  171. data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +15 -0
  172. data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +30 -0
  173. data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +30 -0
  174. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +261 -0
  175. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +297 -0
  176. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +29 -0
  177. data/bridge/sdk-core/protos/testsrv_upstream/api-linter.yaml +38 -0
  178. data/bridge/sdk-core/protos/testsrv_upstream/buf.yaml +13 -0
  179. data/bridge/sdk-core/protos/testsrv_upstream/dependencies/gogoproto/gogo.proto +141 -0
  180. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +63 -0
  181. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +90 -0
  182. data/bridge/sdk-core/rustfmt.toml +1 -0
  183. data/bridge/sdk-core/sdk/Cargo.toml +47 -0
  184. data/bridge/sdk-core/sdk/LICENSE.txt +23 -0
  185. data/bridge/sdk-core/sdk/src/activity_context.rs +230 -0
  186. data/bridge/sdk-core/sdk/src/app_data.rs +37 -0
  187. data/bridge/sdk-core/sdk/src/conversions.rs +8 -0
  188. data/bridge/sdk-core/sdk/src/interceptors.rs +17 -0
  189. data/bridge/sdk-core/sdk/src/lib.rs +792 -0
  190. data/bridge/sdk-core/sdk/src/payload_converter.rs +11 -0
  191. data/bridge/sdk-core/sdk/src/workflow_context/options.rs +295 -0
  192. data/bridge/sdk-core/sdk/src/workflow_context.rs +683 -0
  193. data/bridge/sdk-core/sdk/src/workflow_future.rs +503 -0
  194. data/bridge/sdk-core/sdk-core-protos/Cargo.toml +30 -0
  195. data/bridge/sdk-core/sdk-core-protos/LICENSE.txt +23 -0
  196. data/bridge/sdk-core/sdk-core-protos/build.rs +108 -0
  197. data/bridge/sdk-core/sdk-core-protos/src/constants.rs +7 -0
  198. data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +497 -0
  199. data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +230 -0
  200. data/bridge/sdk-core/sdk-core-protos/src/lib.rs +1910 -0
  201. data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +38 -0
  202. data/bridge/sdk-core/sdk-core-protos/src/utilities.rs +14 -0
  203. data/bridge/sdk-core/test-utils/Cargo.toml +35 -0
  204. data/bridge/sdk-core/test-utils/src/canned_histories.rs +1579 -0
  205. data/bridge/sdk-core/test-utils/src/histfetch.rs +28 -0
  206. data/bridge/sdk-core/test-utils/src/lib.rs +598 -0
  207. data/bridge/sdk-core/tests/integ_tests/client_tests.rs +36 -0
  208. data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +128 -0
  209. data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +218 -0
  210. data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +146 -0
  211. data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +437 -0
  212. data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +93 -0
  213. data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +878 -0
  214. data/bridge/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +61 -0
  215. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +59 -0
  216. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +58 -0
  217. data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +50 -0
  218. data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +60 -0
  219. data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +54 -0
  220. data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +634 -0
  221. data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +113 -0
  222. data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +137 -0
  223. data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +93 -0
  224. data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +167 -0
  225. data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +99 -0
  226. data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +131 -0
  227. data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +75 -0
  228. data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +587 -0
  229. data/bridge/sdk-core/tests/load_tests.rs +191 -0
  230. data/bridge/sdk-core/tests/main.rs +111 -0
  231. data/bridge/sdk-core/tests/runner.rs +93 -0
  232. data/bridge/src/connection.rs +167 -0
  233. data/bridge/src/lib.rs +180 -0
  234. data/bridge/src/runtime.rs +47 -0
  235. data/bridge/src/worker.rs +73 -0
  236. data/ext/Rakefile +9 -0
  237. data/lib/bridge.so +0 -0
  238. data/lib/gen/dependencies/gogoproto/gogo_pb.rb +14 -0
  239. data/lib/gen/temporal/api/batch/v1/message_pb.rb +48 -0
  240. data/lib/gen/temporal/api/cluster/v1/message_pb.rb +67 -0
  241. data/lib/gen/temporal/api/command/v1/message_pb.rb +166 -0
  242. data/lib/gen/temporal/api/common/v1/message_pb.rb +69 -0
  243. data/lib/gen/temporal/api/enums/v1/batch_operation_pb.rb +32 -0
  244. data/lib/gen/temporal/api/enums/v1/cluster_pb.rb +26 -0
  245. data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +37 -0
  246. data/lib/gen/temporal/api/enums/v1/common_pb.rb +41 -0
  247. data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +67 -0
  248. data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +71 -0
  249. data/lib/gen/temporal/api/enums/v1/namespace_pb.rb +37 -0
  250. data/lib/gen/temporal/api/enums/v1/query_pb.rb +31 -0
  251. data/lib/gen/temporal/api/enums/v1/reset_pb.rb +24 -0
  252. data/lib/gen/temporal/api/enums/v1/schedule_pb.rb +28 -0
  253. data/lib/gen/temporal/api/enums/v1/task_queue_pb.rb +30 -0
  254. data/lib/gen/temporal/api/enums/v1/update_pb.rb +28 -0
  255. data/lib/gen/temporal/api/enums/v1/workflow_pb.rb +89 -0
  256. data/lib/gen/temporal/api/errordetails/v1/message_pb.rb +84 -0
  257. data/lib/gen/temporal/api/failure/v1/message_pb.rb +83 -0
  258. data/lib/gen/temporal/api/filter/v1/message_pb.rb +40 -0
  259. data/lib/gen/temporal/api/history/v1/message_pb.rb +489 -0
  260. data/lib/gen/temporal/api/namespace/v1/message_pb.rb +63 -0
  261. data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +125 -0
  262. data/lib/gen/temporal/api/operatorservice/v1/service_pb.rb +20 -0
  263. data/lib/gen/temporal/api/query/v1/message_pb.rb +38 -0
  264. data/lib/gen/temporal/api/replication/v1/message_pb.rb +37 -0
  265. data/lib/gen/temporal/api/schedule/v1/message_pb.rb +128 -0
  266. data/lib/gen/temporal/api/taskqueue/v1/message_pb.rb +73 -0
  267. data/lib/gen/temporal/api/update/v1/message_pb.rb +26 -0
  268. data/lib/gen/temporal/api/version/v1/message_pb.rb +41 -0
  269. data/lib/gen/temporal/api/workflow/v1/message_pb.rb +110 -0
  270. data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +771 -0
  271. data/lib/gen/temporal/api/workflowservice/v1/service_pb.rb +20 -0
  272. data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +58 -0
  273. data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +57 -0
  274. data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +222 -0
  275. data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +57 -0
  276. data/lib/gen/temporal/sdk/core/common/common_pb.rb +22 -0
  277. data/lib/gen/temporal/sdk/core/core_interface_pb.rb +34 -0
  278. data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +27 -0
  279. data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +164 -0
  280. data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +192 -0
  281. data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +34 -0
  282. data/lib/temporal/bridge.rb +14 -0
  283. data/lib/temporal/client/implementation.rb +339 -0
  284. data/lib/temporal/client/workflow_handle.rb +243 -0
  285. data/lib/temporal/client.rb +144 -0
  286. data/lib/temporal/connection.rb +736 -0
  287. data/lib/temporal/data_converter.rb +150 -0
  288. data/lib/temporal/error/failure.rb +194 -0
  289. data/lib/temporal/error/workflow_failure.rb +17 -0
  290. data/lib/temporal/errors.rb +22 -0
  291. data/lib/temporal/failure_converter/base.rb +26 -0
  292. data/lib/temporal/failure_converter/basic.rb +313 -0
  293. data/lib/temporal/failure_converter.rb +8 -0
  294. data/lib/temporal/interceptor/chain.rb +27 -0
  295. data/lib/temporal/interceptor/client.rb +102 -0
  296. data/lib/temporal/payload_codec/base.rb +32 -0
  297. data/lib/temporal/payload_converter/base.rb +24 -0
  298. data/lib/temporal/payload_converter/bytes.rb +26 -0
  299. data/lib/temporal/payload_converter/composite.rb +47 -0
  300. data/lib/temporal/payload_converter/encoding_base.rb +35 -0
  301. data/lib/temporal/payload_converter/json.rb +25 -0
  302. data/lib/temporal/payload_converter/nil.rb +25 -0
  303. data/lib/temporal/payload_converter.rb +14 -0
  304. data/lib/temporal/retry_policy.rb +82 -0
  305. data/lib/temporal/retry_state.rb +35 -0
  306. data/lib/temporal/runtime.rb +22 -0
  307. data/lib/temporal/timeout_type.rb +29 -0
  308. data/lib/temporal/version.rb +3 -0
  309. data/lib/temporal/workflow/execution_info.rb +54 -0
  310. data/lib/temporal/workflow/execution_status.rb +36 -0
  311. data/lib/temporal/workflow/id_reuse_policy.rb +36 -0
  312. data/lib/temporal/workflow/query_reject_condition.rb +33 -0
  313. data/lib/temporal.rb +8 -0
  314. data/lib/temporalio.rb +3 -0
  315. data/lib/thermite_patch.rb +23 -0
  316. data/temporalio.gemspec +41 -0
  317. metadata +583 -0
@@ -0,0 +1,940 @@
1
+ use crate::{
2
+ abstractions::{dbg_panic, stream_when_allowed, MeteredSemaphore},
3
+ protosext::ValidPollWFTQResponse,
4
+ telemetry::metrics::workflow_worker_type,
5
+ worker::{
6
+ workflow::{history_update::NextPageToken, run_cache::RunCache, *},
7
+ LocalActRequest, LocalActivityResolution, LEGACY_QUERY_ID,
8
+ },
9
+ MetricsContext,
10
+ };
11
+ use futures::{stream, stream::PollNext, Stream, StreamExt};
12
+ use std::{collections::VecDeque, fmt::Debug, future, sync::Arc, time::Instant};
13
+ use temporal_sdk_core_api::errors::{PollWfError, WFMachinesError};
14
+ use temporal_sdk_core_protos::{
15
+ coresdk::{
16
+ workflow_activation::{
17
+ create_evict_activation, query_to_job, remove_from_cache::EvictionReason,
18
+ workflow_activation_job,
19
+ },
20
+ workflow_completion::Failure,
21
+ },
22
+ temporal::api::{enums::v1::WorkflowTaskFailedCause, failure::v1::Failure as TFailure},
23
+ };
24
+ use tokio::sync::{mpsc::unbounded_channel, oneshot};
25
+ use tokio_stream::wrappers::UnboundedReceiverStream;
26
+ use tokio_util::sync::CancellationToken;
27
+ use tracing::{Level, Span};
28
+
29
+ /// This struct holds all the state needed for tracking what workflow runs are currently cached
30
+ /// and how WFTs should be dispatched to them, etc.
31
+ ///
32
+ /// See [WFStream::build] for more
33
+ pub(crate) struct WFStream {
34
+ runs: RunCache,
35
+ /// Buffered polls for new runs which need a cache slot to open up before we can handle them
36
+ buffered_polls_need_cache_slot: VecDeque<PermittedWFT>,
37
+
38
+ /// Client for accessing server for history pagination etc.
39
+ client: Arc<dyn WorkerClient>,
40
+
41
+ /// Ensures we stay at or below this worker's maximum concurrent workflow task limit
42
+ wft_semaphore: MeteredSemaphore,
43
+ shutdown_token: CancellationToken,
44
+
45
+ metrics: MetricsContext,
46
+ }
47
+ /// All possible inputs to the [WFStream]
48
+ #[derive(derive_more::From, Debug)]
49
+ enum WFStreamInput {
50
+ NewWft(PermittedWFT),
51
+ Local(LocalInput),
52
+ /// The stream given to us which represents the poller (or a mock) terminated.
53
+ PollerDead,
54
+ /// The stream given to us which represents the poller (or a mock) encountered a non-retryable
55
+ /// error while polling
56
+ PollerError(tonic::Status),
57
+ }
58
+ impl From<RunUpdateResponse> for WFStreamInput {
59
+ fn from(r: RunUpdateResponse) -> Self {
60
+ WFStreamInput::Local(LocalInput {
61
+ input: LocalInputs::RunUpdateResponse(r.kind),
62
+ span: r.span,
63
+ })
64
+ }
65
+ }
66
+ /// A non-poller-received input to the [WFStream]
67
+ #[derive(derive_more::DebugCustom)]
68
+ #[debug(fmt = "LocalInput {{ {:?} }}", input)]
69
+ pub(super) struct LocalInput {
70
+ pub input: LocalInputs,
71
+ pub span: Span,
72
+ }
73
+ /// Everything that _isn't_ a poll which may affect workflow state. Always higher priority than
74
+ /// new polls.
75
+ #[derive(Debug, derive_more::From)]
76
+ pub(super) enum LocalInputs {
77
+ Completion(WFActCompleteMsg),
78
+ LocalResolution(LocalResolutionMsg),
79
+ PostActivation(PostActivationMsg),
80
+ RunUpdateResponse(RunUpdateResponseKind),
81
+ RequestEviction(RequestEvictMsg),
82
+ GetStateInfo(GetStateInfoMsg),
83
+ }
84
+ #[derive(Debug, derive_more::From)]
85
+ #[allow(clippy::large_enum_variant)] // PollerDead only ever gets used once, so not important.
86
+ enum ExternalPollerInputs {
87
+ NewWft(PermittedWFT),
88
+ PollerDead,
89
+ PollerError(tonic::Status),
90
+ }
91
+ impl From<ExternalPollerInputs> for WFStreamInput {
92
+ fn from(l: ExternalPollerInputs) -> Self {
93
+ match l {
94
+ ExternalPollerInputs::NewWft(v) => WFStreamInput::NewWft(v),
95
+ ExternalPollerInputs::PollerDead => WFStreamInput::PollerDead,
96
+ ExternalPollerInputs::PollerError(e) => WFStreamInput::PollerError(e),
97
+ }
98
+ }
99
+ }
100
+
101
+ impl WFStream {
102
+ /// Constructs workflow state management and returns a stream which outputs activations.
103
+ ///
104
+ /// * `external_wfts` is a stream of validated poll responses as returned by a poller (or mock)
105
+ /// * `wfts_from_complete` is the recv side of a channel that new WFTs from completions should
106
+ /// come down. NOTE(review): the signature below has no such parameter; this bullet looks stale.
107
+ /// * `local_rx` is a stream of actions that workflow state needs to see. Things like
108
+ /// completions, local activities finishing, etc. See [LocalInputs].
109
+ ///
110
+ /// These inputs are combined, along with an internal feedback channel for run-specific updates,
111
+ /// to form the inputs to a stream of [WFStreamInput]s. The stream processor then takes
112
+ /// action on those inputs, and then may yield activations.
113
+ ///
114
+ /// Updating runs may need to do async work like fetching additional history. In order to
115
+ /// facilitate this, each run lives in its own task which is communicated with by sending
116
+ /// [RunAction]s and receiving [RunUpdateResponse]s via its [ManagedRunHandle].
117
+ pub(super) fn build(
118
+ basics: WorkflowBasics,
119
+ external_wfts: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
120
+ local_rx: impl Stream<Item = LocalInput> + Send + 'static,
121
+ client: Arc<dyn WorkerClient>,
122
+ local_activity_request_sink: impl Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution>
123
+ + Send
124
+ + Sync
125
+ + 'static,
126
+ ) -> impl Stream<Item = Result<ActivationOrAuto, PollWfError>> {
127
+ let wft_semaphore = MeteredSemaphore::new(
128
+ basics.max_outstanding_wfts,
129
+ basics.metrics.with_new_attrs([workflow_worker_type()]),
130
+ MetricsContext::available_task_slots,
131
+ );
132
+ let wft_sem_clone = wft_semaphore.clone();
133
+ let proceeder = move || {
134
+ let wft_sem_clone = wft_sem_clone.clone();
135
+ async move { wft_sem_clone.acquire_owned().await.unwrap() }
136
+ };
137
+ let poller_wfts = stream_when_allowed(external_wfts, proceeder);
138
+ let (run_update_tx, run_update_rx) = unbounded_channel();
139
+ let local_rx = stream::select(
140
+ local_rx.map(Into::into),
141
+ UnboundedReceiverStream::new(run_update_rx).map(Into::into),
142
+ );
143
+ let all_inputs = stream::select_with_strategy(
144
+ local_rx,
145
+ poller_wfts
146
+ .map(|(wft, permit)| match wft {
147
+ Ok(wft) => ExternalPollerInputs::NewWft(PermittedWFT { wft, permit }),
148
+ Err(e) => ExternalPollerInputs::PollerError(e),
149
+ })
150
+ .chain(stream::once(async { ExternalPollerInputs::PollerDead }))
151
+ .map(Into::into)
152
+ .boxed(),
153
+ // Priority always goes to the local stream
154
+ |_: &mut ()| PollNext::Left,
155
+ );
156
+ let mut state = WFStream {
157
+ buffered_polls_need_cache_slot: Default::default(),
158
+ runs: RunCache::new(
159
+ basics.max_cached_workflows,
160
+ basics.namespace.clone(),
161
+ run_update_tx,
162
+ Arc::new(local_activity_request_sink),
163
+ basics.metrics.clone(),
164
+ ),
165
+ client,
166
+ wft_semaphore,
167
+ shutdown_token: basics.shutdown_token,
168
+ metrics: basics.metrics,
169
+ };
170
+ all_inputs
171
+ .map(move |action| {
172
+ let span = span!(Level::DEBUG, "new_stream_input", action=?action);
173
+ let _span_g = span.enter();
174
+
175
+ let maybe_activation = match action {
176
+ WFStreamInput::NewWft(pwft) => {
177
+ debug!(run_id=%pwft.wft.workflow_execution.run_id, "New WFT");
178
+ state.instantiate_or_update(pwft);
179
+ None
180
+ }
181
+ WFStreamInput::Local(local_input) => {
182
+ let _span_g = local_input.span.enter();
183
+ match local_input.input {
184
+ LocalInputs::RunUpdateResponse(resp) => {
185
+ state.process_run_update_response(resp)
186
+ }
187
+ LocalInputs::Completion(completion) => {
188
+ state.process_completion(completion);
189
+ None
190
+ }
191
+ LocalInputs::PostActivation(report) => {
192
+ state.process_post_activation(report);
193
+ None
194
+ }
195
+ LocalInputs::LocalResolution(res) => {
196
+ state.local_resolution(res);
197
+ None
198
+ }
199
+ LocalInputs::RequestEviction(evict) => {
200
+ state.request_eviction(evict);
201
+ None
202
+ }
203
+ LocalInputs::GetStateInfo(gsi) => {
204
+ let _ = gsi.response_tx.send(WorkflowStateInfo {
205
+ cached_workflows: state.runs.len(),
206
+ outstanding_wft: state.outstanding_wfts(),
207
+ available_wft_permits: state.wft_semaphore.available_permits(),
208
+ });
209
+ None
210
+ }
211
+ }
212
+ }
213
+ WFStreamInput::PollerDead => {
214
+ debug!("WFT poller died, shutting down");
215
+ state.shutdown_token.cancel();
216
+ None
217
+ }
218
+ WFStreamInput::PollerError(e) => {
219
+ warn!("WFT poller errored, shutting down");
220
+ return Err(PollWfError::TonicError(e));
221
+ }
222
+ };
223
+
224
+ if let Some(ref act) = maybe_activation {
225
+ if let Some(run_handle) = state.runs.get_mut(act.run_id()) {
226
+ run_handle.insert_outstanding_activation(act);
227
+ } else {
228
+ dbg_panic!("Tried to insert activation for missing run!");
229
+ }
230
+ }
231
+ state.reconcile_buffered();
232
+ if state.shutdown_done() {
233
+ return Err(PollWfError::ShutDown);
234
+ }
235
+
236
+ Ok(maybe_activation)
237
+ })
238
+ .filter_map(|o| {
239
+ future::ready(match o {
240
+ Ok(None) => None,
241
+ Ok(Some(v)) => Some(Ok(v)),
242
+ Err(e) => {
243
+ if !matches!(e, PollWfError::ShutDown) {
244
+ error!(
245
+ "Workflow processing encountered fatal error and must shut down {:?}",
246
+ e
247
+ );
248
+ }
249
+ Some(Err(e))
250
+ }
251
+ })
252
+ })
253
+ // Stop the stream once we have shut down
254
+ .take_while(|o| future::ready(!matches!(o, Err(PollWfError::ShutDown))))
255
+ }
256
+
257
+ fn process_run_update_response(
258
+ &mut self,
259
+ resp: RunUpdateResponseKind,
260
+ ) -> Option<ActivationOrAuto> {
261
+ debug!(resp=%resp, "Processing run update response from machines");
262
+ match resp {
263
+ RunUpdateResponseKind::Good(mut resp) => {
264
+ let run_handle = self
265
+ .runs
266
+ .get_mut(&resp.run_id)
267
+ .expect("Workflow must exist, it just sent us an update response");
268
+ run_handle.have_seen_terminal_event = resp.have_seen_terminal_event;
269
+ run_handle.more_pending_work = resp.more_pending_work;
270
+ run_handle.last_action_acked = true;
271
+ run_handle.most_recently_processed_event_number =
272
+ resp.most_recently_processed_event_number;
273
+
274
+ let r = match resp.outgoing_activation {
275
+ Some(ActivationOrAuto::LangActivation(mut activation)) => {
276
+ if resp.in_response_to_wft {
277
+ let wft = run_handle
278
+ .wft
279
+ .as_mut()
280
+ .expect("WFT must exist for run just updated with one");
281
+ // If there are in-poll queries, insert jobs for those queries into the
282
+ // activation, but only if we hit the cache. If we didn't, those queries
283
+ // will need to be dealt with once replay is over
284
+ if !wft.pending_queries.is_empty() && wft.hit_cache {
285
+ put_queries_in_act(&mut activation, wft);
286
+ }
287
+ }
288
+
289
+ if activation.jobs.is_empty() {
290
+ dbg_panic!("Should not send lang activation with no jobs");
291
+ }
292
+ Some(ActivationOrAuto::LangActivation(activation))
293
+ }
294
+ Some(ActivationOrAuto::ReadyForQueries(mut act)) => {
295
+ if let Some(wft) = run_handle.wft.as_mut() {
296
+ put_queries_in_act(&mut act, wft);
297
+ Some(ActivationOrAuto::LangActivation(act))
298
+ } else {
299
+ dbg_panic!("Ready for queries but no WFT!");
300
+ None
301
+ }
302
+ }
303
+ a @ Some(ActivationOrAuto::Autocomplete { .. }) => a,
304
+ None => {
305
+ // If the response indicates there is no activation to send yet but there
306
+ // is more pending work, we should check again.
307
+ if run_handle.more_pending_work {
308
+ run_handle.check_more_activations();
309
+ None
310
+ } else if let Some(reason) = run_handle.trying_to_evict.as_ref() {
311
+ // If a run update came back and had nothing to do, but we're trying to
312
+ // evict, just do that now as long as there's no other outstanding work.
313
+ if run_handle.activation.is_none() && !run_handle.more_pending_work {
314
+ let mut evict_act = create_evict_activation(
315
+ resp.run_id,
316
+ reason.message.clone(),
317
+ reason.reason,
318
+ );
319
+ evict_act.history_length =
320
+ run_handle.most_recently_processed_event_number as u32;
321
+ Some(ActivationOrAuto::LangActivation(evict_act))
322
+ } else {
323
+ None
324
+ }
325
+ } else {
326
+ None
327
+ }
328
+ }
329
+ };
330
+ if let Some(f) = resp.fulfillable_complete.take() {
331
+ f.fulfill();
332
+ }
333
+
334
+ // After each run update, check if it's ready to handle any buffered poll
335
+ if matches!(&r, Some(ActivationOrAuto::Autocomplete { .. }) | None)
336
+ && !run_handle.has_any_pending_work(false, true)
337
+ {
338
+ if let Some(bufft) = run_handle.buffered_resp.take() {
339
+ self.instantiate_or_update(bufft);
340
+ }
341
+ }
342
+ r
343
+ }
344
+ RunUpdateResponseKind::Fail(fail) => {
345
+ if let Some(r) = self.runs.get_mut(&fail.run_id) {
346
+ r.last_action_acked = true;
347
+ }
348
+
349
+ if let Some(resp_chan) = fail.completion_resp {
350
+ // Automatically fail the workflow task in the event we couldn't update machines
351
+ let fail_cause = if matches!(&fail.err, WFMachinesError::Nondeterminism(_)) {
352
+ WorkflowTaskFailedCause::NonDeterministicError
353
+ } else {
354
+ WorkflowTaskFailedCause::Unspecified
355
+ };
356
+ let wft_fail_str = format!("{:?}", fail.err);
357
+ self.failed_completion(
358
+ fail.run_id,
359
+ fail_cause,
360
+ fail.err.evict_reason(),
361
+ TFailure::application_failure(wft_fail_str, false).into(),
362
+ resp_chan,
363
+ );
364
+ } else {
365
+ // TODO: This should probably also fail workflow tasks, but that wasn't
366
+ // implemented pre-refactor either.
367
+ warn!(error=?fail.err, run_id=%fail.run_id, "Error while updating workflow");
368
+ self.request_eviction(RequestEvictMsg {
369
+ run_id: fail.run_id,
370
+ message: format!("Error while updating workflow: {:?}", fail.err),
371
+ reason: fail.err.evict_reason(),
372
+ });
373
+ }
374
+ None
375
+ }
376
+ }
377
+ }
378
+
379
+ #[instrument(level = "debug", skip(self, pwft),
380
+ fields(run_id=%pwft.wft.workflow_execution.run_id))]
381
+ fn instantiate_or_update(&mut self, pwft: PermittedWFT) {
382
+ let (mut work, permit) = if let Some(w) = self.buffer_resp_if_outstanding_work(pwft) {
383
+ (w.wft, w.permit)
384
+ } else {
385
+ return;
386
+ };
387
+
388
+ let run_id = work.workflow_execution.run_id.clone();
389
+ // If our cache is full and this WFT is for an unseen run we must first evict a run before
390
+ // we can deal with this task. So, buffer the task in that case.
391
+ if !self.runs.has_run(&run_id) && self.runs.is_full() {
392
+ self.buffer_resp_on_full_cache(PermittedWFT { wft: work, permit });
393
+ return;
394
+ }
395
+
396
+ let start_event_id = work.history.events.first().map(|e| e.event_id);
397
+ debug!(
398
+ run_id = %run_id,
399
+ task_token = %&work.task_token,
400
+ history_length = %work.history.events.len(),
401
+ start_event_id = ?start_event_id,
402
+ has_legacy_query = %work.legacy_query.is_some(),
403
+ attempt = %work.attempt,
404
+ "Applying new workflow task from server"
405
+ );
406
+
407
+ let wft_info = WorkflowTaskInfo {
408
+ attempt: work.attempt,
409
+ task_token: work.task_token,
410
+ };
411
+ let poll_resp_is_incremental = work
412
+ .history
413
+ .events
414
+ .get(0)
415
+ .map(|ev| ev.event_id > 1)
416
+ .unwrap_or_default();
417
+ let poll_resp_is_incremental = poll_resp_is_incremental || work.history.events.is_empty();
418
+
419
+ let mut did_miss_cache = !poll_resp_is_incremental;
420
+
421
+ let page_token = if !self.runs.has_run(&run_id) && poll_resp_is_incremental {
422
+ debug!(run_id=?run_id, "Workflow task has partial history, but workflow is not in \
423
+ cache. Will fetch history");
424
+ self.metrics.sticky_cache_miss();
425
+ did_miss_cache = true;
426
+ NextPageToken::FetchFromStart
427
+ } else {
428
+ work.next_page_token.into()
429
+ };
430
+ let history_update = HistoryUpdate::new(
431
+ HistoryPaginator::new(
432
+ work.history,
433
+ work.workflow_execution.workflow_id.clone(),
434
+ run_id.clone(),
435
+ page_token,
436
+ self.client.clone(),
437
+ ),
438
+ work.previous_started_event_id,
439
+ );
440
+ let legacy_query_from_poll = work
441
+ .legacy_query
442
+ .take()
443
+ .map(|q| query_to_job(LEGACY_QUERY_ID.to_string(), q));
444
+
445
+ let mut pending_queries = work.query_requests.into_iter().collect::<Vec<_>>();
446
+ if !pending_queries.is_empty() && legacy_query_from_poll.is_some() {
447
+ error!(
448
+ "Server issued both normal and legacy queries. This should not happen. Please \
449
+ file a bug report."
450
+ );
451
+ self.request_eviction(RequestEvictMsg {
452
+ run_id,
453
+ message: "Server issued both normal and legacy query".to_string(),
454
+ reason: EvictionReason::Fatal,
455
+ });
456
+ return;
457
+ }
458
+ if let Some(lq) = legacy_query_from_poll {
459
+ pending_queries.push(lq);
460
+ }
461
+
462
+ let start_time = Instant::now();
463
+ let run_handle = self.runs.instantiate_or_update(
464
+ &run_id,
465
+ &work.workflow_execution.workflow_id,
466
+ &work.workflow_type,
467
+ history_update,
468
+ start_time,
469
+ );
470
+ run_handle.wft = Some(OutstandingTask {
471
+ info: wft_info,
472
+ hit_cache: !did_miss_cache,
473
+ pending_queries,
474
+ start_time,
475
+ permit,
476
+ })
477
+ }
478
+
479
+ #[instrument(level = "debug", skip(self, complete),
480
+ fields(run_id=%complete.completion.run_id()))]
481
+ fn process_completion(&mut self, complete: WFActCompleteMsg) {
482
+ match complete.completion {
483
+ ValidatedCompletion::Success { run_id, commands } => {
484
+ self.successful_completion(run_id, commands, complete.response_tx);
485
+ }
486
+ ValidatedCompletion::Fail { run_id, failure } => {
487
+ self.failed_completion(
488
+ run_id,
489
+ WorkflowTaskFailedCause::Unspecified,
490
+ EvictionReason::LangFail,
491
+ failure,
492
+ complete.response_tx,
493
+ );
494
+ }
495
+ }
496
+ // Always queue evictions after completion when we have a zero-size cache
497
+ if self.runs.cache_capacity() == 0 {
498
+ self.request_eviction_of_lru_run();
499
+ }
500
+ }
501
+
502
+ fn successful_completion(
503
+ &mut self,
504
+ run_id: String,
505
+ mut commands: Vec<WFCommand>,
506
+ resp_chan: oneshot::Sender<ActivationCompleteResult>,
507
+ ) {
508
+ let activation_was_only_eviction = self.activation_has_only_eviction(&run_id);
509
+ let (task_token, has_pending_query, start_time) =
510
+ if let Some(entry) = self.get_task(&run_id) {
511
+ (
512
+ entry.info.task_token.clone(),
513
+ !entry.pending_queries.is_empty(),
514
+ entry.start_time,
515
+ )
516
+ } else {
517
+ if !activation_was_only_eviction {
518
+ // Not an error if this was an eviction, since it's normal to issue eviction
519
+ // activations without an associated workflow task in that case.
520
+ dbg_panic!(
521
+ "Attempted to complete activation for run {} without associated workflow task",
522
+ run_id
523
+ );
524
+ }
525
+ self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
526
+ return;
527
+ };
528
+
529
+ // If the only command from the activation is a legacy query response, that means we need
530
+ // to respond differently than a typical activation.
531
+ if matches!(&commands.as_slice(),
532
+ &[WFCommand::QueryResponse(qr)] if qr.query_id == LEGACY_QUERY_ID)
533
+ {
534
+ let qr = match commands.remove(0) {
535
+ WFCommand::QueryResponse(qr) => qr,
536
+ _ => unreachable!("We just verified this is the only command"),
537
+ };
538
+ self.reply_to_complete(
539
+ &run_id,
540
+ ActivationCompleteOutcome::ReportWFTSuccess(ServerCommandsWithWorkflowInfo {
541
+ task_token,
542
+ action: ActivationAction::RespondLegacyQuery {
543
+ result: Box::new(qr),
544
+ },
545
+ }),
546
+ resp_chan,
547
+ );
548
+ } else {
549
+ // First strip out query responses from other commands that actually affect machines
550
+ // Would be prettier with `drain_filter`
551
+ let mut i = 0;
552
+ let mut query_responses = vec![];
553
+ while i < commands.len() {
554
+ if matches!(commands[i], WFCommand::QueryResponse(_)) {
555
+ if let WFCommand::QueryResponse(qr) = commands.remove(i) {
556
+ query_responses.push(qr);
557
+ }
558
+ } else {
559
+ i += 1;
560
+ }
561
+ }
562
+
563
+ let activation_was_eviction = self.activation_has_eviction(&run_id);
564
+ if let Some(rh) = self.runs.get_mut(&run_id) {
565
+ rh.send_completion(RunActivationCompletion {
566
+ task_token,
567
+ start_time,
568
+ commands,
569
+ activation_was_eviction,
570
+ activation_was_only_eviction,
571
+ has_pending_query,
572
+ query_responses,
573
+ resp_chan: Some(resp_chan),
574
+ });
575
+ } else {
576
+ dbg_panic!("Run {} missing during completion", run_id);
577
+ }
578
+ };
579
+ }
580
+
581
+ fn failed_completion(
582
+ &mut self,
583
+ run_id: String,
584
+ cause: WorkflowTaskFailedCause,
585
+ reason: EvictionReason,
586
+ failure: Failure,
587
+ resp_chan: oneshot::Sender<ActivationCompleteResult>,
588
+ ) {
589
+ let tt = if let Some(tt) = self.get_task(&run_id).map(|t| t.info.task_token.clone()) {
590
+ tt
591
+ } else {
592
+ dbg_panic!(
593
+ "No workflow task for run id {} found when trying to fail activation",
594
+ run_id
595
+ );
596
+ self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
597
+ return;
598
+ };
599
+
600
+ if let Some(m) = self.run_metrics(&run_id) {
601
+ m.wf_task_failed();
602
+ }
603
+ let message = format!("Workflow activation completion failed: {:?}", &failure);
604
+ // Blow up any cached data associated with the workflow
605
+ let should_report = match self.request_eviction(RequestEvictMsg {
606
+ run_id: run_id.clone(),
607
+ message,
608
+ reason,
609
+ }) {
610
+ EvictionRequestResult::EvictionRequested(Some(attempt))
611
+ | EvictionRequestResult::EvictionAlreadyRequested(Some(attempt)) => attempt <= 1,
612
+ _ => false,
613
+ };
614
+ // If the outstanding WFT is a legacy query task, report that we need to fail it
615
+ let outcome = if self
616
+ .runs
617
+ .get(&run_id)
618
+ .map(|rh| rh.pending_work_is_legacy_query())
619
+ .unwrap_or_default()
620
+ {
621
+ ActivationCompleteOutcome::ReportWFTFail(
622
+ FailedActivationWFTReport::ReportLegacyQueryFailure(tt, failure),
623
+ )
624
+ } else if should_report {
625
+ ActivationCompleteOutcome::ReportWFTFail(FailedActivationWFTReport::Report(
626
+ tt, cause, failure,
627
+ ))
628
+ } else {
629
+ ActivationCompleteOutcome::DoNothing
630
+ };
631
+ self.reply_to_complete(&run_id, outcome, resp_chan);
632
+ }
633
+
634
+ fn process_post_activation(&mut self, report: PostActivationMsg) {
635
+ let run_id = &report.run_id;
636
+
637
+ // If we reported to server, we always want to mark it complete.
638
+ let maybe_t = self.complete_wft(run_id, report.reported_wft_to_server);
639
+
640
+ if self
641
+ .get_activation(run_id)
642
+ .map(|a| a.has_eviction())
643
+ .unwrap_or_default()
644
+ {
645
+ self.evict_run(run_id);
646
+ };
647
+
648
+ if let Some(wft) = report.wft_from_complete {
649
+ debug!(run_id=%wft.workflow_execution.run_id, "New WFT from completion");
650
+ if let Some(t) = maybe_t {
651
+ self.instantiate_or_update(PermittedWFT {
652
+ wft,
653
+ permit: t.permit,
654
+ })
655
+ }
656
+ }
657
+
658
+ if let Some(rh) = self.runs.get_mut(run_id) {
659
+ // Delete the activation
660
+ rh.activation.take();
661
+ // Attempt to produce the next activation if needed
662
+ rh.check_more_activations();
663
+ }
664
+ }
665
+
666
+ fn local_resolution(&mut self, msg: LocalResolutionMsg) {
667
+ let run_id = msg.run_id;
668
+ if let Some(rh) = self.runs.get_mut(&run_id) {
669
+ rh.send_local_resolution(msg.res)
670
+ } else {
671
+ // It isn't an explicit error if the machine is missing when a local activity resolves.
672
+ // This can happen if an activity reports a timeout after we stopped caring about it.
673
+ debug!(run_id = %run_id,
674
+ "Tried to resolve a local activity for a run we are no longer tracking");
675
+ }
676
+ }
677
+
678
+ /// Request a workflow eviction. This will (eventually, after replay is done) queue up an
679
+ /// activation to evict the workflow from the lang side. Workflow will not *actually* be evicted
680
+ /// until lang replies to that activation
681
+ fn request_eviction(&mut self, info: RequestEvictMsg) -> EvictionRequestResult {
682
+ let activation_has_eviction = self.activation_has_eviction(&info.run_id);
683
+ if let Some(rh) = self.runs.get_mut(&info.run_id) {
684
+ let attempts = rh.wft.as_ref().map(|wt| wt.info.attempt);
685
+ if !activation_has_eviction && rh.trying_to_evict.is_none() {
686
+ debug!(run_id=%info.run_id, reason=%info.message, "Eviction requested");
687
+ rh.trying_to_evict = Some(info);
688
+ rh.check_more_activations();
689
+ EvictionRequestResult::EvictionRequested(attempts)
690
+ } else {
691
+ EvictionRequestResult::EvictionAlreadyRequested(attempts)
692
+ }
693
+ } else {
694
+ warn!(run_id=%info.run_id, "Eviction requested for unknown run");
695
+ EvictionRequestResult::NotFound
696
+ }
697
+ }
698
+
699
+ fn request_eviction_of_lru_run(&mut self) -> EvictionRequestResult {
700
+ if let Some(lru_run_id) = self.runs.current_lru_run() {
701
+ let run_id = lru_run_id.to_string();
702
+ self.request_eviction(RequestEvictMsg {
703
+ run_id,
704
+ message: "Workflow cache full".to_string(),
705
+ reason: EvictionReason::CacheFull,
706
+ })
707
+ } else {
708
+ // This branch shouldn't really be possible
709
+ EvictionRequestResult::NotFound
710
+ }
711
+ }
712
+
713
+ /// Evict a workflow from the cache by its run id. Any existing pending activations will be
714
+ /// destroyed, and any outstanding activations invalidated.
715
+ fn evict_run(&mut self, run_id: &str) {
716
+ debug!(run_id=%run_id, "Evicting run");
717
+
718
+ let mut did_take_buff = false;
719
+ // Now it can safely be deleted, it'll get recreated once the un-buffered poll is handled if
720
+ // there was one.
721
+ if let Some(mut rh) = self.runs.remove(run_id) {
722
+ rh.handle.abort();
723
+
724
+ if let Some(buff) = rh.buffered_resp.take() {
725
+ self.instantiate_or_update(buff);
726
+ did_take_buff = true;
727
+ }
728
+ }
729
+
730
+ if !did_take_buff {
731
+ // If there wasn't a buffered poll, there might be one for a different run which needs
732
+ // a free cache slot, and now there is.
733
+ if let Some(buff) = self.buffered_polls_need_cache_slot.pop_front() {
734
+ self.instantiate_or_update(buff);
735
+ }
736
+ }
737
+ }
738
+
739
+ fn complete_wft(
740
+ &mut self,
741
+ run_id: &str,
742
+ reported_wft_to_server: bool,
743
+ ) -> Option<OutstandingTask> {
744
+ // If the WFT completion wasn't sent to the server, but we did see the final event, we still
745
+ // want to clear the workflow task. This can really only happen in replay testing, where we
746
+ // will generate poll responses with complete history but no attached query, and such a WFT
747
+ // would never really exist. The server wouldn't send a workflow task with nothing to do,
748
+ // but they are very useful for testing complete replay.
749
+ let saw_final = self
750
+ .runs
751
+ .get(run_id)
752
+ .map(|r| r.have_seen_terminal_event)
753
+ .unwrap_or_default();
754
+ if !saw_final && !reported_wft_to_server {
755
+ return None;
756
+ }
757
+
758
+ if let Some(rh) = self.runs.get_mut(run_id) {
759
+ // Can't mark the WFT complete if there are pending queries, as doing so would destroy
760
+ // them.
761
+ if rh
762
+ .wft
763
+ .as_ref()
764
+ .map(|wft| !wft.pending_queries.is_empty())
765
+ .unwrap_or_default()
766
+ {
767
+ return None;
768
+ }
769
+
770
+ debug!("Marking WFT completed");
771
+ let retme = rh.wft.take();
772
+ if let Some(ot) = &retme {
773
+ if let Some(m) = self.run_metrics(run_id) {
774
+ m.wf_task_latency(ot.start_time.elapsed());
775
+ }
776
+ }
777
+ retme
778
+ } else {
779
+ None
780
+ }
781
+ }
782
+
783
+ /// Stores some work if there is any outstanding WFT or activation for the run. If there was
784
+ /// not, returns the work back out inside the option.
785
+ fn buffer_resp_if_outstanding_work(&mut self, work: PermittedWFT) -> Option<PermittedWFT> {
786
+ let run_id = &work.wft.workflow_execution.run_id;
787
+ if let Some(mut run) = self.runs.get_mut(run_id) {
788
+ let about_to_issue_evict = run.trying_to_evict.is_some() && !run.last_action_acked;
789
+ let has_wft = run.wft.is_some();
790
+ let has_activation = run.activation.is_some();
791
+ if has_wft
792
+ || has_activation
793
+ || about_to_issue_evict
794
+ || run.more_pending_work
795
+ || !run.last_action_acked
796
+ {
797
+ debug!(run_id = %run_id, run = ?run,
798
+ "Got new WFT for a run with outstanding work, buffering it");
799
+ run.buffered_resp = Some(work);
800
+ None
801
+ } else {
802
+ Some(work)
803
+ }
804
+ } else {
805
+ Some(work)
806
+ }
807
+ }
808
+
809
+ fn buffer_resp_on_full_cache(&mut self, work: PermittedWFT) {
810
+ debug!(run_id=%work.wft.workflow_execution.run_id, "Buffering WFT because cache is full");
811
+ // If there's already a buffered poll for the run, replace it.
812
+ if let Some(rh) = self
813
+ .buffered_polls_need_cache_slot
814
+ .iter_mut()
815
+ .find(|w| w.wft.workflow_execution.run_id == work.wft.workflow_execution.run_id)
816
+ {
817
+ *rh = work;
818
+ } else {
819
+ // Otherwise push it to the back
820
+ self.buffered_polls_need_cache_slot.push_back(work);
821
+ }
822
+ }
823
+
824
+ /// Makes sure we have enough pending evictions to fulfill the needs of buffered WFTs who are
825
+ /// waiting on a cache slot
826
+ fn reconcile_buffered(&mut self) {
827
+ // We must ensure that there are at least as many pending evictions as there are tasks
828
+ // that we might need to un-buffer (skipping runs which already have buffered tasks for
829
+ // themselves)
830
+ let num_in_buff = self.buffered_polls_need_cache_slot.len();
831
+ let mut evict_these = vec![];
832
+ let num_existing_evictions = self
833
+ .runs
834
+ .runs_lru_order()
835
+ .filter(|(_, h)| h.trying_to_evict.is_some())
836
+ .count();
837
+ let mut num_evicts_needed = num_in_buff.saturating_sub(num_existing_evictions);
838
+ for (rid, handle) in self.runs.runs_lru_order() {
839
+ if num_evicts_needed == 0 {
840
+ break;
841
+ }
842
+ if handle.buffered_resp.is_none() {
843
+ num_evicts_needed -= 1;
844
+ evict_these.push(rid.to_string());
845
+ }
846
+ }
847
+ for run_id in evict_these {
848
+ self.request_eviction(RequestEvictMsg {
849
+ run_id,
850
+ message: "Workflow cache full".to_string(),
851
+ reason: EvictionReason::CacheFull,
852
+ });
853
+ }
854
+ }
855
+
856
+ fn reply_to_complete(
857
+ &self,
858
+ run_id: &str,
859
+ outcome: ActivationCompleteOutcome,
860
+ chan: oneshot::Sender<ActivationCompleteResult>,
861
+ ) {
862
+ let most_recently_processed_event = self
863
+ .runs
864
+ .peek(run_id)
865
+ .map(|rh| rh.most_recently_processed_event_number)
866
+ .unwrap_or_default();
867
+ chan.send(ActivationCompleteResult {
868
+ most_recently_processed_event,
869
+ outcome,
870
+ })
871
+ .expect("Rcv half of activation reply not dropped");
872
+ }
873
+
874
+ fn shutdown_done(&self) -> bool {
875
+ let all_runs_ready = self
876
+ .runs
877
+ .handles()
878
+ .all(|r| !r.has_any_pending_work(true, false));
879
+ if self.shutdown_token.is_cancelled() && all_runs_ready {
880
+ info!("Workflow shutdown is done");
881
+ true
882
+ } else {
883
+ false
884
+ }
885
+ }
886
+
887
+ fn get_task(&mut self, run_id: &str) -> Option<&OutstandingTask> {
888
+ self.runs.get(run_id).and_then(|rh| rh.wft.as_ref())
889
+ }
890
+
891
+ fn get_activation(&mut self, run_id: &str) -> Option<&OutstandingActivation> {
892
+ self.runs.get(run_id).and_then(|rh| rh.activation.as_ref())
893
+ }
894
+
895
+ fn run_metrics(&mut self, run_id: &str) -> Option<&MetricsContext> {
896
+ self.runs.get(run_id).map(|r| &r.metrics)
897
+ }
898
+
899
+ fn activation_has_only_eviction(&mut self, run_id: &str) -> bool {
900
+ self.runs
901
+ .get(run_id)
902
+ .and_then(|rh| rh.activation)
903
+ .map(OutstandingActivation::has_only_eviction)
904
+ .unwrap_or_default()
905
+ }
906
+
907
+ fn activation_has_eviction(&mut self, run_id: &str) -> bool {
908
+ self.runs
909
+ .get(run_id)
910
+ .and_then(|rh| rh.activation)
911
+ .map(OutstandingActivation::has_eviction)
912
+ .unwrap_or_default()
913
+ }
914
+
915
+ fn outstanding_wfts(&self) -> usize {
916
+ self.runs.handles().filter(|r| r.wft.is_some()).count()
917
+ }
918
+
919
+ // Useful when debugging
920
+ #[allow(dead_code)]
921
+ fn info_dump(&self, run_id: &str) {
922
+ if let Some(r) = self.runs.peek(run_id) {
923
+ info!(run_id, wft=?r.wft, activation=?r.activation, buffered=r.buffered_resp.is_some(),
924
+ trying_to_evict=r.trying_to_evict.is_some(), more_work=r.more_pending_work,
925
+ last_action_acked=r.last_action_acked);
926
+ } else {
927
+ info!(run_id, "Run not found");
928
+ }
929
+ }
930
+ }
931
+
932
+ /// Drains pending queries from the workflow task and appends them to the activation's jobs
933
+ fn put_queries_in_act(act: &mut WorkflowActivation, wft: &mut OutstandingTask) {
934
+ debug!(queries=?wft.pending_queries, "Dispatching queries");
935
+ let query_jobs = wft
936
+ .pending_queries
937
+ .drain(..)
938
+ .map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
939
+ act.jobs.extend(query_jobs);
940
+ }