temporalio 0.0.0 → 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (316) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +130 -0
  3. data/bridge/Cargo.lock +2865 -0
  4. data/bridge/Cargo.toml +26 -0
  5. data/bridge/sdk-core/ARCHITECTURE.md +76 -0
  6. data/bridge/sdk-core/Cargo.lock +2606 -0
  7. data/bridge/sdk-core/Cargo.toml +2 -0
  8. data/bridge/sdk-core/LICENSE.txt +23 -0
  9. data/bridge/sdk-core/README.md +107 -0
  10. data/bridge/sdk-core/arch_docs/diagrams/README.md +10 -0
  11. data/bridge/sdk-core/arch_docs/diagrams/sticky_queues.puml +40 -0
  12. data/bridge/sdk-core/arch_docs/diagrams/workflow_internals.svg +1 -0
  13. data/bridge/sdk-core/arch_docs/sticky_queues.md +51 -0
  14. data/bridge/sdk-core/bridge-ffi/Cargo.toml +24 -0
  15. data/bridge/sdk-core/bridge-ffi/LICENSE.txt +23 -0
  16. data/bridge/sdk-core/bridge-ffi/build.rs +25 -0
  17. data/bridge/sdk-core/bridge-ffi/include/sdk-core-bridge.h +249 -0
  18. data/bridge/sdk-core/bridge-ffi/src/lib.rs +825 -0
  19. data/bridge/sdk-core/bridge-ffi/src/wrappers.rs +211 -0
  20. data/bridge/sdk-core/client/Cargo.toml +40 -0
  21. data/bridge/sdk-core/client/LICENSE.txt +23 -0
  22. data/bridge/sdk-core/client/src/lib.rs +1294 -0
  23. data/bridge/sdk-core/client/src/metrics.rs +165 -0
  24. data/bridge/sdk-core/client/src/raw.rs +931 -0
  25. data/bridge/sdk-core/client/src/retry.rs +674 -0
  26. data/bridge/sdk-core/client/src/workflow_handle/mod.rs +185 -0
  27. data/bridge/sdk-core/core/Cargo.toml +116 -0
  28. data/bridge/sdk-core/core/LICENSE.txt +23 -0
  29. data/bridge/sdk-core/core/benches/workflow_replay.rs +73 -0
  30. data/bridge/sdk-core/core/src/abstractions.rs +166 -0
  31. data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +911 -0
  32. data/bridge/sdk-core/core/src/core_tests/child_workflows.rs +221 -0
  33. data/bridge/sdk-core/core/src/core_tests/determinism.rs +107 -0
  34. data/bridge/sdk-core/core/src/core_tests/local_activities.rs +515 -0
  35. data/bridge/sdk-core/core/src/core_tests/mod.rs +100 -0
  36. data/bridge/sdk-core/core/src/core_tests/queries.rs +736 -0
  37. data/bridge/sdk-core/core/src/core_tests/replay_flag.rs +65 -0
  38. data/bridge/sdk-core/core/src/core_tests/workers.rs +259 -0
  39. data/bridge/sdk-core/core/src/core_tests/workflow_cancels.rs +124 -0
  40. data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +2070 -0
  41. data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +515 -0
  42. data/bridge/sdk-core/core/src/lib.rs +175 -0
  43. data/bridge/sdk-core/core/src/log_export.rs +62 -0
  44. data/bridge/sdk-core/core/src/pollers/mod.rs +54 -0
  45. data/bridge/sdk-core/core/src/pollers/poll_buffer.rs +297 -0
  46. data/bridge/sdk-core/core/src/protosext/mod.rs +428 -0
  47. data/bridge/sdk-core/core/src/replay/mod.rs +71 -0
  48. data/bridge/sdk-core/core/src/retry_logic.rs +202 -0
  49. data/bridge/sdk-core/core/src/telemetry/metrics.rs +383 -0
  50. data/bridge/sdk-core/core/src/telemetry/mod.rs +412 -0
  51. data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +77 -0
  52. data/bridge/sdk-core/core/src/test_help/mod.rs +875 -0
  53. data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +580 -0
  54. data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +1042 -0
  55. data/bridge/sdk-core/core/src/worker/activities.rs +464 -0
  56. data/bridge/sdk-core/core/src/worker/client/mocks.rs +87 -0
  57. data/bridge/sdk-core/core/src/worker/client.rs +347 -0
  58. data/bridge/sdk-core/core/src/worker/mod.rs +566 -0
  59. data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +37 -0
  60. data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +110 -0
  61. data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +458 -0
  62. data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +911 -0
  63. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +298 -0
  64. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +171 -0
  65. data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +860 -0
  66. data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +140 -0
  67. data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +161 -0
  68. data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +133 -0
  69. data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +1448 -0
  70. data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +342 -0
  71. data/bridge/sdk-core/core/src/worker/workflow/machines/mutable_side_effect_state_machine.rs +127 -0
  72. data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +712 -0
  73. data/bridge/sdk-core/core/src/worker/workflow/machines/side_effect_state_machine.rs +71 -0
  74. data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +443 -0
  75. data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +439 -0
  76. data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +169 -0
  77. data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +246 -0
  78. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +96 -0
  79. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +1184 -0
  80. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +277 -0
  81. data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +198 -0
  82. data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +647 -0
  83. data/bridge/sdk-core/core/src/worker/workflow/mod.rs +1143 -0
  84. data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +145 -0
  85. data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +88 -0
  86. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +940 -0
  87. data/bridge/sdk-core/core-api/Cargo.toml +31 -0
  88. data/bridge/sdk-core/core-api/LICENSE.txt +23 -0
  89. data/bridge/sdk-core/core-api/src/errors.rs +95 -0
  90. data/bridge/sdk-core/core-api/src/lib.rs +151 -0
  91. data/bridge/sdk-core/core-api/src/worker.rs +135 -0
  92. data/bridge/sdk-core/etc/deps.svg +187 -0
  93. data/bridge/sdk-core/etc/dynamic-config.yaml +2 -0
  94. data/bridge/sdk-core/etc/otel-collector-config.yaml +36 -0
  95. data/bridge/sdk-core/etc/prometheus.yaml +6 -0
  96. data/bridge/sdk-core/fsm/Cargo.toml +18 -0
  97. data/bridge/sdk-core/fsm/LICENSE.txt +23 -0
  98. data/bridge/sdk-core/fsm/README.md +3 -0
  99. data/bridge/sdk-core/fsm/rustfsm_procmacro/Cargo.toml +27 -0
  100. data/bridge/sdk-core/fsm/rustfsm_procmacro/LICENSE.txt +23 -0
  101. data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +647 -0
  102. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/progress.rs +8 -0
  103. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/dupe_transitions_fail.rs +18 -0
  104. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/dupe_transitions_fail.stderr +12 -0
  105. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/dynamic_dest_pass.rs +41 -0
  106. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/forgot_name_fail.rs +14 -0
  107. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/forgot_name_fail.stderr +11 -0
  108. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/handler_arg_pass.rs +32 -0
  109. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/handler_pass.rs +31 -0
  110. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/medium_complex_pass.rs +46 -0
  111. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.rs +29 -0
  112. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +12 -0
  113. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/simple_pass.rs +32 -0
  114. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/struct_event_variant_fail.rs +18 -0
  115. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/struct_event_variant_fail.stderr +5 -0
  116. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_more_item_event_variant_fail.rs +11 -0
  117. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_more_item_event_variant_fail.stderr +5 -0
  118. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_zero_item_event_variant_fail.rs +11 -0
  119. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/tuple_zero_item_event_variant_fail.stderr +5 -0
  120. data/bridge/sdk-core/fsm/rustfsm_trait/Cargo.toml +14 -0
  121. data/bridge/sdk-core/fsm/rustfsm_trait/LICENSE.txt +23 -0
  122. data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +249 -0
  123. data/bridge/sdk-core/fsm/src/lib.rs +2 -0
  124. data/bridge/sdk-core/histories/fail_wf_task.bin +0 -0
  125. data/bridge/sdk-core/histories/timer_workflow_history.bin +0 -0
  126. data/bridge/sdk-core/integ-with-otel.sh +7 -0
  127. data/bridge/sdk-core/protos/api_upstream/README.md +9 -0
  128. data/bridge/sdk-core/protos/api_upstream/api-linter.yaml +40 -0
  129. data/bridge/sdk-core/protos/api_upstream/buf.yaml +12 -0
  130. data/bridge/sdk-core/protos/api_upstream/dependencies/gogoproto/gogo.proto +141 -0
  131. data/bridge/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +86 -0
  132. data/bridge/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +83 -0
  133. data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +259 -0
  134. data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +112 -0
  135. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +46 -0
  136. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/cluster.proto +40 -0
  137. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +57 -0
  138. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +55 -0
  139. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +168 -0
  140. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +97 -0
  141. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +51 -0
  142. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +50 -0
  143. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +41 -0
  144. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +60 -0
  145. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +59 -0
  146. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +51 -0
  147. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +122 -0
  148. data/bridge/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +108 -0
  149. data/bridge/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +114 -0
  150. data/bridge/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +56 -0
  151. data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +751 -0
  152. data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +97 -0
  153. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +161 -0
  154. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +99 -0
  155. data/bridge/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +61 -0
  156. data/bridge/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +55 -0
  157. data/bridge/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +300 -0
  158. data/bridge/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +108 -0
  159. data/bridge/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +46 -0
  160. data/bridge/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +59 -0
  161. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +145 -0
  162. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +1124 -0
  163. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +401 -0
  164. data/bridge/sdk-core/protos/grpc/health/v1/health.proto +63 -0
  165. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +78 -0
  166. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +79 -0
  167. data/bridge/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +210 -0
  168. data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +77 -0
  169. data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +15 -0
  170. data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +30 -0
  171. data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +30 -0
  172. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +261 -0
  173. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +297 -0
  174. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +29 -0
  175. data/bridge/sdk-core/protos/testsrv_upstream/api-linter.yaml +38 -0
  176. data/bridge/sdk-core/protos/testsrv_upstream/buf.yaml +13 -0
  177. data/bridge/sdk-core/protos/testsrv_upstream/dependencies/gogoproto/gogo.proto +141 -0
  178. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +63 -0
  179. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +90 -0
  180. data/bridge/sdk-core/rustfmt.toml +1 -0
  181. data/bridge/sdk-core/sdk/Cargo.toml +47 -0
  182. data/bridge/sdk-core/sdk/LICENSE.txt +23 -0
  183. data/bridge/sdk-core/sdk/src/activity_context.rs +230 -0
  184. data/bridge/sdk-core/sdk/src/app_data.rs +37 -0
  185. data/bridge/sdk-core/sdk/src/conversions.rs +8 -0
  186. data/bridge/sdk-core/sdk/src/interceptors.rs +17 -0
  187. data/bridge/sdk-core/sdk/src/lib.rs +792 -0
  188. data/bridge/sdk-core/sdk/src/payload_converter.rs +11 -0
  189. data/bridge/sdk-core/sdk/src/workflow_context/options.rs +295 -0
  190. data/bridge/sdk-core/sdk/src/workflow_context.rs +683 -0
  191. data/bridge/sdk-core/sdk/src/workflow_future.rs +503 -0
  192. data/bridge/sdk-core/sdk-core-protos/Cargo.toml +30 -0
  193. data/bridge/sdk-core/sdk-core-protos/LICENSE.txt +23 -0
  194. data/bridge/sdk-core/sdk-core-protos/build.rs +108 -0
  195. data/bridge/sdk-core/sdk-core-protos/src/constants.rs +7 -0
  196. data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +497 -0
  197. data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +230 -0
  198. data/bridge/sdk-core/sdk-core-protos/src/lib.rs +1910 -0
  199. data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +38 -0
  200. data/bridge/sdk-core/sdk-core-protos/src/utilities.rs +14 -0
  201. data/bridge/sdk-core/test-utils/Cargo.toml +35 -0
  202. data/bridge/sdk-core/test-utils/src/canned_histories.rs +1579 -0
  203. data/bridge/sdk-core/test-utils/src/histfetch.rs +28 -0
  204. data/bridge/sdk-core/test-utils/src/lib.rs +598 -0
  205. data/bridge/sdk-core/tests/integ_tests/client_tests.rs +36 -0
  206. data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +128 -0
  207. data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +218 -0
  208. data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +146 -0
  209. data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +437 -0
  210. data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +93 -0
  211. data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +878 -0
  212. data/bridge/sdk-core/tests/integ_tests/workflow_tests/appdata_propagation.rs +61 -0
  213. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +59 -0
  214. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +58 -0
  215. data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +50 -0
  216. data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +60 -0
  217. data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +54 -0
  218. data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +634 -0
  219. data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +113 -0
  220. data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +137 -0
  221. data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +93 -0
  222. data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +167 -0
  223. data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +99 -0
  224. data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +131 -0
  225. data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +75 -0
  226. data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +587 -0
  227. data/bridge/sdk-core/tests/load_tests.rs +191 -0
  228. data/bridge/sdk-core/tests/main.rs +111 -0
  229. data/bridge/sdk-core/tests/runner.rs +93 -0
  230. data/bridge/src/connection.rs +167 -0
  231. data/bridge/src/lib.rs +180 -0
  232. data/bridge/src/runtime.rs +47 -0
  233. data/bridge/src/worker.rs +73 -0
  234. data/ext/Rakefile +9 -0
  235. data/lib/bridge.so +0 -0
  236. data/lib/gen/dependencies/gogoproto/gogo_pb.rb +14 -0
  237. data/lib/gen/temporal/api/batch/v1/message_pb.rb +48 -0
  238. data/lib/gen/temporal/api/cluster/v1/message_pb.rb +67 -0
  239. data/lib/gen/temporal/api/command/v1/message_pb.rb +166 -0
  240. data/lib/gen/temporal/api/common/v1/message_pb.rb +69 -0
  241. data/lib/gen/temporal/api/enums/v1/batch_operation_pb.rb +32 -0
  242. data/lib/gen/temporal/api/enums/v1/cluster_pb.rb +26 -0
  243. data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +37 -0
  244. data/lib/gen/temporal/api/enums/v1/common_pb.rb +41 -0
  245. data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +67 -0
  246. data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +71 -0
  247. data/lib/gen/temporal/api/enums/v1/namespace_pb.rb +37 -0
  248. data/lib/gen/temporal/api/enums/v1/query_pb.rb +31 -0
  249. data/lib/gen/temporal/api/enums/v1/reset_pb.rb +24 -0
  250. data/lib/gen/temporal/api/enums/v1/schedule_pb.rb +28 -0
  251. data/lib/gen/temporal/api/enums/v1/task_queue_pb.rb +30 -0
  252. data/lib/gen/temporal/api/enums/v1/update_pb.rb +28 -0
  253. data/lib/gen/temporal/api/enums/v1/workflow_pb.rb +89 -0
  254. data/lib/gen/temporal/api/errordetails/v1/message_pb.rb +84 -0
  255. data/lib/gen/temporal/api/failure/v1/message_pb.rb +83 -0
  256. data/lib/gen/temporal/api/filter/v1/message_pb.rb +40 -0
  257. data/lib/gen/temporal/api/history/v1/message_pb.rb +489 -0
  258. data/lib/gen/temporal/api/namespace/v1/message_pb.rb +63 -0
  259. data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +125 -0
  260. data/lib/gen/temporal/api/operatorservice/v1/service_pb.rb +20 -0
  261. data/lib/gen/temporal/api/query/v1/message_pb.rb +38 -0
  262. data/lib/gen/temporal/api/replication/v1/message_pb.rb +37 -0
  263. data/lib/gen/temporal/api/schedule/v1/message_pb.rb +128 -0
  264. data/lib/gen/temporal/api/taskqueue/v1/message_pb.rb +73 -0
  265. data/lib/gen/temporal/api/update/v1/message_pb.rb +26 -0
  266. data/lib/gen/temporal/api/version/v1/message_pb.rb +41 -0
  267. data/lib/gen/temporal/api/workflow/v1/message_pb.rb +110 -0
  268. data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +771 -0
  269. data/lib/gen/temporal/api/workflowservice/v1/service_pb.rb +20 -0
  270. data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +58 -0
  271. data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +57 -0
  272. data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +222 -0
  273. data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +57 -0
  274. data/lib/gen/temporal/sdk/core/common/common_pb.rb +22 -0
  275. data/lib/gen/temporal/sdk/core/core_interface_pb.rb +34 -0
  276. data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +27 -0
  277. data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +164 -0
  278. data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +192 -0
  279. data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +34 -0
  280. data/lib/temporal/bridge.rb +14 -0
  281. data/lib/temporal/client/implementation.rb +339 -0
  282. data/lib/temporal/client/workflow_handle.rb +243 -0
  283. data/lib/temporal/client.rb +144 -0
  284. data/lib/temporal/connection.rb +736 -0
  285. data/lib/temporal/data_converter.rb +150 -0
  286. data/lib/temporal/error/failure.rb +194 -0
  287. data/lib/temporal/error/workflow_failure.rb +17 -0
  288. data/lib/temporal/errors.rb +22 -0
  289. data/lib/temporal/failure_converter/base.rb +26 -0
  290. data/lib/temporal/failure_converter/basic.rb +313 -0
  291. data/lib/temporal/failure_converter.rb +8 -0
  292. data/lib/temporal/interceptor/chain.rb +27 -0
  293. data/lib/temporal/interceptor/client.rb +102 -0
  294. data/lib/temporal/payload_codec/base.rb +32 -0
  295. data/lib/temporal/payload_converter/base.rb +24 -0
  296. data/lib/temporal/payload_converter/bytes.rb +26 -0
  297. data/lib/temporal/payload_converter/composite.rb +47 -0
  298. data/lib/temporal/payload_converter/encoding_base.rb +35 -0
  299. data/lib/temporal/payload_converter/json.rb +25 -0
  300. data/lib/temporal/payload_converter/nil.rb +25 -0
  301. data/lib/temporal/payload_converter.rb +14 -0
  302. data/lib/temporal/retry_policy.rb +82 -0
  303. data/lib/temporal/retry_state.rb +35 -0
  304. data/lib/temporal/runtime.rb +22 -0
  305. data/lib/temporal/timeout_type.rb +29 -0
  306. data/lib/temporal/version.rb +1 -1
  307. data/lib/temporal/workflow/execution_info.rb +54 -0
  308. data/lib/temporal/workflow/execution_status.rb +36 -0
  309. data/lib/temporal/workflow/id_reuse_policy.rb +36 -0
  310. data/lib/temporal/workflow/query_reject_condition.rb +33 -0
  311. data/lib/temporal.rb +4 -0
  312. data/lib/temporalio.rb +3 -1
  313. data/lib/thermite_patch.rb +23 -0
  314. data/temporalio.gemspec +41 -0
  315. metadata +543 -9
  316. data/temporal.gemspec +0 -20
@@ -0,0 +1,940 @@
1
+ use crate::{
2
+ abstractions::{dbg_panic, stream_when_allowed, MeteredSemaphore},
3
+ protosext::ValidPollWFTQResponse,
4
+ telemetry::metrics::workflow_worker_type,
5
+ worker::{
6
+ workflow::{history_update::NextPageToken, run_cache::RunCache, *},
7
+ LocalActRequest, LocalActivityResolution, LEGACY_QUERY_ID,
8
+ },
9
+ MetricsContext,
10
+ };
11
+ use futures::{stream, stream::PollNext, Stream, StreamExt};
12
+ use std::{collections::VecDeque, fmt::Debug, future, sync::Arc, time::Instant};
13
+ use temporal_sdk_core_api::errors::{PollWfError, WFMachinesError};
14
+ use temporal_sdk_core_protos::{
15
+ coresdk::{
16
+ workflow_activation::{
17
+ create_evict_activation, query_to_job, remove_from_cache::EvictionReason,
18
+ workflow_activation_job,
19
+ },
20
+ workflow_completion::Failure,
21
+ },
22
+ temporal::api::{enums::v1::WorkflowTaskFailedCause, failure::v1::Failure as TFailure},
23
+ };
24
+ use tokio::sync::{mpsc::unbounded_channel, oneshot};
25
+ use tokio_stream::wrappers::UnboundedReceiverStream;
26
+ use tokio_util::sync::CancellationToken;
27
+ use tracing::{Level, Span};
28
+
29
+ /// This struct holds all the state needed for tracking what workflow runs are currently cached
30
+ /// and how WFTs should be dispatched to them, etc.
31
+ ///
32
+ /// See [WFStream::build] for more
33
+ pub(crate) struct WFStream {
34
+ runs: RunCache,
35
+ /// Buffered polls for new runs which need a cache slot to open up before we can handle them
36
+ buffered_polls_need_cache_slot: VecDeque<PermittedWFT>,
37
+
38
+ /// Client for accessing server for history pagination etc.
39
+ client: Arc<dyn WorkerClient>,
40
+
41
+ /// Ensures we stay at or below this worker's maximum concurrent workflow task limit
42
+ wft_semaphore: MeteredSemaphore,
43
+ shutdown_token: CancellationToken,
44
+
45
+ metrics: MetricsContext,
46
+ }
47
+ /// All possible inputs to the [WFStream]
48
+ #[derive(derive_more::From, Debug)]
49
+ enum WFStreamInput {
50
+ NewWft(PermittedWFT),
51
+ Local(LocalInput),
52
+ /// The stream given to us which represents the poller (or a mock) terminated.
53
+ PollerDead,
54
+ /// The stream given to us which represents the poller (or a mock) encountered a non-retryable
55
+ /// error while polling
56
+ PollerError(tonic::Status),
57
+ }
58
+ impl From<RunUpdateResponse> for WFStreamInput {
59
+ fn from(r: RunUpdateResponse) -> Self {
60
+ WFStreamInput::Local(LocalInput {
61
+ input: LocalInputs::RunUpdateResponse(r.kind),
62
+ span: r.span,
63
+ })
64
+ }
65
+ }
66
+ /// A non-poller-received input to the [WFStream]
67
+ #[derive(derive_more::DebugCustom)]
68
+ #[debug(fmt = "LocalInput {{ {:?} }}", input)]
69
+ pub(super) struct LocalInput {
70
+ pub input: LocalInputs,
71
+ pub span: Span,
72
+ }
73
+ /// Everything that _isn't_ a poll which may affect workflow state. Always higher priority than
74
+ /// new polls.
75
+ #[derive(Debug, derive_more::From)]
76
+ pub(super) enum LocalInputs {
77
+ Completion(WFActCompleteMsg),
78
+ LocalResolution(LocalResolutionMsg),
79
+ PostActivation(PostActivationMsg),
80
+ RunUpdateResponse(RunUpdateResponseKind),
81
+ RequestEviction(RequestEvictMsg),
82
+ GetStateInfo(GetStateInfoMsg),
83
+ }
84
+ #[derive(Debug, derive_more::From)]
85
+ #[allow(clippy::large_enum_variant)] // PollerDead only ever gets used once, so not important.
86
+ enum ExternalPollerInputs {
87
+ NewWft(PermittedWFT),
88
+ PollerDead,
89
+ PollerError(tonic::Status),
90
+ }
91
+ impl From<ExternalPollerInputs> for WFStreamInput {
92
+ fn from(l: ExternalPollerInputs) -> Self {
93
+ match l {
94
+ ExternalPollerInputs::NewWft(v) => WFStreamInput::NewWft(v),
95
+ ExternalPollerInputs::PollerDead => WFStreamInput::PollerDead,
96
+ ExternalPollerInputs::PollerError(e) => WFStreamInput::PollerError(e),
97
+ }
98
+ }
99
+ }
100
+
101
+ impl WFStream {
102
+ /// Constructs workflow state management and returns a stream which outputs activations.
103
+ ///
104
+ /// * `external_wfts` is a stream of validated poll responses as returned by a poller (or mock)
105
+ /// * `wfts_from_complete` is the recv side of a channel that new WFTs from completions should
106
+ /// come down.
107
+ /// * `local_rx` is a stream of actions that workflow state needs to see. Things like
108
+ /// completions, local activities finishing, etc. See [LocalInputs].
109
+ ///
110
+ /// These inputs are combined, along with an internal feedback channel for run-specific updates,
111
+ /// to form the inputs to a stream of [WFActStreamInput]s. The stream processor then takes
112
+ /// action on those inputs, and then may yield activations.
113
+ ///
114
+ /// Updating runs may need to do async work like fetching additional history. In order to
115
+ /// facilitate this, each run lives in its own task which is communicated with by sending
116
+ /// [RunAction]s and receiving [RunUpdateResponse]s via its [ManagedRunHandle].
117
+ pub(super) fn build(
118
+ basics: WorkflowBasics,
119
+ external_wfts: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
120
+ local_rx: impl Stream<Item = LocalInput> + Send + 'static,
121
+ client: Arc<dyn WorkerClient>,
122
+ local_activity_request_sink: impl Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution>
123
+ + Send
124
+ + Sync
125
+ + 'static,
126
+ ) -> impl Stream<Item = Result<ActivationOrAuto, PollWfError>> {
127
+ let wft_semaphore = MeteredSemaphore::new(
128
+ basics.max_outstanding_wfts,
129
+ basics.metrics.with_new_attrs([workflow_worker_type()]),
130
+ MetricsContext::available_task_slots,
131
+ );
132
+ let wft_sem_clone = wft_semaphore.clone();
133
+ let proceeder = move || {
134
+ let wft_sem_clone = wft_sem_clone.clone();
135
+ async move { wft_sem_clone.acquire_owned().await.unwrap() }
136
+ };
137
+ let poller_wfts = stream_when_allowed(external_wfts, proceeder);
138
+ let (run_update_tx, run_update_rx) = unbounded_channel();
139
+ let local_rx = stream::select(
140
+ local_rx.map(Into::into),
141
+ UnboundedReceiverStream::new(run_update_rx).map(Into::into),
142
+ );
143
+ let all_inputs = stream::select_with_strategy(
144
+ local_rx,
145
+ poller_wfts
146
+ .map(|(wft, permit)| match wft {
147
+ Ok(wft) => ExternalPollerInputs::NewWft(PermittedWFT { wft, permit }),
148
+ Err(e) => ExternalPollerInputs::PollerError(e),
149
+ })
150
+ .chain(stream::once(async { ExternalPollerInputs::PollerDead }))
151
+ .map(Into::into)
152
+ .boxed(),
153
+ // Priority always goes to the local stream
154
+ |_: &mut ()| PollNext::Left,
155
+ );
156
+ let mut state = WFStream {
157
+ buffered_polls_need_cache_slot: Default::default(),
158
+ runs: RunCache::new(
159
+ basics.max_cached_workflows,
160
+ basics.namespace.clone(),
161
+ run_update_tx,
162
+ Arc::new(local_activity_request_sink),
163
+ basics.metrics.clone(),
164
+ ),
165
+ client,
166
+ wft_semaphore,
167
+ shutdown_token: basics.shutdown_token,
168
+ metrics: basics.metrics,
169
+ };
170
+ all_inputs
171
+ .map(move |action| {
172
+ let span = span!(Level::DEBUG, "new_stream_input", action=?action);
173
+ let _span_g = span.enter();
174
+
175
+ let maybe_activation = match action {
176
+ WFStreamInput::NewWft(pwft) => {
177
+ debug!(run_id=%pwft.wft.workflow_execution.run_id, "New WFT");
178
+ state.instantiate_or_update(pwft);
179
+ None
180
+ }
181
+ WFStreamInput::Local(local_input) => {
182
+ let _span_g = local_input.span.enter();
183
+ match local_input.input {
184
+ LocalInputs::RunUpdateResponse(resp) => {
185
+ state.process_run_update_response(resp)
186
+ }
187
+ LocalInputs::Completion(completion) => {
188
+ state.process_completion(completion);
189
+ None
190
+ }
191
+ LocalInputs::PostActivation(report) => {
192
+ state.process_post_activation(report);
193
+ None
194
+ }
195
+ LocalInputs::LocalResolution(res) => {
196
+ state.local_resolution(res);
197
+ None
198
+ }
199
+ LocalInputs::RequestEviction(evict) => {
200
+ state.request_eviction(evict);
201
+ None
202
+ }
203
+ LocalInputs::GetStateInfo(gsi) => {
204
+ let _ = gsi.response_tx.send(WorkflowStateInfo {
205
+ cached_workflows: state.runs.len(),
206
+ outstanding_wft: state.outstanding_wfts(),
207
+ available_wft_permits: state.wft_semaphore.available_permits(),
208
+ });
209
+ None
210
+ }
211
+ }
212
+ }
213
+ WFStreamInput::PollerDead => {
214
+ debug!("WFT poller died, shutting down");
215
+ state.shutdown_token.cancel();
216
+ None
217
+ }
218
+ WFStreamInput::PollerError(e) => {
219
+ warn!("WFT poller errored, shutting down");
220
+ return Err(PollWfError::TonicError(e));
221
+ }
222
+ };
223
+
224
+ if let Some(ref act) = maybe_activation {
225
+ if let Some(run_handle) = state.runs.get_mut(act.run_id()) {
226
+ run_handle.insert_outstanding_activation(act);
227
+ } else {
228
+ dbg_panic!("Tried to insert activation for missing run!");
229
+ }
230
+ }
231
+ state.reconcile_buffered();
232
+ if state.shutdown_done() {
233
+ return Err(PollWfError::ShutDown);
234
+ }
235
+
236
+ Ok(maybe_activation)
237
+ })
238
+ .filter_map(|o| {
239
+ future::ready(match o {
240
+ Ok(None) => None,
241
+ Ok(Some(v)) => Some(Ok(v)),
242
+ Err(e) => {
243
+ if !matches!(e, PollWfError::ShutDown) {
244
+ error!(
245
+ "Workflow processing encountered fatal error and must shut down {:?}",
246
+ e
247
+ );
248
+ }
249
+ Some(Err(e))
250
+ }
251
+ })
252
+ })
253
+ // Stop the stream once we have shut down
254
+ .take_while(|o| future::ready(!matches!(o, Err(PollWfError::ShutDown))))
255
+ }
256
+
257
+ fn process_run_update_response(
258
+ &mut self,
259
+ resp: RunUpdateResponseKind,
260
+ ) -> Option<ActivationOrAuto> {
261
+ debug!(resp=%resp, "Processing run update response from machines");
262
+ match resp {
263
+ RunUpdateResponseKind::Good(mut resp) => {
264
+ let run_handle = self
265
+ .runs
266
+ .get_mut(&resp.run_id)
267
+ .expect("Workflow must exist, it just sent us an update response");
268
+ run_handle.have_seen_terminal_event = resp.have_seen_terminal_event;
269
+ run_handle.more_pending_work = resp.more_pending_work;
270
+ run_handle.last_action_acked = true;
271
+ run_handle.most_recently_processed_event_number =
272
+ resp.most_recently_processed_event_number;
273
+
274
+ let r = match resp.outgoing_activation {
275
+ Some(ActivationOrAuto::LangActivation(mut activation)) => {
276
+ if resp.in_response_to_wft {
277
+ let wft = run_handle
278
+ .wft
279
+ .as_mut()
280
+ .expect("WFT must exist for run just updated with one");
281
+ // If there are in-poll queries, insert jobs for those queries into the
282
+ // activation, but only if we hit the cache. If we didn't, those queries
283
+ // will need to be dealt with once replay is over
284
+ if !wft.pending_queries.is_empty() && wft.hit_cache {
285
+ put_queries_in_act(&mut activation, wft);
286
+ }
287
+ }
288
+
289
+ if activation.jobs.is_empty() {
290
+ dbg_panic!("Should not send lang activation with no jobs");
291
+ }
292
+ Some(ActivationOrAuto::LangActivation(activation))
293
+ }
294
+ Some(ActivationOrAuto::ReadyForQueries(mut act)) => {
295
+ if let Some(wft) = run_handle.wft.as_mut() {
296
+ put_queries_in_act(&mut act, wft);
297
+ Some(ActivationOrAuto::LangActivation(act))
298
+ } else {
299
+ dbg_panic!("Ready for queries but no WFT!");
300
+ None
301
+ }
302
+ }
303
+ a @ Some(ActivationOrAuto::Autocomplete { .. }) => a,
304
+ None => {
305
+ // If the response indicates there is no activation to send yet but there
306
+ // is more pending work, we should check again.
307
+ if run_handle.more_pending_work {
308
+ run_handle.check_more_activations();
309
+ None
310
+ } else if let Some(reason) = run_handle.trying_to_evict.as_ref() {
311
+ // If a run update came back and had nothing to do, but we're trying to
312
+ // evict, just do that now as long as there's no other outstanding work.
313
+ if run_handle.activation.is_none() && !run_handle.more_pending_work {
314
+ let mut evict_act = create_evict_activation(
315
+ resp.run_id,
316
+ reason.message.clone(),
317
+ reason.reason,
318
+ );
319
+ evict_act.history_length =
320
+ run_handle.most_recently_processed_event_number as u32;
321
+ Some(ActivationOrAuto::LangActivation(evict_act))
322
+ } else {
323
+ None
324
+ }
325
+ } else {
326
+ None
327
+ }
328
+ }
329
+ };
330
+ if let Some(f) = resp.fulfillable_complete.take() {
331
+ f.fulfill();
332
+ }
333
+
334
+ // After each run update, check if it's ready to handle any buffered poll
335
+ if matches!(&r, Some(ActivationOrAuto::Autocomplete { .. }) | None)
336
+ && !run_handle.has_any_pending_work(false, true)
337
+ {
338
+ if let Some(bufft) = run_handle.buffered_resp.take() {
339
+ self.instantiate_or_update(bufft);
340
+ }
341
+ }
342
+ r
343
+ }
344
+ RunUpdateResponseKind::Fail(fail) => {
345
+ if let Some(r) = self.runs.get_mut(&fail.run_id) {
346
+ r.last_action_acked = true;
347
+ }
348
+
349
+ if let Some(resp_chan) = fail.completion_resp {
350
+ // Automatically fail the workflow task in the event we couldn't update machines
351
+ let fail_cause = if matches!(&fail.err, WFMachinesError::Nondeterminism(_)) {
352
+ WorkflowTaskFailedCause::NonDeterministicError
353
+ } else {
354
+ WorkflowTaskFailedCause::Unspecified
355
+ };
356
+ let wft_fail_str = format!("{:?}", fail.err);
357
+ self.failed_completion(
358
+ fail.run_id,
359
+ fail_cause,
360
+ fail.err.evict_reason(),
361
+ TFailure::application_failure(wft_fail_str, false).into(),
362
+ resp_chan,
363
+ );
364
+ } else {
365
+ // TODO: This should probably also fail workflow tasks, but that wasn't
366
+ // implemented pre-refactor either.
367
+ warn!(error=?fail.err, run_id=%fail.run_id, "Error while updating workflow");
368
+ self.request_eviction(RequestEvictMsg {
369
+ run_id: fail.run_id,
370
+ message: format!("Error while updating workflow: {:?}", fail.err),
371
+ reason: fail.err.evict_reason(),
372
+ });
373
+ }
374
+ None
375
+ }
376
+ }
377
+ }
378
+
379
+ #[instrument(level = "debug", skip(self, pwft),
380
+ fields(run_id=%pwft.wft.workflow_execution.run_id))]
381
+ fn instantiate_or_update(&mut self, pwft: PermittedWFT) {
382
+ let (mut work, permit) = if let Some(w) = self.buffer_resp_if_outstanding_work(pwft) {
383
+ (w.wft, w.permit)
384
+ } else {
385
+ return;
386
+ };
387
+
388
+ let run_id = work.workflow_execution.run_id.clone();
389
+ // If our cache is full and this WFT is for an unseen run we must first evict a run before
390
+ // we can deal with this task. So, buffer the task in that case.
391
+ if !self.runs.has_run(&run_id) && self.runs.is_full() {
392
+ self.buffer_resp_on_full_cache(PermittedWFT { wft: work, permit });
393
+ return;
394
+ }
395
+
396
+ let start_event_id = work.history.events.first().map(|e| e.event_id);
397
+ debug!(
398
+ run_id = %run_id,
399
+ task_token = %&work.task_token,
400
+ history_length = %work.history.events.len(),
401
+ start_event_id = ?start_event_id,
402
+ has_legacy_query = %work.legacy_query.is_some(),
403
+ attempt = %work.attempt,
404
+ "Applying new workflow task from server"
405
+ );
406
+
407
+ let wft_info = WorkflowTaskInfo {
408
+ attempt: work.attempt,
409
+ task_token: work.task_token,
410
+ };
411
+ let poll_resp_is_incremental = work
412
+ .history
413
+ .events
414
+ .get(0)
415
+ .map(|ev| ev.event_id > 1)
416
+ .unwrap_or_default();
417
+ let poll_resp_is_incremental = poll_resp_is_incremental || work.history.events.is_empty();
418
+
419
+ let mut did_miss_cache = !poll_resp_is_incremental;
420
+
421
+ let page_token = if !self.runs.has_run(&run_id) && poll_resp_is_incremental {
422
+ debug!(run_id=?run_id, "Workflow task has partial history, but workflow is not in \
423
+ cache. Will fetch history");
424
+ self.metrics.sticky_cache_miss();
425
+ did_miss_cache = true;
426
+ NextPageToken::FetchFromStart
427
+ } else {
428
+ work.next_page_token.into()
429
+ };
430
+ let history_update = HistoryUpdate::new(
431
+ HistoryPaginator::new(
432
+ work.history,
433
+ work.workflow_execution.workflow_id.clone(),
434
+ run_id.clone(),
435
+ page_token,
436
+ self.client.clone(),
437
+ ),
438
+ work.previous_started_event_id,
439
+ );
440
+ let legacy_query_from_poll = work
441
+ .legacy_query
442
+ .take()
443
+ .map(|q| query_to_job(LEGACY_QUERY_ID.to_string(), q));
444
+
445
+ let mut pending_queries = work.query_requests.into_iter().collect::<Vec<_>>();
446
+ if !pending_queries.is_empty() && legacy_query_from_poll.is_some() {
447
+ error!(
448
+ "Server issued both normal and legacy queries. This should not happen. Please \
449
+ file a bug report."
450
+ );
451
+ self.request_eviction(RequestEvictMsg {
452
+ run_id,
453
+ message: "Server issued both normal and legacy query".to_string(),
454
+ reason: EvictionReason::Fatal,
455
+ });
456
+ return;
457
+ }
458
+ if let Some(lq) = legacy_query_from_poll {
459
+ pending_queries.push(lq);
460
+ }
461
+
462
+ let start_time = Instant::now();
463
+ let run_handle = self.runs.instantiate_or_update(
464
+ &run_id,
465
+ &work.workflow_execution.workflow_id,
466
+ &work.workflow_type,
467
+ history_update,
468
+ start_time,
469
+ );
470
+ run_handle.wft = Some(OutstandingTask {
471
+ info: wft_info,
472
+ hit_cache: !did_miss_cache,
473
+ pending_queries,
474
+ start_time,
475
+ permit,
476
+ })
477
+ }
478
+
479
+ #[instrument(level = "debug", skip(self, complete),
480
+ fields(run_id=%complete.completion.run_id()))]
481
+ fn process_completion(&mut self, complete: WFActCompleteMsg) {
482
+ match complete.completion {
483
+ ValidatedCompletion::Success { run_id, commands } => {
484
+ self.successful_completion(run_id, commands, complete.response_tx);
485
+ }
486
+ ValidatedCompletion::Fail { run_id, failure } => {
487
+ self.failed_completion(
488
+ run_id,
489
+ WorkflowTaskFailedCause::Unspecified,
490
+ EvictionReason::LangFail,
491
+ failure,
492
+ complete.response_tx,
493
+ );
494
+ }
495
+ }
496
+ // Always queue evictions after completion when we have a zero-size cache
497
+ if self.runs.cache_capacity() == 0 {
498
+ self.request_eviction_of_lru_run();
499
+ }
500
+ }
501
+
502
+ fn successful_completion(
503
+ &mut self,
504
+ run_id: String,
505
+ mut commands: Vec<WFCommand>,
506
+ resp_chan: oneshot::Sender<ActivationCompleteResult>,
507
+ ) {
508
+ let activation_was_only_eviction = self.activation_has_only_eviction(&run_id);
509
+ let (task_token, has_pending_query, start_time) =
510
+ if let Some(entry) = self.get_task(&run_id) {
511
+ (
512
+ entry.info.task_token.clone(),
513
+ !entry.pending_queries.is_empty(),
514
+ entry.start_time,
515
+ )
516
+ } else {
517
+ if !activation_was_only_eviction {
518
+ // Not an error if this was an eviction, since it's normal to issue eviction
519
+ // activations without an associated workflow task in that case.
520
+ dbg_panic!(
521
+ "Attempted to complete activation for run {} without associated workflow task",
522
+ run_id
523
+ );
524
+ }
525
+ self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
526
+ return;
527
+ };
528
+
529
+ // If the only command from the activation is a legacy query response, that means we need
530
+ // to respond differently than a typical activation.
531
+ if matches!(&commands.as_slice(),
532
+ &[WFCommand::QueryResponse(qr)] if qr.query_id == LEGACY_QUERY_ID)
533
+ {
534
+ let qr = match commands.remove(0) {
535
+ WFCommand::QueryResponse(qr) => qr,
536
+ _ => unreachable!("We just verified this is the only command"),
537
+ };
538
+ self.reply_to_complete(
539
+ &run_id,
540
+ ActivationCompleteOutcome::ReportWFTSuccess(ServerCommandsWithWorkflowInfo {
541
+ task_token,
542
+ action: ActivationAction::RespondLegacyQuery {
543
+ result: Box::new(qr),
544
+ },
545
+ }),
546
+ resp_chan,
547
+ );
548
+ } else {
549
+ // First strip out query responses from other commands that actually affect machines
550
+ // Would be prettier with `drain_filter`
551
+ let mut i = 0;
552
+ let mut query_responses = vec![];
553
+ while i < commands.len() {
554
+ if matches!(commands[i], WFCommand::QueryResponse(_)) {
555
+ if let WFCommand::QueryResponse(qr) = commands.remove(i) {
556
+ query_responses.push(qr);
557
+ }
558
+ } else {
559
+ i += 1;
560
+ }
561
+ }
562
+
563
+ let activation_was_eviction = self.activation_has_eviction(&run_id);
564
+ if let Some(rh) = self.runs.get_mut(&run_id) {
565
+ rh.send_completion(RunActivationCompletion {
566
+ task_token,
567
+ start_time,
568
+ commands,
569
+ activation_was_eviction,
570
+ activation_was_only_eviction,
571
+ has_pending_query,
572
+ query_responses,
573
+ resp_chan: Some(resp_chan),
574
+ });
575
+ } else {
576
+ dbg_panic!("Run {} missing during completion", run_id);
577
+ }
578
+ };
579
+ }
580
+
581
+ fn failed_completion(
582
+ &mut self,
583
+ run_id: String,
584
+ cause: WorkflowTaskFailedCause,
585
+ reason: EvictionReason,
586
+ failure: Failure,
587
+ resp_chan: oneshot::Sender<ActivationCompleteResult>,
588
+ ) {
589
+ let tt = if let Some(tt) = self.get_task(&run_id).map(|t| t.info.task_token.clone()) {
590
+ tt
591
+ } else {
592
+ dbg_panic!(
593
+ "No workflow task for run id {} found when trying to fail activation",
594
+ run_id
595
+ );
596
+ self.reply_to_complete(&run_id, ActivationCompleteOutcome::DoNothing, resp_chan);
597
+ return;
598
+ };
599
+
600
+ if let Some(m) = self.run_metrics(&run_id) {
601
+ m.wf_task_failed();
602
+ }
603
+ let message = format!("Workflow activation completion failed: {:?}", &failure);
604
+ // Blow up any cached data associated with the workflow
605
+ let should_report = match self.request_eviction(RequestEvictMsg {
606
+ run_id: run_id.clone(),
607
+ message,
608
+ reason,
609
+ }) {
610
+ EvictionRequestResult::EvictionRequested(Some(attempt))
611
+ | EvictionRequestResult::EvictionAlreadyRequested(Some(attempt)) => attempt <= 1,
612
+ _ => false,
613
+ };
614
+ // If the outstanding WFT is a legacy query task, report that we need to fail it
615
+ let outcome = if self
616
+ .runs
617
+ .get(&run_id)
618
+ .map(|rh| rh.pending_work_is_legacy_query())
619
+ .unwrap_or_default()
620
+ {
621
+ ActivationCompleteOutcome::ReportWFTFail(
622
+ FailedActivationWFTReport::ReportLegacyQueryFailure(tt, failure),
623
+ )
624
+ } else if should_report {
625
+ ActivationCompleteOutcome::ReportWFTFail(FailedActivationWFTReport::Report(
626
+ tt, cause, failure,
627
+ ))
628
+ } else {
629
+ ActivationCompleteOutcome::DoNothing
630
+ };
631
+ self.reply_to_complete(&run_id, outcome, resp_chan);
632
+ }
633
+
634
+ fn process_post_activation(&mut self, report: PostActivationMsg) {
635
+ let run_id = &report.run_id;
636
+
637
+ // If we reported to server, we always want to mark it complete.
638
+ let maybe_t = self.complete_wft(run_id, report.reported_wft_to_server);
639
+
640
+ if self
641
+ .get_activation(run_id)
642
+ .map(|a| a.has_eviction())
643
+ .unwrap_or_default()
644
+ {
645
+ self.evict_run(run_id);
646
+ };
647
+
648
+ if let Some(wft) = report.wft_from_complete {
649
+ debug!(run_id=%wft.workflow_execution.run_id, "New WFT from completion");
650
+ if let Some(t) = maybe_t {
651
+ self.instantiate_or_update(PermittedWFT {
652
+ wft,
653
+ permit: t.permit,
654
+ })
655
+ }
656
+ }
657
+
658
+ if let Some(rh) = self.runs.get_mut(run_id) {
659
+ // Delete the activation
660
+ rh.activation.take();
661
+ // Attempt to produce the next activation if needed
662
+ rh.check_more_activations();
663
+ }
664
+ }
665
+
666
+ fn local_resolution(&mut self, msg: LocalResolutionMsg) {
667
+ let run_id = msg.run_id;
668
+ if let Some(rh) = self.runs.get_mut(&run_id) {
669
+ rh.send_local_resolution(msg.res)
670
+ } else {
671
+ // It isn't an explicit error if the machine is missing when a local activity resolves.
672
+ // This can happen if an activity reports a timeout after we stopped caring about it.
673
+ debug!(run_id = %run_id,
674
+ "Tried to resolve a local activity for a run we are no longer tracking");
675
+ }
676
+ }
677
+
678
+ /// Request a workflow eviction. This will (eventually, after replay is done) queue up an
679
+ /// activation to evict the workflow from the lang side. Workflow will not *actually* be evicted
680
+ /// until lang replies to that activation
681
+ fn request_eviction(&mut self, info: RequestEvictMsg) -> EvictionRequestResult {
682
+ let activation_has_eviction = self.activation_has_eviction(&info.run_id);
683
+ if let Some(rh) = self.runs.get_mut(&info.run_id) {
684
+ let attempts = rh.wft.as_ref().map(|wt| wt.info.attempt);
685
+ if !activation_has_eviction && rh.trying_to_evict.is_none() {
686
+ debug!(run_id=%info.run_id, reason=%info.message, "Eviction requested");
687
+ rh.trying_to_evict = Some(info);
688
+ rh.check_more_activations();
689
+ EvictionRequestResult::EvictionRequested(attempts)
690
+ } else {
691
+ EvictionRequestResult::EvictionAlreadyRequested(attempts)
692
+ }
693
+ } else {
694
+ warn!(run_id=%info.run_id, "Eviction requested for unknown run");
695
+ EvictionRequestResult::NotFound
696
+ }
697
+ }
698
+
699
+ fn request_eviction_of_lru_run(&mut self) -> EvictionRequestResult {
700
+ if let Some(lru_run_id) = self.runs.current_lru_run() {
701
+ let run_id = lru_run_id.to_string();
702
+ self.request_eviction(RequestEvictMsg {
703
+ run_id,
704
+ message: "Workflow cache full".to_string(),
705
+ reason: EvictionReason::CacheFull,
706
+ })
707
+ } else {
708
+ // This branch shouldn't really be possible
709
+ EvictionRequestResult::NotFound
710
+ }
711
+ }
712
+
713
+ /// Evict a workflow from the cache by its run id. Any existing pending activations will be
714
+ /// destroyed, and any outstanding activations invalidated.
715
+ fn evict_run(&mut self, run_id: &str) {
716
+ debug!(run_id=%run_id, "Evicting run");
717
+
718
+ let mut did_take_buff = false;
719
+ // Now it can safely be deleted, it'll get recreated once the un-buffered poll is handled if
720
+ // there was one.
721
+ if let Some(mut rh) = self.runs.remove(run_id) {
722
+ rh.handle.abort();
723
+
724
+ if let Some(buff) = rh.buffered_resp.take() {
725
+ self.instantiate_or_update(buff);
726
+ did_take_buff = true;
727
+ }
728
+ }
729
+
730
+ if !did_take_buff {
731
+ // If there wasn't a buffered poll, there might be one for a different run which needs
732
+ // a free cache slot, and now there is.
733
+ if let Some(buff) = self.buffered_polls_need_cache_slot.pop_front() {
734
+ self.instantiate_or_update(buff);
735
+ }
736
+ }
737
+ }
738
+
739
+ fn complete_wft(
740
+ &mut self,
741
+ run_id: &str,
742
+ reported_wft_to_server: bool,
743
+ ) -> Option<OutstandingTask> {
744
+ // If the WFT completion wasn't sent to the server, but we did see the final event, we still
745
+ // want to clear the workflow task. This can really only happen in replay testing, where we
746
+ // will generate poll responses with complete history but no attached query, and such a WFT
747
+ // would never really exist. The server wouldn't send a workflow task with nothing to do,
748
+ // but they are very useful for testing complete replay.
749
+ let saw_final = self
750
+ .runs
751
+ .get(run_id)
752
+ .map(|r| r.have_seen_terminal_event)
753
+ .unwrap_or_default();
754
+ if !saw_final && !reported_wft_to_server {
755
+ return None;
756
+ }
757
+
758
+ if let Some(rh) = self.runs.get_mut(run_id) {
759
+ // Can't mark the WFT complete if there are pending queries, as doing so would destroy
760
+ // them.
761
+ if rh
762
+ .wft
763
+ .as_ref()
764
+ .map(|wft| !wft.pending_queries.is_empty())
765
+ .unwrap_or_default()
766
+ {
767
+ return None;
768
+ }
769
+
770
+ debug!("Marking WFT completed");
771
+ let retme = rh.wft.take();
772
+ if let Some(ot) = &retme {
773
+ if let Some(m) = self.run_metrics(run_id) {
774
+ m.wf_task_latency(ot.start_time.elapsed());
775
+ }
776
+ }
777
+ retme
778
+ } else {
779
+ None
780
+ }
781
+ }
782
+
783
+ /// Stores some work if there is any outstanding WFT or activation for the run. If there was
784
+ /// not, returns the work back out inside the option.
785
+ fn buffer_resp_if_outstanding_work(&mut self, work: PermittedWFT) -> Option<PermittedWFT> {
786
+ let run_id = &work.wft.workflow_execution.run_id;
787
+ if let Some(mut run) = self.runs.get_mut(run_id) {
788
+ let about_to_issue_evict = run.trying_to_evict.is_some() && !run.last_action_acked;
789
+ let has_wft = run.wft.is_some();
790
+ let has_activation = run.activation.is_some();
791
+ if has_wft
792
+ || has_activation
793
+ || about_to_issue_evict
794
+ || run.more_pending_work
795
+ || !run.last_action_acked
796
+ {
797
+ debug!(run_id = %run_id, run = ?run,
798
+ "Got new WFT for a run with outstanding work, buffering it");
799
+ run.buffered_resp = Some(work);
800
+ None
801
+ } else {
802
+ Some(work)
803
+ }
804
+ } else {
805
+ Some(work)
806
+ }
807
+ }
808
+
809
+ fn buffer_resp_on_full_cache(&mut self, work: PermittedWFT) {
810
+ debug!(run_id=%work.wft.workflow_execution.run_id, "Buffering WFT because cache is full");
811
+ // If there's already a buffered poll for the run, replace it.
812
+ if let Some(rh) = self
813
+ .buffered_polls_need_cache_slot
814
+ .iter_mut()
815
+ .find(|w| w.wft.workflow_execution.run_id == work.wft.workflow_execution.run_id)
816
+ {
817
+ *rh = work;
818
+ } else {
819
+ // Otherwise push it to the back
820
+ self.buffered_polls_need_cache_slot.push_back(work);
821
+ }
822
+ }
823
+
824
+ /// Makes sure we have enough pending evictions to fulfill the needs of buffered WFTs who are
825
+ /// waiting on a cache slot
826
+ fn reconcile_buffered(&mut self) {
827
+ // We must ensure that there are at least as many pending evictions as there are tasks
828
+ // that we might need to un-buffer (skipping runs which already have buffered tasks for
829
+ // themselves)
830
+ let num_in_buff = self.buffered_polls_need_cache_slot.len();
831
+ let mut evict_these = vec![];
832
+ let num_existing_evictions = self
833
+ .runs
834
+ .runs_lru_order()
835
+ .filter(|(_, h)| h.trying_to_evict.is_some())
836
+ .count();
837
+ let mut num_evicts_needed = num_in_buff.saturating_sub(num_existing_evictions);
838
+ for (rid, handle) in self.runs.runs_lru_order() {
839
+ if num_evicts_needed == 0 {
840
+ break;
841
+ }
842
+ if handle.buffered_resp.is_none() {
843
+ num_evicts_needed -= 1;
844
+ evict_these.push(rid.to_string());
845
+ }
846
+ }
847
+ for run_id in evict_these {
848
+ self.request_eviction(RequestEvictMsg {
849
+ run_id,
850
+ message: "Workflow cache full".to_string(),
851
+ reason: EvictionReason::CacheFull,
852
+ });
853
+ }
854
+ }
855
+
856
+ fn reply_to_complete(
857
+ &self,
858
+ run_id: &str,
859
+ outcome: ActivationCompleteOutcome,
860
+ chan: oneshot::Sender<ActivationCompleteResult>,
861
+ ) {
862
+ let most_recently_processed_event = self
863
+ .runs
864
+ .peek(run_id)
865
+ .map(|rh| rh.most_recently_processed_event_number)
866
+ .unwrap_or_default();
867
+ chan.send(ActivationCompleteResult {
868
+ most_recently_processed_event,
869
+ outcome,
870
+ })
871
+ .expect("Rcv half of activation reply not dropped");
872
+ }
873
+
874
+ fn shutdown_done(&self) -> bool {
875
+ let all_runs_ready = self
876
+ .runs
877
+ .handles()
878
+ .all(|r| !r.has_any_pending_work(true, false));
879
+ if self.shutdown_token.is_cancelled() && all_runs_ready {
880
+ info!("Workflow shutdown is done");
881
+ true
882
+ } else {
883
+ false
884
+ }
885
+ }
886
+
887
+ fn get_task(&mut self, run_id: &str) -> Option<&OutstandingTask> {
888
+ self.runs.get(run_id).and_then(|rh| rh.wft.as_ref())
889
+ }
890
+
891
+ fn get_activation(&mut self, run_id: &str) -> Option<&OutstandingActivation> {
892
+ self.runs.get(run_id).and_then(|rh| rh.activation.as_ref())
893
+ }
894
+
895
+ fn run_metrics(&mut self, run_id: &str) -> Option<&MetricsContext> {
896
+ self.runs.get(run_id).map(|r| &r.metrics)
897
+ }
898
+
899
+ fn activation_has_only_eviction(&mut self, run_id: &str) -> bool {
900
+ self.runs
901
+ .get(run_id)
902
+ .and_then(|rh| rh.activation)
903
+ .map(OutstandingActivation::has_only_eviction)
904
+ .unwrap_or_default()
905
+ }
906
+
907
+ fn activation_has_eviction(&mut self, run_id: &str) -> bool {
908
+ self.runs
909
+ .get(run_id)
910
+ .and_then(|rh| rh.activation)
911
+ .map(OutstandingActivation::has_eviction)
912
+ .unwrap_or_default()
913
+ }
914
+
915
+ fn outstanding_wfts(&self) -> usize {
916
+ self.runs.handles().filter(|r| r.wft.is_some()).count()
917
+ }
918
+
919
+ // Useful when debugging
920
+ #[allow(dead_code)]
921
+ fn info_dump(&self, run_id: &str) {
922
+ if let Some(r) = self.runs.peek(run_id) {
923
+ info!(run_id, wft=?r.wft, activation=?r.activation, buffered=r.buffered_resp.is_some(),
924
+ trying_to_evict=r.trying_to_evict.is_some(), more_work=r.more_pending_work,
925
+ last_action_acked=r.last_action_acked);
926
+ } else {
927
+ info!(run_id, "Run not found");
928
+ }
929
+ }
930
+ }
931
+
932
+ /// Drains pending queries from the workflow task and appends them to the activation's jobs
933
+ fn put_queries_in_act(act: &mut WorkflowActivation, wft: &mut OutstandingTask) {
934
+ debug!(queries=?wft.pending_queries, "Dispatching queries");
935
+ let query_jobs = wft
936
+ .pending_queries
937
+ .drain(..)
938
+ .map(|q| workflow_activation_job::Variant::QueryWorkflow(q).into());
939
+ act.jobs.extend(query_jobs);
940
+ }