temporalio 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (310) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +180 -7
  3. data/bridge/Cargo.lock +208 -76
  4. data/bridge/Cargo.toml +5 -2
  5. data/bridge/sdk-core/Cargo.toml +1 -1
  6. data/bridge/sdk-core/README.md +20 -10
  7. data/bridge/sdk-core/client/Cargo.toml +1 -1
  8. data/bridge/sdk-core/client/src/lib.rs +227 -59
  9. data/bridge/sdk-core/client/src/metrics.rs +17 -8
  10. data/bridge/sdk-core/client/src/raw.rs +13 -12
  11. data/bridge/sdk-core/client/src/retry.rs +132 -43
  12. data/bridge/sdk-core/core/Cargo.toml +28 -15
  13. data/bridge/sdk-core/core/benches/workflow_replay.rs +13 -10
  14. data/bridge/sdk-core/core/src/abstractions.rs +225 -36
  15. data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +217 -79
  16. data/bridge/sdk-core/core/src/core_tests/determinism.rs +165 -2
  17. data/bridge/sdk-core/core/src/core_tests/local_activities.rs +565 -34
  18. data/bridge/sdk-core/core/src/core_tests/queries.rs +247 -90
  19. data/bridge/sdk-core/core/src/core_tests/workers.rs +3 -5
  20. data/bridge/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
  21. data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +430 -67
  22. data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +106 -12
  23. data/bridge/sdk-core/core/src/internal_flags.rs +136 -0
  24. data/bridge/sdk-core/core/src/lib.rs +148 -34
  25. data/bridge/sdk-core/core/src/protosext/mod.rs +1 -1
  26. data/bridge/sdk-core/core/src/replay/mod.rs +185 -41
  27. data/bridge/sdk-core/core/src/telemetry/log_export.rs +190 -0
  28. data/bridge/sdk-core/core/src/telemetry/metrics.rs +219 -140
  29. data/bridge/sdk-core/core/src/telemetry/mod.rs +326 -315
  30. data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +20 -14
  31. data/bridge/sdk-core/core/src/test_help/mod.rs +85 -21
  32. data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +112 -156
  33. data/bridge/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
  34. data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +364 -128
  35. data/bridge/sdk-core/core/src/worker/activities.rs +263 -170
  36. data/bridge/sdk-core/core/src/worker/client/mocks.rs +23 -3
  37. data/bridge/sdk-core/core/src/worker/client.rs +48 -6
  38. data/bridge/sdk-core/core/src/worker/mod.rs +186 -75
  39. data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
  40. data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +13 -24
  41. data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +879 -226
  42. data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +101 -48
  43. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +8 -12
  44. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +6 -9
  45. data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +90 -32
  46. data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +6 -9
  47. data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +7 -10
  48. data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +6 -9
  49. data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +160 -83
  50. data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +36 -54
  51. data/bridge/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +179 -0
  52. data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +104 -157
  53. data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +8 -12
  54. data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +9 -13
  55. data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +10 -4
  56. data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +14 -11
  57. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
  58. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +395 -299
  59. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +12 -20
  60. data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +33 -18
  61. data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +1032 -374
  62. data/bridge/sdk-core/core/src/worker/workflow/mod.rs +525 -392
  63. data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
  64. data/bridge/sdk-core/core/src/worker/workflow/wft_extraction.rs +125 -0
  65. data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +3 -6
  66. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
  67. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
  68. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +456 -681
  69. data/bridge/sdk-core/core-api/Cargo.toml +6 -4
  70. data/bridge/sdk-core/core-api/src/errors.rs +1 -34
  71. data/bridge/sdk-core/core-api/src/lib.rs +7 -45
  72. data/bridge/sdk-core/core-api/src/telemetry.rs +141 -0
  73. data/bridge/sdk-core/core-api/src/worker.rs +27 -1
  74. data/bridge/sdk-core/etc/deps.svg +115 -140
  75. data/bridge/sdk-core/etc/regen-depgraph.sh +5 -0
  76. data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +18 -15
  77. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +1 -1
  78. data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +8 -3
  79. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
  80. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-23_history.bin +0 -0
  81. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-85_history.bin +0 -0
  82. data/bridge/sdk-core/protos/api_upstream/buf.yaml +0 -3
  83. data/bridge/sdk-core/protos/api_upstream/build/go.mod +7 -0
  84. data/bridge/sdk-core/protos/api_upstream/build/go.sum +5 -0
  85. data/bridge/sdk-core/protos/api_upstream/{temporal/api/enums/v1/cluster.proto → build/tools.go} +7 -18
  86. data/bridge/sdk-core/protos/api_upstream/go.mod +6 -0
  87. data/bridge/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +12 -9
  88. data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +15 -26
  89. data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +13 -2
  90. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
  91. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +4 -9
  92. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
  93. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +10 -8
  94. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +28 -2
  95. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
  96. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
  97. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
  98. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
  99. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
  100. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +24 -19
  101. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
  102. data/bridge/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
  103. data/bridge/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
  104. data/bridge/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
  105. data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +62 -26
  106. data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +4 -2
  107. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +24 -61
  108. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -21
  109. data/bridge/sdk-core/protos/api_upstream/temporal/api/protocol/v1/message.proto +57 -0
  110. data/bridge/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
  111. data/bridge/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
  112. data/bridge/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +110 -31
  113. data/bridge/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
  114. data/bridge/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +4 -4
  115. data/bridge/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +71 -6
  116. data/bridge/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
  117. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +3 -2
  118. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +111 -36
  119. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +19 -5
  120. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +1 -0
  121. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +1 -0
  122. data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +1 -0
  123. data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
  124. data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
  125. data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +1 -0
  126. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +9 -0
  127. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +9 -1
  128. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +6 -0
  129. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
  130. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
  131. data/bridge/sdk-core/sdk/Cargo.toml +4 -3
  132. data/bridge/sdk-core/sdk/src/interceptors.rs +36 -3
  133. data/bridge/sdk-core/sdk/src/lib.rs +94 -25
  134. data/bridge/sdk-core/sdk/src/workflow_context.rs +13 -2
  135. data/bridge/sdk-core/sdk/src/workflow_future.rs +10 -13
  136. data/bridge/sdk-core/sdk-core-protos/Cargo.toml +5 -2
  137. data/bridge/sdk-core/sdk-core-protos/build.rs +36 -2
  138. data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +164 -104
  139. data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +27 -23
  140. data/bridge/sdk-core/sdk-core-protos/src/lib.rs +252 -74
  141. data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
  142. data/bridge/sdk-core/test-utils/Cargo.toml +4 -1
  143. data/bridge/sdk-core/test-utils/src/canned_histories.rs +106 -296
  144. data/bridge/sdk-core/test-utils/src/histfetch.rs +1 -1
  145. data/bridge/sdk-core/test-utils/src/lib.rs +161 -50
  146. data/bridge/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
  147. data/bridge/sdk-core/test-utils/src/workflows.rs +29 -0
  148. data/bridge/sdk-core/tests/fuzzy_workflow.rs +130 -0
  149. data/bridge/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
  150. data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
  151. data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +10 -5
  152. data/bridge/sdk-core/tests/integ_tests/metrics_tests.rs +239 -0
  153. data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +4 -60
  154. data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
  155. data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
  156. data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +93 -69
  157. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
  158. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
  159. data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +1 -0
  160. data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
  161. data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
  162. data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +151 -116
  163. data/bridge/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +54 -0
  164. data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +7 -28
  165. data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +115 -24
  166. data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
  167. data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
  168. data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
  169. data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
  170. data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -4
  171. data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +27 -18
  172. data/bridge/sdk-core/tests/main.rs +8 -16
  173. data/bridge/sdk-core/tests/runner.rs +75 -36
  174. data/bridge/sdk-core/tests/wf_input_replay.rs +32 -0
  175. data/bridge/src/connection.rs +117 -82
  176. data/bridge/src/lib.rs +356 -42
  177. data/bridge/src/runtime.rs +10 -3
  178. data/bridge/src/test_server.rs +153 -0
  179. data/bridge/src/worker.rs +133 -9
  180. data/lib/gen/temporal/api/batch/v1/message_pb.rb +8 -6
  181. data/lib/gen/temporal/api/command/v1/message_pb.rb +10 -16
  182. data/lib/gen/temporal/api/common/v1/message_pb.rb +5 -1
  183. data/lib/gen/temporal/api/enums/v1/batch_operation_pb.rb +2 -1
  184. data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +3 -3
  185. data/lib/gen/temporal/api/enums/v1/common_pb.rb +2 -1
  186. data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +5 -4
  187. data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +9 -1
  188. data/lib/gen/temporal/api/enums/v1/namespace_pb.rb +1 -1
  189. data/lib/gen/temporal/api/enums/v1/query_pb.rb +1 -1
  190. data/lib/gen/temporal/api/enums/v1/reset_pb.rb +1 -1
  191. data/lib/gen/temporal/api/enums/v1/schedule_pb.rb +1 -1
  192. data/lib/gen/temporal/api/enums/v1/task_queue_pb.rb +1 -1
  193. data/lib/gen/temporal/api/enums/v1/update_pb.rb +7 -10
  194. data/lib/gen/temporal/api/enums/v1/workflow_pb.rb +1 -1
  195. data/lib/gen/temporal/api/errordetails/v1/message_pb.rb +1 -1
  196. data/lib/gen/temporal/api/failure/v1/message_pb.rb +1 -1
  197. data/lib/gen/temporal/api/filter/v1/message_pb.rb +1 -1
  198. data/lib/gen/temporal/api/history/v1/message_pb.rb +34 -25
  199. data/lib/gen/temporal/api/namespace/v1/message_pb.rb +2 -1
  200. data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +14 -51
  201. data/lib/gen/temporal/api/operatorservice/v1/service_pb.rb +1 -1
  202. data/lib/gen/temporal/api/protocol/v1/message_pb.rb +30 -0
  203. data/lib/gen/temporal/api/query/v1/message_pb.rb +1 -1
  204. data/lib/gen/temporal/api/replication/v1/message_pb.rb +1 -1
  205. data/lib/gen/temporal/api/schedule/v1/message_pb.rb +22 -1
  206. data/lib/gen/temporal/api/sdk/v1/task_complete_metadata_pb.rb +23 -0
  207. data/lib/gen/temporal/api/taskqueue/v1/message_pb.rb +2 -2
  208. data/lib/gen/temporal/api/testservice/v1/request_response_pb.rb +49 -0
  209. data/lib/gen/temporal/api/testservice/v1/service_pb.rb +21 -0
  210. data/lib/gen/temporal/api/update/v1/message_pb.rb +49 -3
  211. data/lib/gen/temporal/api/version/v1/message_pb.rb +1 -1
  212. data/lib/gen/temporal/api/workflow/v1/message_pb.rb +2 -1
  213. data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +47 -20
  214. data/lib/gen/temporal/api/workflowservice/v1/service_pb.rb +1 -1
  215. data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +13 -9
  216. data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +10 -6
  217. data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +13 -9
  218. data/lib/gen/temporal/sdk/core/common/common_pb.rb +7 -3
  219. data/lib/gen/temporal/sdk/core/core_interface_pb.rb +9 -3
  220. data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +7 -3
  221. data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +28 -21
  222. data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +32 -24
  223. data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +12 -5
  224. data/lib/temporalio/activity/context.rb +102 -0
  225. data/lib/temporalio/activity/info.rb +67 -0
  226. data/lib/temporalio/activity.rb +85 -0
  227. data/lib/temporalio/bridge/connect_options.rb +15 -0
  228. data/lib/temporalio/bridge/error.rb +8 -0
  229. data/lib/temporalio/bridge/retry_config.rb +24 -0
  230. data/lib/temporalio/bridge/tls_options.rb +19 -0
  231. data/lib/temporalio/bridge.rb +14 -0
  232. data/lib/{temporal → temporalio}/client/implementation.rb +57 -56
  233. data/lib/{temporal → temporalio}/client/workflow_handle.rb +35 -35
  234. data/lib/{temporal → temporalio}/client.rb +19 -32
  235. data/lib/temporalio/connection/retry_config.rb +44 -0
  236. data/lib/temporalio/connection/service.rb +20 -0
  237. data/lib/temporalio/connection/test_service.rb +92 -0
  238. data/lib/temporalio/connection/tls_options.rb +51 -0
  239. data/lib/temporalio/connection/workflow_service.rb +731 -0
  240. data/lib/temporalio/connection.rb +86 -0
  241. data/lib/{temporal → temporalio}/data_converter.rb +76 -35
  242. data/lib/{temporal → temporalio}/error/failure.rb +6 -6
  243. data/lib/{temporal → temporalio}/error/workflow_failure.rb +4 -2
  244. data/lib/{temporal → temporalio}/errors.rb +19 -1
  245. data/lib/{temporal → temporalio}/failure_converter/base.rb +5 -5
  246. data/lib/{temporal → temporalio}/failure_converter/basic.rb +58 -52
  247. data/lib/temporalio/failure_converter.rb +7 -0
  248. data/lib/temporalio/interceptor/activity_inbound.rb +22 -0
  249. data/lib/temporalio/interceptor/activity_outbound.rb +24 -0
  250. data/lib/{temporal → temporalio}/interceptor/chain.rb +7 -6
  251. data/lib/{temporal → temporalio}/interceptor/client.rb +27 -2
  252. data/lib/temporalio/interceptor.rb +22 -0
  253. data/lib/{temporal → temporalio}/payload_codec/base.rb +5 -5
  254. data/lib/{temporal → temporalio}/payload_converter/base.rb +3 -3
  255. data/lib/{temporal → temporalio}/payload_converter/bytes.rb +4 -3
  256. data/lib/{temporal → temporalio}/payload_converter/composite.rb +7 -5
  257. data/lib/{temporal → temporalio}/payload_converter/encoding_base.rb +4 -4
  258. data/lib/{temporal → temporalio}/payload_converter/json.rb +4 -3
  259. data/lib/{temporal → temporalio}/payload_converter/nil.rb +4 -3
  260. data/lib/temporalio/payload_converter.rb +14 -0
  261. data/lib/{temporal → temporalio}/retry_policy.rb +17 -7
  262. data/lib/{temporal → temporalio}/retry_state.rb +1 -1
  263. data/lib/temporalio/runtime.rb +25 -0
  264. data/lib/temporalio/testing/time_skipping_handle.rb +32 -0
  265. data/lib/temporalio/testing/time_skipping_interceptor.rb +23 -0
  266. data/lib/temporalio/testing/workflow_environment.rb +112 -0
  267. data/lib/temporalio/testing.rb +175 -0
  268. data/lib/{temporal → temporalio}/timeout_type.rb +2 -2
  269. data/lib/temporalio/version.rb +3 -0
  270. data/lib/temporalio/worker/activity_runner.rb +114 -0
  271. data/lib/temporalio/worker/activity_worker.rb +164 -0
  272. data/lib/temporalio/worker/reactor.rb +46 -0
  273. data/lib/temporalio/worker/runner.rb +63 -0
  274. data/lib/temporalio/worker/sync_worker.rb +124 -0
  275. data/lib/temporalio/worker/thread_pool_executor.rb +51 -0
  276. data/lib/temporalio/worker.rb +204 -0
  277. data/lib/temporalio/workflow/async.rb +46 -0
  278. data/lib/{temporal → temporalio}/workflow/execution_info.rb +4 -4
  279. data/lib/{temporal → temporalio}/workflow/execution_status.rb +1 -1
  280. data/lib/temporalio/workflow/future.rb +138 -0
  281. data/lib/{temporal → temporalio}/workflow/id_reuse_policy.rb +6 -6
  282. data/lib/temporalio/workflow/info.rb +76 -0
  283. data/lib/{temporal → temporalio}/workflow/query_reject_condition.rb +5 -5
  284. data/lib/temporalio.rb +12 -3
  285. data/temporalio.gemspec +11 -6
  286. metadata +137 -64
  287. data/bridge/sdk-core/Cargo.lock +0 -2606
  288. data/bridge/sdk-core/bridge-ffi/Cargo.toml +0 -24
  289. data/bridge/sdk-core/bridge-ffi/LICENSE.txt +0 -23
  290. data/bridge/sdk-core/bridge-ffi/build.rs +0 -25
  291. data/bridge/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -249
  292. data/bridge/sdk-core/bridge-ffi/src/lib.rs +0 -825
  293. data/bridge/sdk-core/bridge-ffi/src/wrappers.rs +0 -211
  294. data/bridge/sdk-core/core/src/log_export.rs +0 -62
  295. data/bridge/sdk-core/core/src/worker/workflow/machines/mutable_side_effect_state_machine.rs +0 -127
  296. data/bridge/sdk-core/core/src/worker/workflow/machines/side_effect_state_machine.rs +0 -71
  297. data/bridge/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +0 -83
  298. data/bridge/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
  299. data/bridge/sdk-core/sdk/src/conversions.rs +0 -8
  300. data/lib/bridge.so +0 -0
  301. data/lib/gen/temporal/api/cluster/v1/message_pb.rb +0 -67
  302. data/lib/gen/temporal/api/enums/v1/cluster_pb.rb +0 -26
  303. data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +0 -222
  304. data/lib/temporal/bridge.rb +0 -14
  305. data/lib/temporal/connection.rb +0 -736
  306. data/lib/temporal/failure_converter.rb +0 -8
  307. data/lib/temporal/payload_converter.rb +0 -14
  308. data/lib/temporal/runtime.rb +0 -22
  309. data/lib/temporal/version.rb +0 -3
  310. data/lib/temporal.rb +0 -8
@@ -8,47 +8,66 @@ mod history_update;
8
8
  mod machines;
9
9
  mod managed_run;
10
10
  mod run_cache;
11
+ mod wft_extraction;
11
12
  pub(crate) mod wft_poller;
12
13
  mod workflow_stream;
13
14
 
15
+ #[cfg(feature = "save_wf_inputs")]
16
+ pub use workflow_stream::replay_wf_state_inputs;
17
+
14
18
  pub(crate) use bridge::WorkflowBridge;
15
19
  pub(crate) use driven_workflow::{DrivenWorkflow, WorkflowFetcher};
16
- pub(crate) use history_update::{HistoryPaginator, HistoryUpdate};
17
- pub(crate) use machines::WFMachinesError;
20
+ pub(crate) use history_update::HistoryUpdate;
18
21
  #[cfg(test)]
19
22
  pub(crate) use managed_run::ManagedWFFunc;
20
23
 
24
+ use crate::worker::activities::TrackedPermittedTqResp;
21
25
  use crate::{
22
- abstractions::OwnedMeteredSemPermit,
23
- protosext::{legacy_query_failure, ValidPollWFTQResponse, WorkflowActivationExt},
24
- telemetry::VecDisplayer,
26
+ abstractions::{
27
+ stream_when_allowed, MeteredSemaphore, TrackedOwnedMeteredSemPermit, UsedMeteredSemPermit,
28
+ },
29
+ internal_flags::InternalFlags,
30
+ protosext::{legacy_query_failure, ValidPollWFTQResponse},
31
+ telemetry::{
32
+ metrics::workflow_worker_type, set_trace_subscriber_for_current_thread, TelemetryInstance,
33
+ VecDisplayer,
34
+ },
25
35
  worker::{
26
- activities::{ActivitiesFromWFTsHandle, PermittedTqResp},
27
- client::WorkerClient,
36
+ activities::{ActivitiesFromWFTsHandle, LocalActivityManager},
37
+ client::{WorkerClient, WorkflowTaskCompletion},
28
38
  workflow::{
29
- managed_run::{ManagedRun, WorkflowManager},
39
+ history_update::HistoryPaginator,
40
+ managed_run::RunUpdateAct,
41
+ wft_extraction::{HistoryFetchReq, WFTExtractor},
30
42
  wft_poller::validate_wft,
31
43
  workflow_stream::{LocalInput, LocalInputs, WFStream},
32
44
  },
33
- LocalActRequest, LocalActivityResolution,
45
+ LocalActRequest, LocalActivityExecutionResult, LocalActivityResolution,
34
46
  },
35
47
  MetricsContext,
36
48
  };
49
+ use anyhow::anyhow;
37
50
  use futures::{stream::BoxStream, Stream, StreamExt};
51
+ use futures_util::{future::abortable, stream};
52
+ use prost_types::TimestampError;
38
53
  use std::{
39
- fmt::{Debug, Display, Formatter},
54
+ cell::RefCell,
55
+ collections::VecDeque,
56
+ fmt::Debug,
40
57
  future::Future,
41
58
  ops::DerefMut,
59
+ rc::Rc,
42
60
  result,
43
61
  sync::Arc,
62
+ thread,
44
63
  time::{Duration, Instant},
45
64
  };
46
- use temporal_client::WorkflowTaskCompletion;
47
65
  use temporal_sdk_core_api::errors::{CompleteWfError, PollWfError};
48
66
  use temporal_sdk_core_protos::{
49
67
  coresdk::{
50
68
  workflow_activation::{
51
- remove_from_cache::EvictionReason, QueryWorkflow, WorkflowActivation,
69
+ remove_from_cache::EvictionReason, workflow_activation_job, QueryWorkflow,
70
+ WorkflowActivation, WorkflowActivationJob,
52
71
  },
53
72
  workflow_commands::*,
54
73
  workflow_completion,
@@ -58,35 +77,41 @@ use temporal_sdk_core_protos::{
58
77
  },
59
78
  temporal::api::{
60
79
  command::v1::{command::Attributes, Command as ProtoCommand, Command},
61
- common::v1::{Memo, RetryPolicy, SearchAttributes},
80
+ common::v1::{Memo, MeteringMetadata, RetryPolicy, SearchAttributes, WorkflowExecution},
62
81
  enums::v1::WorkflowTaskFailedCause,
82
+ query::v1::WorkflowQuery,
83
+ sdk::v1::WorkflowTaskCompletedMetadata,
63
84
  taskqueue::v1::StickyExecutionAttributes,
64
- workflowservice::v1::PollActivityTaskQueueResponse,
85
+ workflowservice::v1::{get_system_info_response, PollActivityTaskQueueResponse},
65
86
  },
66
87
  TaskToken,
67
88
  };
68
89
  use tokio::{
69
90
  sync::{
70
- mpsc::{unbounded_channel, UnboundedSender},
91
+ mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender},
71
92
  oneshot,
72
93
  },
73
- task,
74
- task::{JoinError, JoinHandle},
94
+ task::{spawn_blocking, LocalSet},
75
95
  };
76
96
  use tokio_stream::wrappers::UnboundedReceiverStream;
77
97
  use tokio_util::sync::CancellationToken;
78
98
  use tracing::Span;
79
99
 
80
100
  pub(crate) const LEGACY_QUERY_ID: &str = "legacy_query";
101
+ /// What percentage of a WFT timeout we are willing to wait before sending a WFT heartbeat when
102
+ /// necessary.
103
+ const WFT_HEARTBEAT_TIMEOUT_FRACTION: f32 = 0.8;
104
+ const MAX_EAGER_ACTIVITY_RESERVATIONS_PER_WORKFLOW_TASK: usize = 3;
81
105
 
82
106
  type Result<T, E = WFMachinesError> = result::Result<T, E>;
83
107
  type BoxedActivationStream = BoxStream<'static, Result<ActivationOrAuto, PollWfError>>;
108
+ type InternalFlagsRef = Rc<RefCell<InternalFlags>>;
84
109
 
85
110
  /// Centralizes all state related to workflows and workflow tasks
86
111
  pub(crate) struct Workflows {
87
112
  task_queue: String,
88
113
  local_tx: UnboundedSender<LocalInput>,
89
- processing_task: tokio::sync::Mutex<Option<JoinHandle<()>>>,
114
+ processing_task: tokio::sync::Mutex<Option<thread::JoinHandle<()>>>,
90
115
  activation_stream: tokio::sync::Mutex<(
91
116
  BoxedActivationStream,
92
117
  // Used to indicate polling may begin
@@ -98,62 +123,128 @@ pub(crate) struct Workflows {
98
123
  sticky_attrs: Option<StickyExecutionAttributes>,
99
124
  /// If set, can be used to reserve activity task slots for eager-return of new activity tasks.
100
125
  activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
126
+ /// Ensures we stay at or below this worker's maximum concurrent workflow task limit
127
+ wft_semaphore: MeteredSemaphore,
128
+ local_act_mgr: Arc<LocalActivityManager>,
101
129
  }
102
130
 
103
- pub(super) struct WorkflowBasics {
131
+ pub(crate) struct WorkflowBasics {
104
132
  pub max_cached_workflows: usize,
105
133
  pub max_outstanding_wfts: usize,
106
134
  pub shutdown_token: CancellationToken,
107
135
  pub metrics: MetricsContext,
108
136
  pub namespace: String,
109
137
  pub task_queue: String,
138
+ pub ignore_evicts_on_shutdown: bool,
139
+ pub fetching_concurrency: usize,
140
+ pub server_capabilities: get_system_info_response::Capabilities,
141
+ #[cfg(feature = "save_wf_inputs")]
142
+ pub wf_state_inputs: Option<UnboundedSender<Vec<u8>>>,
143
+ }
144
+
145
+ pub(crate) struct RunBasics<'a> {
146
+ pub namespace: String,
147
+ pub workflow_id: String,
148
+ pub workflow_type: String,
149
+ pub run_id: String,
150
+ pub history: HistoryUpdate,
151
+ pub metrics: MetricsContext,
152
+ pub capabilities: &'a get_system_info_response::Capabilities,
110
153
  }
111
154
 
112
155
  impl Workflows {
156
+ #[allow(clippy::too_many_arguments)] // Not much worth combining here
113
157
  pub(super) fn new(
114
158
  basics: WorkflowBasics,
115
159
  sticky_attrs: Option<StickyExecutionAttributes>,
116
160
  client: Arc<dyn WorkerClient>,
117
161
  wft_stream: impl Stream<Item = Result<ValidPollWFTQResponse, tonic::Status>> + Send + 'static,
118
- local_activity_request_sink: impl Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution>
119
- + Send
120
- + Sync
121
- + 'static,
162
+ local_activity_request_sink: impl LocalActivityRequestSink,
163
+ local_act_mgr: Arc<LocalActivityManager>,
164
+ heartbeat_timeout_rx: UnboundedReceiver<HeartbeatTimeoutMsg>,
122
165
  activity_tasks_handle: Option<ActivitiesFromWFTsHandle>,
166
+ telem_instance: Option<&TelemetryInstance>,
123
167
  ) -> Self {
124
168
  let (local_tx, local_rx) = unbounded_channel();
169
+ let (fetch_tx, fetch_rx) = unbounded_channel();
125
170
  let shutdown_tok = basics.shutdown_token.clone();
126
171
  let task_queue = basics.task_queue.clone();
127
- let mut stream = WFStream::build(
128
- basics,
172
+ let wft_semaphore = MeteredSemaphore::new(
173
+ basics.max_outstanding_wfts,
174
+ basics.metrics.with_new_attrs([workflow_worker_type()]),
175
+ MetricsContext::available_task_slots,
176
+ );
177
+ // Only allow polling of the new WFT stream if there are available task slots
178
+ let proceeder = stream::unfold(wft_semaphore.clone(), |sem| async move {
179
+ Some((sem.acquire_owned().await.unwrap(), sem))
180
+ });
181
+ let wft_stream = stream_when_allowed(wft_stream, proceeder);
182
+ let extracted_wft_stream = WFTExtractor::build(
183
+ client.clone(),
184
+ basics.fetching_concurrency,
129
185
  wft_stream,
186
+ UnboundedReceiverStream::new(fetch_rx),
187
+ );
188
+ let locals_stream = stream::select(
130
189
  UnboundedReceiverStream::new(local_rx),
131
- client.clone(),
132
- local_activity_request_sink,
190
+ UnboundedReceiverStream::new(heartbeat_timeout_rx).map(Into::into),
133
191
  );
134
192
  let (activation_tx, activation_rx) = unbounded_channel();
135
193
  let (start_polling_tx, start_polling_rx) = oneshot::channel();
136
194
  // We must spawn a task to constantly poll the activation stream, because otherwise
137
195
  // activation completions would not cause anything to happen until the next poll.
138
- let processing_task = task::spawn(async move {
139
- // However, we want to avoid plowing ahead until we've been asked to poll at least once.
140
- // This supports activity-only workers.
141
- let do_poll = tokio::select! {
142
- sp = start_polling_rx => {
143
- sp.is_ok()
196
+ let tracing_sub = telem_instance.map(|ti| ti.trace_subscriber());
197
+ let processing_task = thread::spawn(move || {
198
+ if let Some(ts) = tracing_sub {
199
+ set_trace_subscriber_for_current_thread(ts);
200
+ }
201
+ let rt = tokio::runtime::Builder::new_current_thread()
202
+ .enable_all()
203
+ .thread_name("workflow-processing")
204
+ .build()
205
+ .unwrap();
206
+ let local = LocalSet::new();
207
+ local.block_on(&rt, async move {
208
+ let mut stream = WFStream::build(
209
+ basics,
210
+ extracted_wft_stream,
211
+ locals_stream,
212
+ local_activity_request_sink,
213
+ );
214
+
215
+ // However, we want to avoid plowing ahead until we've been asked to poll at least
216
+ // once. This supports activity-only workers.
217
+ let do_poll = tokio::select! {
218
+ sp = start_polling_rx => {
219
+ sp.is_ok()
220
+ }
221
+ _ = shutdown_tok.cancelled() => {
222
+ false
223
+ }
224
+ };
225
+ if !do_poll {
226
+ return;
144
227
  }
145
- _ = shutdown_tok.cancelled() => {
146
- false
228
+ while let Some(output) = stream.next().await {
229
+ match output {
230
+ Ok(o) => {
231
+ for fetchreq in o.fetch_histories {
232
+ fetch_tx
233
+ .send(fetchreq)
234
+ .expect("Fetch channel must not be dropped");
235
+ }
236
+ for act in o.activations {
237
+ activation_tx
238
+ .send(Ok(act))
239
+ .expect("Activation processor channel not dropped");
240
+ }
241
+ }
242
+ Err(e) => activation_tx
243
+ .send(Err(e))
244
+ .expect("Activation processor channel not dropped"),
245
+ }
147
246
  }
148
- };
149
- if !do_poll {
150
- return;
151
- }
152
- while let Some(act) = stream.next().await {
153
- activation_tx
154
- .send(act)
155
- .expect("Activation processor channel not dropped");
156
- }
247
+ });
157
248
  });
158
249
  Self {
159
250
  task_queue,
@@ -166,12 +257,14 @@ impl Workflows {
166
257
  client,
167
258
  sticky_attrs,
168
259
  activity_tasks_handle,
260
+ wft_semaphore,
261
+ local_act_mgr,
169
262
  }
170
263
  }
171
264
 
172
- pub async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
265
+ pub(super) async fn next_workflow_activation(&self) -> Result<WorkflowActivation, PollWfError> {
173
266
  loop {
174
- let r = {
267
+ let al = {
175
268
  let mut lock = self.activation_stream.lock().await;
176
269
  let (ref mut stream, ref mut beginner) = lock.deref_mut();
177
270
  if let Some(beginner) = beginner.take() {
@@ -179,17 +272,37 @@ impl Workflows {
179
272
  }
180
273
  stream.next().await.unwrap_or(Err(PollWfError::ShutDown))?
181
274
  };
182
- Span::current().record("run_id", &r.run_id());
183
- match r {
275
+ Span::current().record("run_id", al.run_id());
276
+ match al {
184
277
  ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
185
278
  debug!(activation=%act, "Sending activation to lang");
186
279
  break Ok(act);
187
280
  }
188
281
  ActivationOrAuto::Autocomplete { run_id } => {
189
- self.activation_completed(WorkflowActivationCompletion {
190
- run_id,
191
- status: Some(workflow_completion::Success::from_variants(vec![]).into()),
192
- })
282
+ self.activation_completed(
283
+ WorkflowActivationCompletion {
284
+ run_id,
285
+ status: Some(
286
+ workflow_completion::Success::from_variants(vec![]).into(),
287
+ ),
288
+ },
289
+ // We need to say a type, but the type is irrelevant, so imagine some
290
+ // boxed function we'll never call.
291
+ Option::<Box<dyn Fn(&str, usize) + Send>>::None,
292
+ )
293
+ .await?;
294
+ }
295
+ ActivationOrAuto::AutoFail {
296
+ run_id,
297
+ machines_err,
298
+ } => {
299
+ self.activation_completed(
300
+ WorkflowActivationCompletion {
301
+ run_id,
302
+ status: Some(auto_fail_to_complete_status(machines_err)),
303
+ },
304
+ Option::<Box<dyn Fn(&str, usize) + Send>>::None,
305
+ )
193
306
  .await?;
194
307
  }
195
308
  }
@@ -199,10 +312,11 @@ impl Workflows {
199
312
  /// Queue an activation completion for processing, returning a future that will resolve with
200
313
  /// the outcome of that completion. See [ActivationCompletedOutcome].
201
314
  ///
202
- /// Returns the most-recently-processed event number for the run
203
- pub async fn activation_completed(
315
+ /// Returns the most-recently-processed event number for the run.
316
+ pub(super) async fn activation_completed(
204
317
  &self,
205
318
  completion: WorkflowActivationCompletion,
319
+ post_activate_hook: Option<impl Fn(&str, usize)>,
206
320
  ) -> Result<usize, CompleteWfError> {
207
321
  let is_empty_completion = completion.is_empty();
208
322
  let completion = validate_completion(completion)?;
@@ -210,7 +324,7 @@ impl Workflows {
210
324
  let (tx, rx) = oneshot::channel();
211
325
  let was_sent = self.send_local(WFActCompleteMsg {
212
326
  completion,
213
- response_tx: tx,
327
+ response_tx: Some(tx),
214
328
  });
215
329
  if !was_sent {
216
330
  if is_empty_completion {
@@ -227,7 +341,7 @@ impl Workflows {
227
341
  .await
228
342
  .expect("Send half of activation complete response not dropped");
229
343
  let mut wft_from_complete = None;
230
- let reported_wft_to_server = match completion_outcome.outcome {
344
+ let wft_report_status = match completion_outcome.outcome {
231
345
  ActivationCompleteOutcome::ReportWFTSuccess(report) => match report {
232
346
  ServerCommandsWithWorkflowInfo {
233
347
  task_token,
@@ -236,6 +350,7 @@ impl Workflows {
236
350
  mut commands,
237
351
  query_responses,
238
352
  force_new_wft,
353
+ sdk_metadata,
239
354
  },
240
355
  } => {
241
356
  let reserved_act_permits =
@@ -249,6 +364,13 @@ impl Workflows {
249
364
  sticky_attributes: None,
250
365
  return_new_workflow_task: true,
251
366
  force_create_new_workflow_task: force_new_wft,
367
+ sdk_metadata,
368
+ metering_metadata: MeteringMetadata {
369
+ nonfirst_local_activity_execution_attempts: self
370
+ .local_act_mgr
371
+ .get_nonfirst_attempt_count(&run_id)
372
+ as u32,
373
+ },
252
374
  };
253
375
  let sticky_attrs = self.sticky_attrs.clone();
254
376
  // Do not return new WFT if we would not cache, because returned new WFTs are
@@ -270,14 +392,14 @@ impl Workflows {
270
392
  Ok(())
271
393
  })
272
394
  .await;
273
- true
395
+ WFTReportStatus::Reported
274
396
  }
275
397
  ServerCommandsWithWorkflowInfo {
276
398
  task_token,
277
399
  action: ActivationAction::RespondLegacyQuery { result },
278
400
  } => {
279
401
  self.respond_legacy_query(task_token, *result).await;
280
- true
402
+ WFTReportStatus::Reported
281
403
  }
282
404
  },
283
405
  ActivationCompleteOutcome::ReportWFTFail(outcome) => match outcome {
@@ -289,29 +411,54 @@ impl Workflows {
289
411
  .await
290
412
  })
291
413
  .await;
292
- true
414
+ WFTReportStatus::Reported
293
415
  }
294
416
  FailedActivationWFTReport::ReportLegacyQueryFailure(task_token, failure) => {
295
417
  warn!(run_id=%run_id, failure=?failure, "Failing legacy query request");
296
418
  self.respond_legacy_query(task_token, legacy_query_failure(failure))
297
419
  .await;
298
- true
420
+ WFTReportStatus::Reported
299
421
  }
300
422
  },
301
- ActivationCompleteOutcome::DoNothing => false,
423
+ ActivationCompleteOutcome::WFTFailedDontReport => WFTReportStatus::DropWft,
424
+ ActivationCompleteOutcome::DoNothing => WFTReportStatus::NotReported,
425
+ };
426
+
427
+ let maybe_pwft = if let Some(wft) = wft_from_complete {
428
+ match HistoryPaginator::from_poll(wft, self.client.clone()).await {
429
+ Ok((paginator, pwft)) => Some((pwft, paginator)),
430
+ Err(e) => {
431
+ self.request_eviction(
432
+ &run_id,
433
+ format!("Failed to paginate workflow task from completion: {e:?}"),
434
+ EvictionReason::Fatal,
435
+ );
436
+ None
437
+ }
438
+ }
439
+ } else {
440
+ None
302
441
  };
303
442
 
443
+ if let Some(h) = post_activate_hook {
444
+ h(&run_id, completion_outcome.most_recently_processed_event);
445
+ }
446
+
304
447
  self.post_activation(PostActivationMsg {
305
448
  run_id,
306
- reported_wft_to_server,
307
- wft_from_complete,
449
+ wft_report_status,
450
+ wft_from_complete: maybe_pwft,
308
451
  });
309
452
 
310
453
  Ok(completion_outcome.most_recently_processed_event)
311
454
  }
312
455
 
313
456
  /// Tell workflow that a local activity has finished with the provided result
314
- pub fn notify_of_local_result(&self, run_id: impl Into<String>, resolved: LocalResolution) {
457
+ pub(super) fn notify_of_local_result(
458
+ &self,
459
+ run_id: impl Into<String>,
460
+ resolved: LocalResolution,
461
+ ) {
315
462
  self.send_local(LocalResolutionMsg {
316
463
  run_id: run_id.into(),
317
464
  res: resolved,
@@ -319,7 +466,7 @@ impl Workflows {
319
466
  }
320
467
 
321
468
  /// Request eviction of a workflow
322
- pub fn request_eviction(
469
+ pub(super) fn request_eviction(
323
470
  &self,
324
471
  run_id: impl Into<String>,
325
472
  message: impl Into<String>,
@@ -333,22 +480,39 @@ impl Workflows {
333
480
  }
334
481
 
335
482
  /// Query the state of workflow management. Can return `None` if workflow state is shut down.
336
- pub fn get_state_info(&self) -> impl Future<Output = Option<WorkflowStateInfo>> {
483
+ pub(super) fn get_state_info(&self) -> impl Future<Output = Option<WorkflowStateInfo>> {
337
484
  let (tx, rx) = oneshot::channel();
338
485
  self.send_local(GetStateInfoMsg { response_tx: tx });
339
486
  async move { rx.await.ok() }
340
487
  }
341
488
 
342
- pub async fn shutdown(&self) -> Result<(), JoinError> {
489
+ pub(super) fn available_wft_permits(&self) -> usize {
490
+ self.wft_semaphore.available_permits()
491
+ }
492
+
493
+ pub(super) async fn shutdown(&self) -> Result<(), anyhow::Error> {
343
494
  let maybe_jh = self.processing_task.lock().await.take();
344
495
  if let Some(jh) = maybe_jh {
345
- // This acts as a final wake up in case the stream is still alive and wouldn't otherwise
346
- // receive another message. It allows it to shut itself down.
347
- let _ = self.get_state_info();
348
- jh.await
349
- } else {
350
- Ok(())
496
+ // This serves to drive the stream if it is still alive and wouldn't otherwise receive
497
+ // another message. It allows it to shut itself down.
498
+ let (waker, stop_waker) = abortable(async {
499
+ let mut interval = tokio::time::interval(Duration::from_millis(10));
500
+ loop {
501
+ interval.tick().await;
502
+ let _ = self.get_state_info().await;
503
+ }
504
+ });
505
+ let (_, jh_res) = tokio::join!(
506
+ waker,
507
+ spawn_blocking(move || {
508
+ let r = jh.join();
509
+ stop_waker.abort();
510
+ r
511
+ })
512
+ );
513
+ jh_res?.map_err(|e| anyhow!("Error joining workflow processing thread: {e:?}"))?;
351
514
  }
515
+ Ok(())
352
516
  }
353
517
 
354
518
  /// Must be called after every activation completion has finished
@@ -390,7 +554,11 @@ impl Workflows {
390
554
  /// successfully.
391
555
  fn send_local(&self, msg: impl Into<LocalInputs>) -> bool {
392
556
  let msg = msg.into();
393
- let print_err = !matches!(msg, LocalInputs::GetStateInfo(_));
557
+ let print_err = match &msg {
558
+ LocalInputs::GetStateInfo(_) => false,
559
+ LocalInputs::LocalResolution(lr) if lr.res.is_la_cancel_confirmation() => false,
560
+ _ => true,
561
+ };
394
562
  if let Err(e) = self.local_tx.send(LocalInput {
395
563
  input: msg,
396
564
  span: Span::current(),
@@ -411,7 +579,7 @@ impl Workflows {
411
579
  /// Process eagerly returned activities from WFT completion
412
580
  fn handle_eager_activities(
413
581
  &self,
414
- reserved_act_permits: Vec<OwnedMeteredSemPermit>,
582
+ reserved_act_permits: Vec<TrackedOwnedMeteredSemPermit>,
415
583
  eager_acts: Vec<PollActivityTaskQueueResponse>,
416
584
  ) {
417
585
  if let Some(at_handle) = self.activity_tasks_handle.as_ref() {
@@ -432,7 +600,7 @@ impl Workflows {
432
600
  let with_permits = reserved_act_permits
433
601
  .into_iter()
434
602
  .zip(eager_acts.into_iter())
435
- .map(|(permit, resp)| PermittedTqResp { permit, resp });
603
+ .map(|(permit, resp)| TrackedPermittedTqResp { permit, resp });
436
604
  if with_permits.len() > 0 {
437
605
  debug!(
438
606
  "Adding {} activity tasks received from WFT complete",
@@ -455,32 +623,36 @@ impl Workflows {
455
623
  fn reserve_activity_slots_for_outgoing_commands(
456
624
  &self,
457
625
  commands: &mut [Command],
458
- ) -> Vec<OwnedMeteredSemPermit> {
626
+ ) -> Vec<TrackedOwnedMeteredSemPermit> {
459
627
  let mut reserved = vec![];
460
- if let Some(at_handle) = self.activity_tasks_handle.as_ref() {
461
- for cmd in commands {
462
- if let Some(Attributes::ScheduleActivityTaskCommandAttributes(attrs)) =
463
- cmd.attributes.as_mut()
464
- {
465
- // If request_eager_execution was already false, that means lang explicitly
466
- // told us it didn't want to eagerly execute for some reason. So, we only
467
- // ever turn *off* eager execution if a slot is not available or the activity
468
- // is scheduled on a different task queue.
469
- if attrs.request_eager_execution {
470
- let same_task_queue = attrs
471
- .task_queue
628
+ for cmd in commands {
629
+ if let Some(Attributes::ScheduleActivityTaskCommandAttributes(attrs)) =
630
+ cmd.attributes.as_mut()
631
+ {
632
+ // If request_eager_execution was already false, that means lang explicitly
633
+ // told us it didn't want to eagerly execute for some reason. So, we only
634
+ // ever turn *off* eager execution if a slot is not available or the activity
635
+ // is scheduled on a different task queue.
636
+ if attrs.request_eager_execution {
637
+ let same_task_queue = attrs
638
+ .task_queue
639
+ .as_ref()
640
+ .map(|q| q.name == self.task_queue)
641
+ .unwrap_or_default();
642
+ if same_task_queue
643
+ && reserved.len() < MAX_EAGER_ACTIVITY_RESERVATIONS_PER_WORKFLOW_TASK
644
+ {
645
+ if let Some(p) = self
646
+ .activity_tasks_handle
472
647
  .as_ref()
473
- .map(|q| q.name == self.task_queue)
474
- .unwrap_or_default();
475
- if same_task_queue {
476
- if let Some(p) = at_handle.reserve_slot() {
477
- reserved.push(p);
478
- } else {
479
- attrs.request_eager_execution = false;
480
- }
648
+ .and_then(|h| h.reserve_slot())
649
+ {
650
+ reserved.push(p);
481
651
  } else {
482
652
  attrs.request_eager_execution = false;
483
653
  }
654
+ } else {
655
+ attrs.request_eager_execution = false;
484
656
  }
485
657
  }
486
658
  }
@@ -502,180 +674,30 @@ impl Workflows {
502
674
  }
503
675
  }
504
676
 
505
- /// Manages access to a specific workflow run, and contains various bookkeeping information that the
506
- /// [WFStream] may need to access quickly.
507
- #[derive(derive_more::DebugCustom)]
508
- #[debug(
509
- fmt = "ManagedRunHandle {{ wft: {:?}, activation: {:?}, buffered_resp: {:?} \
510
- have_seen_terminal_event: {}, most_recently_processed_event: {}, more_pending_work: {}, \
511
- trying_to_evict: {}, last_action_acked: {} }}",
512
- wft,
513
- activation,
514
- buffered_resp,
515
- have_seen_terminal_event,
516
- most_recently_processed_event_number,
517
- more_pending_work,
518
- "trying_to_evict.is_some()",
519
- last_action_acked
677
+ /// Returned when a cache miss happens and we need to fetch history from the beginning to
678
+ /// replay a run
679
+ #[derive(Debug, derive_more::Display)]
680
+ #[display(
681
+ fmt = "CacheMissFetchReq(run_id: {})",
682
+ "original_wft.work.execution.run_id"
520
683
  )]
521
- struct ManagedRunHandle {
522
- /// If set, the WFT this run is currently/will be processing.
523
- wft: Option<OutstandingTask>,
524
- /// An outstanding activation to lang
525
- activation: Option<OutstandingActivation>,
526
- /// If set, it indicates there is a buffered poll response from the server that applies to this
527
- /// run. This can happen when lang takes too long to complete a task and the task times out, for
528
- /// example. Upon next completion, the buffered response will be removed and can be made ready
529
- /// to be returned from polling
530
- buffered_resp: Option<PermittedWFT>,
531
- /// True if this machine has seen an event which ends the execution
532
- have_seen_terminal_event: bool,
533
- /// The most recently processed event id this machine has seen. 0 means it has seen nothing.
534
- most_recently_processed_event_number: usize,
535
- /// Is set true when the machines indicate that there is additional known work to be processed
536
- more_pending_work: bool,
537
- /// Is set if an eviction has been requested for this run
538
- trying_to_evict: Option<RequestEvictMsg>,
539
- /// Set to true if the last action we tried to take to this run has been processed (ie: the
540
- /// [RunUpdateResponse] for it has been seen.
541
- last_action_acked: bool,
542
- /// For sending work to the machines
543
- run_actions_tx: UnboundedSender<RunAction>,
544
- /// Handle to the task where the actual machines live
545
- handle: JoinHandle<()>,
546
- metrics: MetricsContext,
684
+ #[must_use]
685
+ struct CacheMissFetchReq {
686
+ original_wft: PermittedWFT,
687
+ }
688
+ /// Bubbled up from inside workflow state if we're trying to apply the next workflow task but it
689
+ /// isn't in memory
690
+ #[derive(Debug)]
691
+ #[must_use]
692
+ struct NextPageReq {
693
+ paginator: HistoryPaginator,
694
+ span: Span,
547
695
  }
548
- impl ManagedRunHandle {
549
- fn new(
550
- wfm: WorkflowManager,
551
- activations_tx: UnboundedSender<RunUpdateResponse>,
552
- local_activity_request_sink: LocalActivityRequestSink,
553
- metrics: MetricsContext,
554
- ) -> Self {
555
- let (run_actions_tx, run_actions_rx) = unbounded_channel();
556
- let managed = ManagedRun::new(wfm, activations_tx, local_activity_request_sink);
557
- let handle = tokio::task::spawn(managed.run(run_actions_rx));
558
- Self {
559
- wft: None,
560
- activation: None,
561
- buffered_resp: None,
562
- have_seen_terminal_event: false,
563
- most_recently_processed_event_number: 0,
564
- more_pending_work: false,
565
- trying_to_evict: None,
566
- last_action_acked: true,
567
- handle,
568
- metrics,
569
- run_actions_tx,
570
- }
571
- }
572
-
573
- fn incoming_wft(&mut self, wft: NewIncomingWFT) {
574
- if self.wft.is_some() {
575
- error!("Trying to send a new WFT for a run which already has one!");
576
- }
577
- self.send_run_action(RunActions::NewIncomingWFT(wft));
578
- }
579
- fn check_more_activations(&mut self) {
580
- // No point in checking for more activations if we have not acked the last update, or
581
- // if there's already an outstanding activation.
582
- if self.last_action_acked && self.activation.is_none() {
583
- self.send_run_action(RunActions::CheckMoreWork {
584
- want_to_evict: self.trying_to_evict.clone(),
585
- has_pending_queries: self
586
- .wft
587
- .as_ref()
588
- .map(|wft| !wft.pending_queries.is_empty())
589
- .unwrap_or_default(),
590
- has_wft: self.wft.is_some(),
591
- });
592
- }
593
- }
594
- fn send_completion(&mut self, c: RunActivationCompletion) {
595
- self.send_run_action(RunActions::ActivationCompletion(c));
596
- }
597
- fn send_local_resolution(&mut self, r: LocalResolution) {
598
- self.send_run_action(RunActions::LocalResolution(r));
599
- }
600
-
601
- fn insert_outstanding_activation(&mut self, act: &ActivationOrAuto) {
602
- let act_type = match &act {
603
- ActivationOrAuto::LangActivation(act) | ActivationOrAuto::ReadyForQueries(act) => {
604
- if act.is_legacy_query() {
605
- OutstandingActivation::LegacyQuery
606
- } else {
607
- OutstandingActivation::Normal {
608
- contains_eviction: act.eviction_index().is_some(),
609
- num_jobs: act.jobs.len(),
610
- }
611
- }
612
- }
613
- ActivationOrAuto::Autocomplete { .. } => OutstandingActivation::Autocomplete,
614
- };
615
- if let Some(old_act) = self.activation {
616
- // This is a panic because we have screwed up core logic if this is violated. It must be
617
- // upheld.
618
- panic!(
619
- "Attempted to insert a new outstanding activation {:?}, but there already was \
620
- one outstanding: {:?}",
621
- act, old_act
622
- );
623
- }
624
- self.activation = Some(act_type);
625
- }
626
-
627
- fn send_run_action(&mut self, action: RunActions) {
628
- self.last_action_acked = false;
629
- self.run_actions_tx
630
- .send(RunAction {
631
- action,
632
- trace_span: Span::current(),
633
- })
634
- .expect("Receive half of run actions not dropped");
635
- }
636
-
637
- /// Returns true if the managed run has any form of pending work
638
- /// If `ignore_evicts` is true, pending evictions do not count as pending work.
639
- /// If `ignore_buffered` is true, buffered workflow tasks do not count as pending work.
640
- fn has_any_pending_work(&self, ignore_evicts: bool, ignore_buffered: bool) -> bool {
641
- let evict_work = if ignore_evicts {
642
- false
643
- } else {
644
- self.trying_to_evict.is_some()
645
- };
646
- let act_work = if ignore_evicts {
647
- if let Some(ref act) = self.activation {
648
- !act.has_only_eviction()
649
- } else {
650
- false
651
- }
652
- } else {
653
- self.activation.is_some()
654
- };
655
- let buffered = if ignore_buffered {
656
- false
657
- } else {
658
- self.buffered_resp.is_some()
659
- };
660
- self.wft.is_some()
661
- || buffered
662
- || !self.last_action_acked
663
- || self.more_pending_work
664
- || act_work
665
- || evict_work
666
- }
667
696
 
668
- /// Returns true if the handle is currently processing a WFT which contains a legacy query.
669
- fn pending_work_is_legacy_query(&self) -> bool {
670
- // Either we know because there is a pending legacy query, or it's already been drained and
671
- // sent as an activation.
672
- matches!(self.activation, Some(OutstandingActivation::LegacyQuery))
673
- || self
674
- .wft
675
- .as_ref()
676
- .map(|t| t.has_pending_legacy_query())
677
- .unwrap_or_default()
678
- }
697
+ #[derive(Debug)]
698
+ struct WFStreamOutput {
699
+ activations: VecDeque<ActivationOrAuto>,
700
+ fetch_histories: VecDeque<HistoryFetchReq>,
679
701
  }
680
702
 
681
703
  #[derive(Debug, derive_more::Display)]
@@ -684,9 +706,15 @@ enum ActivationOrAuto {
684
706
  /// This type should only be filled with an empty activation which is ready to have queries
685
707
  /// inserted into the joblist
686
708
  ReadyForQueries(WorkflowActivation),
709
+ #[display(fmt = "Autocomplete(run_id={run_id})")]
687
710
  Autocomplete {
688
711
  run_id: String,
689
712
  },
713
+ #[display(fmt = "AutoFail(run_id={run_id})")]
714
+ AutoFail {
715
+ run_id: String,
716
+ machines_err: WFMachinesError,
717
+ },
690
718
  }
691
719
  impl ActivationOrAuto {
692
720
  pub fn run_id(&self) -> &str {
@@ -694,15 +722,53 @@ impl ActivationOrAuto {
694
722
  ActivationOrAuto::LangActivation(act) => &act.run_id,
695
723
  ActivationOrAuto::Autocomplete { run_id, .. } => run_id,
696
724
  ActivationOrAuto::ReadyForQueries(act) => &act.run_id,
725
+ ActivationOrAuto::AutoFail { run_id, .. } => run_id,
697
726
  }
698
727
  }
699
728
  }
700
729
 
730
+ /// A processed WFT which has been validated and had a history update extracted from it
701
731
  #[derive(derive_more::DebugCustom)]
702
- #[debug(fmt = "PermittedWft {{ {:?} }}", wft)]
732
+ #[cfg_attr(
733
+ feature = "save_wf_inputs",
734
+ derive(serde::Serialize, serde::Deserialize)
735
+ )]
736
+ #[debug(fmt = "PermittedWft({work:?})")]
703
737
  pub(crate) struct PermittedWFT {
704
- wft: ValidPollWFTQResponse,
705
- permit: OwnedMeteredSemPermit,
738
+ work: PreparedWFT,
739
+ #[cfg_attr(
740
+ feature = "save_wf_inputs",
741
+ serde(skip, default = "UsedMeteredSemPermit::fake_deserialized")
742
+ )]
743
+ permit: UsedMeteredSemPermit,
744
+ #[cfg_attr(
745
+ feature = "save_wf_inputs",
746
+ serde(skip, default = "HistoryPaginator::fake_deserialized")
747
+ )]
748
+ paginator: HistoryPaginator,
749
+ }
750
+ #[derive(Debug)]
751
+ #[cfg_attr(
752
+ feature = "save_wf_inputs",
753
+ derive(serde::Serialize, serde::Deserialize)
754
+ )]
755
+ struct PreparedWFT {
756
+ task_token: TaskToken,
757
+ attempt: u32,
758
+ execution: WorkflowExecution,
759
+ workflow_type: String,
760
+ legacy_query: Option<WorkflowQuery>,
761
+ query_requests: Vec<QueryWorkflow>,
762
+ update: HistoryUpdate,
763
+ }
764
+ impl PreparedWFT {
765
+ /// Returns true if the contained history update is incremental (IE: expects to hit a cached
766
+ /// workflow)
767
+ pub fn is_incremental(&self) -> bool {
768
+ let start_event_id = self.update.first_event_id();
769
+ let poll_resp_is_incremental = start_event_id.map(|eid| eid > 1).unwrap_or_default();
770
+ poll_resp_is_incremental || start_event_id.is_none()
771
+ }
706
772
  }
707
773
 
708
774
  #[derive(Debug)]
@@ -714,7 +780,7 @@ pub(crate) struct OutstandingTask {
714
780
  pub start_time: Instant,
715
781
  /// The WFT permit owned by this task, ensures we don't exceed max concurrent WFT, and makes
716
782
  /// sure the permit is automatically freed when we delete the task.
717
- pub permit: OwnedMeteredSemPermit,
783
+ pub permit: UsedMeteredSemPermit,
718
784
  }
719
785
 
720
786
  impl OutstandingTask {
@@ -767,6 +833,11 @@ impl OutstandingActivation {
767
833
  pub struct WorkflowTaskInfo {
768
834
  pub task_token: TaskToken,
769
835
  pub attempt: u32,
836
+ /// Exists to allow easy tagging of spans with workflow ids. Is duplicative of info inside the
837
+ /// run machines themselves, but that can't be accessed easily. Would be nice to somehow have a
838
+ /// shared repository, or refcounts, or whatever, for strings like these that get duped all
839
+ /// sorts of places.
840
+ pub wf_id: String,
770
841
  }
771
842
 
772
843
  #[derive(Debug)]
@@ -788,49 +859,80 @@ pub(crate) enum ActivationAction {
788
859
  commands: Vec<ProtoCommand>,
789
860
  query_responses: Vec<QueryResult>,
790
861
  force_new_wft: bool,
862
+ sdk_metadata: WorkflowTaskCompletedMetadata,
791
863
  },
792
864
  /// We should respond to a legacy query request
793
865
  RespondLegacyQuery { result: Box<QueryResult> },
794
866
  }
795
867
 
796
- #[derive(Debug, Eq, PartialEq, Hash)]
797
- pub(crate) enum EvictionRequestResult {
798
- EvictionRequested(Option<u32>),
868
+ #[derive(Debug)]
869
+ enum EvictionRequestResult {
870
+ EvictionRequested(Option<u32>, RunUpdateAct),
799
871
  NotFound,
800
872
  EvictionAlreadyRequested(Option<u32>),
801
873
  }
874
+ impl EvictionRequestResult {
875
+ fn into_run_update_resp(self) -> RunUpdateAct {
876
+ match self {
877
+ EvictionRequestResult::EvictionRequested(_, resp) => resp,
878
+ EvictionRequestResult::NotFound
879
+ | EvictionRequestResult::EvictionAlreadyRequested(_) => None,
880
+ }
881
+ }
882
+ }
802
883
 
803
884
  #[derive(Debug)]
804
885
  #[allow(dead_code)] // Not always used in non-test
805
886
  pub(crate) struct WorkflowStateInfo {
806
887
  pub cached_workflows: usize,
807
888
  pub outstanding_wft: usize,
808
- pub available_wft_permits: usize,
809
889
  }
810
890
 
811
891
  #[derive(Debug)]
892
+ #[cfg_attr(
893
+ feature = "save_wf_inputs",
894
+ derive(serde::Serialize, serde::Deserialize)
895
+ )]
812
896
  struct WFActCompleteMsg {
813
897
  completion: ValidatedCompletion,
814
- response_tx: oneshot::Sender<ActivationCompleteResult>,
898
+ #[cfg_attr(feature = "save_wf_inputs", serde(skip))]
899
+ response_tx: Option<oneshot::Sender<ActivationCompleteResult>>,
815
900
  }
816
901
  #[derive(Debug)]
902
+ #[cfg_attr(
903
+ feature = "save_wf_inputs",
904
+ derive(serde::Serialize, serde::Deserialize)
905
+ )]
817
906
  struct LocalResolutionMsg {
818
907
  run_id: String,
819
908
  res: LocalResolution,
820
909
  }
821
910
  #[derive(Debug)]
911
+ #[cfg_attr(
912
+ feature = "save_wf_inputs",
913
+ derive(serde::Serialize, serde::Deserialize)
914
+ )]
822
915
  struct PostActivationMsg {
823
916
  run_id: String,
824
- reported_wft_to_server: bool,
825
- wft_from_complete: Option<ValidPollWFTQResponse>,
917
+ wft_report_status: WFTReportStatus,
918
+ wft_from_complete: Option<(PreparedWFT, HistoryPaginator)>,
826
919
  }
827
920
  #[derive(Debug, Clone)]
921
+ #[cfg_attr(
922
+ feature = "save_wf_inputs",
923
+ derive(serde::Serialize, serde::Deserialize)
924
+ )]
828
925
  struct RequestEvictMsg {
829
926
  run_id: String,
830
927
  message: String,
831
928
  reason: EvictionReason,
832
929
  }
833
930
  #[derive(Debug)]
931
+ pub(crate) struct HeartbeatTimeoutMsg {
932
+ pub(crate) run_id: String,
933
+ pub(crate) span: Span,
934
+ }
935
+ #[derive(Debug)]
834
936
  struct GetStateInfoMsg {
835
937
  response_tx: oneshot::Sender<WorkflowStateInfo>,
836
938
  }
@@ -851,16 +953,24 @@ enum ActivationCompleteOutcome {
851
953
  ReportWFTFail(FailedActivationWFTReport),
852
954
  /// There's nothing to do right now. EX: The workflow needs to keep replaying.
853
955
  DoNothing,
956
+ /// The workflow task failed, but we shouldn't report it. EX: We have failed 2 or more attempts
957
+ /// in a row.
958
+ WFTFailedDontReport,
854
959
  }
855
- #[derive(Debug)]
856
- struct FulfillableActivationComplete {
857
- result: ActivationCompleteResult,
858
- resp_chan: oneshot::Sender<ActivationCompleteResult>,
859
- }
860
- impl FulfillableActivationComplete {
861
- fn fulfill(self) {
862
- let _ = self.resp_chan.send(self.result);
863
- }
960
+ /// Did we report, or not, completion of a WFT to server?
961
+ #[derive(Debug, Copy, Clone)]
962
+ #[cfg_attr(
963
+ feature = "save_wf_inputs",
964
+ derive(serde::Serialize, serde::Deserialize)
965
+ )]
966
+ enum WFTReportStatus {
967
+ Reported,
968
+ /// The WFT completion was not reported when finishing the activation, because there's still
969
+ /// work to be done. EX: Running LAs.
970
+ NotReported,
971
+ /// We didn't report, but we want to clear the outstanding workflow task anyway. See
972
+ /// [ActivationCompleteOutcome::WFTFailedDontReport]
973
+ DropWft,
864
974
  }
865
975
 
866
976
  fn validate_completion(
@@ -887,10 +997,11 @@ fn validate_completion(
887
997
  )
888
998
  {
889
999
  return Err(CompleteWfError::MalformedWorkflowCompletion {
890
- reason: "Workflow completion had a legacy query response along with other \
891
- commands. This is not allowed and constitutes an error in the \
892
- lang SDK"
893
- .to_owned(),
1000
+ reason: format!(
1001
+ "Workflow completion had a legacy query response along with other \
1002
+ commands. This is not allowed and constitutes an error in the \
1003
+ lang SDK. Commands: {commands:?}"
1004
+ ),
894
1005
  run_id: completion.run_id,
895
1006
  });
896
1007
  }
@@ -898,6 +1009,7 @@ fn validate_completion(
898
1009
  Ok(ValidatedCompletion::Success {
899
1010
  run_id: completion.run_id,
900
1011
  commands,
1012
+ used_flags: success.used_internal_flags,
901
1013
  })
902
1014
  }
903
1015
  Some(workflow_activation_completion::Status::Failed(failure)) => {
@@ -914,11 +1026,16 @@ fn validate_completion(
914
1026
  }
915
1027
 
916
1028
  #[derive(Debug)]
1029
+ #[cfg_attr(
1030
+ feature = "save_wf_inputs",
1031
+ derive(serde::Serialize, serde::Deserialize)
1032
+ )]
917
1033
  #[allow(clippy::large_enum_variant)]
918
1034
  enum ValidatedCompletion {
919
1035
  Success {
920
1036
  run_id: String,
921
1037
  commands: Vec<WFCommand>,
1038
+ used_flags: Vec<u32>,
922
1039
  },
923
1040
  Fail {
924
1041
  run_id: String,
@@ -935,104 +1052,6 @@ impl ValidatedCompletion {
935
1052
  }
936
1053
  }
937
1054
 
938
- /// Input to run tasks, sent to [ManagedRun]s via [ManagedRunHandle]s
939
- #[derive(Debug)]
940
- struct RunAction {
941
- action: RunActions,
942
- trace_span: Span,
943
- }
944
- #[derive(Debug)]
945
- #[allow(clippy::large_enum_variant)]
946
- enum RunActions {
947
- NewIncomingWFT(NewIncomingWFT),
948
- ActivationCompletion(RunActivationCompletion),
949
- CheckMoreWork {
950
- want_to_evict: Option<RequestEvictMsg>,
951
- has_pending_queries: bool,
952
- has_wft: bool,
953
- },
954
- LocalResolution(LocalResolution),
955
- HeartbeatTimeout,
956
- }
957
- #[derive(Debug)]
958
- struct NewIncomingWFT {
959
- /// This field is only populated if the machines already exist. Otherwise the machines
960
- /// are instantiated with the workflow history.
961
- history_update: Option<HistoryUpdate>,
962
- /// Wft start time
963
- start_time: Instant,
964
- }
965
- #[derive(Debug)]
966
- struct RunActivationCompletion {
967
- task_token: TaskToken,
968
- start_time: Instant,
969
- commands: Vec<WFCommand>,
970
- activation_was_eviction: bool,
971
- activation_was_only_eviction: bool,
972
- has_pending_query: bool,
973
- query_responses: Vec<QueryResult>,
974
- /// Used to notify the worker when the completion is done processing and the completion can
975
- /// unblock. Must always be `Some` when initialized.
976
- resp_chan: Option<oneshot::Sender<ActivationCompleteResult>>,
977
- }
978
-
979
- /// A response from a [ManagedRun] held by a [ManagedRunHandle]
980
- #[derive(Debug)]
981
- struct RunUpdateResponse {
982
- kind: RunUpdateResponseKind,
983
- span: Span,
984
- }
985
- #[derive(Debug, derive_more::Display)]
986
- #[allow(clippy::large_enum_variant)]
987
- enum RunUpdateResponseKind {
988
- Good(GoodRunUpdate),
989
- Fail(FailRunUpdate),
990
- }
991
-
992
- #[derive(Debug)]
993
- struct GoodRunUpdate {
994
- run_id: String,
995
- outgoing_activation: Option<ActivationOrAuto>,
996
- fulfillable_complete: Option<FulfillableActivationComplete>,
997
- have_seen_terminal_event: bool,
998
- /// Is true if there are more jobs that need to be sent to lang
999
- more_pending_work: bool,
1000
- most_recently_processed_event_number: usize,
1001
- /// Is true if this update was in response to a new WFT
1002
- in_response_to_wft: bool,
1003
- }
1004
- impl Display for GoodRunUpdate {
1005
- fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1006
- write!(
1007
- f,
1008
- "GoodRunUpdate(run_id: {}, outgoing_activation: {}, more_pending_work: {})",
1009
- self.run_id,
1010
- if let Some(og) = self.outgoing_activation.as_ref() {
1011
- format!("{}", og)
1012
- } else {
1013
- "None".to_string()
1014
- },
1015
- self.more_pending_work
1016
- )
1017
- }
1018
- }
1019
- #[derive(Debug)]
1020
- pub(crate) struct FailRunUpdate {
1021
- run_id: String,
1022
- err: WFMachinesError,
1023
- /// This is populated if the run update failed while processing a completion - and thus we
1024
- /// must respond down it when handling the failure.
1025
- completion_resp: Option<oneshot::Sender<ActivationCompleteResult>>,
1026
- }
1027
- impl Display for FailRunUpdate {
1028
- fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1029
- write!(
1030
- f,
1031
- "FailRunUpdate(run_id: {}, error: {:?})",
1032
- self.run_id, self.err
1033
- )
1034
- }
1035
- }
1036
1055
  #[derive(Debug)]
1037
1056
  pub struct OutgoingServerCommands {
1038
1057
  pub commands: Vec<ProtoCommand>,
@@ -1040,9 +1059,22 @@ pub struct OutgoingServerCommands {
1040
1059
  }
1041
1060
 
1042
1061
  #[derive(Debug)]
1062
+ #[cfg_attr(
1063
+ feature = "save_wf_inputs",
1064
+ derive(serde::Serialize, serde::Deserialize)
1065
+ )]
1043
1066
  pub(crate) enum LocalResolution {
1044
1067
  LocalActivity(LocalActivityResolution),
1045
1068
  }
1069
+ impl LocalResolution {
1070
+ pub fn is_la_cancel_confirmation(&self) -> bool {
1071
+ match self {
1072
+ LocalResolution::LocalActivity(lar) => {
1073
+ matches!(lar.result, LocalActivityExecutionResult::Cancelled(_))
1074
+ }
1075
+ }
1076
+ }
1077
+ }
1046
1078
 
1047
1079
  #[derive(thiserror::Error, Debug, derive_more::From)]
1048
1080
  #[error("Lang provided workflow command with empty variant")]
@@ -1051,6 +1083,10 @@ pub struct EmptyWorkflowCommandErr;
1051
1083
  /// [DrivenWorkflow]s respond with these when called, to indicate what they want to do next.
1052
1084
  /// EX: Create a new timer, complete the workflow, etc.
1053
1085
  #[derive(Debug, derive_more::From, derive_more::Display)]
1086
+ #[cfg_attr(
1087
+ feature = "save_wf_inputs",
1088
+ derive(serde::Serialize, serde::Deserialize)
1089
+ )]
1054
1090
  #[allow(clippy::large_enum_variant)]
1055
1091
  pub enum WFCommand {
1056
1092
  /// Returned when we need to wait for the lang sdk to send us something
@@ -1073,6 +1109,7 @@ pub enum WFCommand {
1073
1109
  SignalExternalWorkflow(SignalExternalWorkflowExecution),
1074
1110
  CancelSignalWorkflow(CancelSignalWorkflow),
1075
1111
  UpsertSearchAttributes(UpsertWorkflowSearchAttributes),
1112
+ ModifyWorkflowProperties(ModifyWorkflowProperties),
1076
1113
  }
1077
1114
 
1078
1115
  impl TryFrom<WorkflowCommand> for WFCommand {
@@ -1114,6 +1151,9 @@ impl TryFrom<WorkflowCommand> for WFCommand {
1114
1151
  workflow_command::Variant::UpsertWorkflowSearchAttributes(s) => {
1115
1152
  Ok(Self::UpsertSearchAttributes(s))
1116
1153
  }
1154
+ workflow_command::Variant::ModifyWorkflowProperties(s) => {
1155
+ Ok(Self::ModifyWorkflowProperties(s))
1156
+ }
1117
1157
  }
1118
1158
  }
1119
1159
  }
@@ -1139,5 +1179,98 @@ pub struct WorkflowStartedInfo {
1139
1179
  retry_policy: Option<RetryPolicy>,
1140
1180
  }
1141
1181
 
1142
- type LocalActivityRequestSink =
1143
- Arc<dyn Fn(Vec<LocalActRequest>) -> Vec<LocalActivityResolution> + Send + Sync>;
1182
+ /// Wraps outgoing activation job protos with some internal details core might care about
1183
+ #[derive(Debug, derive_more::Display)]
1184
+ #[display(fmt = "{variant}")]
1185
+ struct OutgoingJob {
1186
+ variant: workflow_activation_job::Variant,
1187
+ /// Since LA resolutions are not distinguished from non-LA resolutions as far as lang is
1188
+ /// concerned, but core cares about that sometimes, attach that info here.
1189
+ is_la_resolution: bool,
1190
+ }
1191
+ impl<WA: Into<workflow_activation_job::Variant>> From<WA> for OutgoingJob {
1192
+ fn from(wa: WA) -> Self {
1193
+ Self {
1194
+ variant: wa.into(),
1195
+ is_la_resolution: false,
1196
+ }
1197
+ }
1198
+ }
1199
+ impl From<OutgoingJob> for WorkflowActivationJob {
1200
+ fn from(og: OutgoingJob) -> Self {
1201
+ Self {
1202
+ variant: Some(og.variant),
1203
+ }
1204
+ }
1205
+ }
1206
+
1207
+ /// Errors thrown inside of workflow machines
1208
+ #[derive(thiserror::Error, Debug)]
1209
+ pub(crate) enum WFMachinesError {
1210
+ #[error("Nondeterminism error: {0}")]
1211
+ Nondeterminism(String),
1212
+ #[error("Fatal error in workflow machines: {0}")]
1213
+ Fatal(String),
1214
+ }
1215
+
1216
+ impl WFMachinesError {
1217
+ pub fn evict_reason(&self) -> EvictionReason {
1218
+ match self {
1219
+ WFMachinesError::Nondeterminism(_) => EvictionReason::Nondeterminism,
1220
+ WFMachinesError::Fatal(_) => EvictionReason::Fatal,
1221
+ }
1222
+ }
1223
+ }
1224
+
1225
+ impl From<TimestampError> for WFMachinesError {
1226
+ fn from(_: TimestampError) -> Self {
1227
+ Self::Fatal("Could not decode timestamp".to_string())
1228
+ }
1229
+ }
1230
+
1231
+ fn auto_fail_to_complete_status(err: WFMachinesError) -> workflow_activation_completion::Status {
1232
+ workflow_activation_completion::Status::Failed(Failure {
1233
+ failure: Some(
1234
+ temporal_sdk_core_protos::temporal::api::failure::v1::Failure {
1235
+ message: "Error while processing workflow task".to_string(),
1236
+ source: err.to_string(),
1237
+ stack_trace: "".to_string(),
1238
+ encoded_attributes: None,
1239
+ cause: None,
1240
+ failure_info: None,
1241
+ },
1242
+ ),
1243
+ force_cause: WorkflowTaskFailedCause::from(err.evict_reason()) as i32,
1244
+ })
1245
+ }
1246
+
1247
+ pub(crate) trait LocalActivityRequestSink: Send + Sync + 'static {
1248
+ fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution>;
1249
+ }
1250
+
1251
+ #[derive(derive_more::Constructor)]
1252
+ pub(super) struct LAReqSink {
1253
+ lam: Arc<LocalActivityManager>,
1254
+ /// If we're recording WF inputs, we also need to store immediate resolutions so they're
1255
+ /// available on replay.
1256
+ #[allow(dead_code)] // sometimes appears unused due to feature flagging
1257
+ recorder: Option<UnboundedSender<Vec<u8>>>,
1258
+ }
1259
+
1260
+ impl LocalActivityRequestSink for LAReqSink {
1261
+ fn sink_reqs(&self, reqs: Vec<LocalActRequest>) -> Vec<LocalActivityResolution> {
1262
+ if reqs.is_empty() {
1263
+ return vec![];
1264
+ }
1265
+
1266
+ #[allow(clippy::let_and_return)] // When feature is off clippy doesn't like this
1267
+ let res = self.lam.enqueue(reqs);
1268
+
1269
+ // We always save when there are any reqs, even if the response might be empty, so that
1270
+ // calls/responses are 1:1
1271
+ #[cfg(feature = "save_wf_inputs")]
1272
+ self.write_req(&res);
1273
+
1274
+ res
1275
+ }
1276
+ }