temporalio 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (310) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +180 -7
  3. data/bridge/Cargo.lock +208 -76
  4. data/bridge/Cargo.toml +5 -2
  5. data/bridge/sdk-core/Cargo.toml +1 -1
  6. data/bridge/sdk-core/README.md +20 -10
  7. data/bridge/sdk-core/client/Cargo.toml +1 -1
  8. data/bridge/sdk-core/client/src/lib.rs +227 -59
  9. data/bridge/sdk-core/client/src/metrics.rs +17 -8
  10. data/bridge/sdk-core/client/src/raw.rs +13 -12
  11. data/bridge/sdk-core/client/src/retry.rs +132 -43
  12. data/bridge/sdk-core/core/Cargo.toml +28 -15
  13. data/bridge/sdk-core/core/benches/workflow_replay.rs +13 -10
  14. data/bridge/sdk-core/core/src/abstractions.rs +225 -36
  15. data/bridge/sdk-core/core/src/core_tests/activity_tasks.rs +217 -79
  16. data/bridge/sdk-core/core/src/core_tests/determinism.rs +165 -2
  17. data/bridge/sdk-core/core/src/core_tests/local_activities.rs +565 -34
  18. data/bridge/sdk-core/core/src/core_tests/queries.rs +247 -90
  19. data/bridge/sdk-core/core/src/core_tests/workers.rs +3 -5
  20. data/bridge/sdk-core/core/src/core_tests/workflow_cancels.rs +1 -1
  21. data/bridge/sdk-core/core/src/core_tests/workflow_tasks.rs +430 -67
  22. data/bridge/sdk-core/core/src/ephemeral_server/mod.rs +106 -12
  23. data/bridge/sdk-core/core/src/internal_flags.rs +136 -0
  24. data/bridge/sdk-core/core/src/lib.rs +148 -34
  25. data/bridge/sdk-core/core/src/protosext/mod.rs +1 -1
  26. data/bridge/sdk-core/core/src/replay/mod.rs +185 -41
  27. data/bridge/sdk-core/core/src/telemetry/log_export.rs +190 -0
  28. data/bridge/sdk-core/core/src/telemetry/metrics.rs +219 -140
  29. data/bridge/sdk-core/core/src/telemetry/mod.rs +326 -315
  30. data/bridge/sdk-core/core/src/telemetry/prometheus_server.rs +20 -14
  31. data/bridge/sdk-core/core/src/test_help/mod.rs +85 -21
  32. data/bridge/sdk-core/core/src/worker/activities/activity_heartbeat_manager.rs +112 -156
  33. data/bridge/sdk-core/core/src/worker/activities/activity_task_poller_stream.rs +89 -0
  34. data/bridge/sdk-core/core/src/worker/activities/local_activities.rs +364 -128
  35. data/bridge/sdk-core/core/src/worker/activities.rs +263 -170
  36. data/bridge/sdk-core/core/src/worker/client/mocks.rs +23 -3
  37. data/bridge/sdk-core/core/src/worker/client.rs +48 -6
  38. data/bridge/sdk-core/core/src/worker/mod.rs +186 -75
  39. data/bridge/sdk-core/core/src/worker/workflow/bridge.rs +1 -3
  40. data/bridge/sdk-core/core/src/worker/workflow/driven_workflow.rs +13 -24
  41. data/bridge/sdk-core/core/src/worker/workflow/history_update.rs +879 -226
  42. data/bridge/sdk-core/core/src/worker/workflow/machines/activity_state_machine.rs +101 -48
  43. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_external_state_machine.rs +8 -12
  44. data/bridge/sdk-core/core/src/worker/workflow/machines/cancel_workflow_state_machine.rs +6 -9
  45. data/bridge/sdk-core/core/src/worker/workflow/machines/child_workflow_state_machine.rs +90 -32
  46. data/bridge/sdk-core/core/src/worker/workflow/machines/complete_workflow_state_machine.rs +6 -9
  47. data/bridge/sdk-core/core/src/worker/workflow/machines/continue_as_new_workflow_state_machine.rs +7 -10
  48. data/bridge/sdk-core/core/src/worker/workflow/machines/fail_workflow_state_machine.rs +6 -9
  49. data/bridge/sdk-core/core/src/worker/workflow/machines/local_activity_state_machine.rs +160 -83
  50. data/bridge/sdk-core/core/src/worker/workflow/machines/mod.rs +36 -54
  51. data/bridge/sdk-core/core/src/worker/workflow/machines/modify_workflow_properties_state_machine.rs +179 -0
  52. data/bridge/sdk-core/core/src/worker/workflow/machines/patch_state_machine.rs +104 -157
  53. data/bridge/sdk-core/core/src/worker/workflow/machines/signal_external_state_machine.rs +8 -12
  54. data/bridge/sdk-core/core/src/worker/workflow/machines/timer_state_machine.rs +9 -13
  55. data/bridge/sdk-core/core/src/worker/workflow/machines/transition_coverage.rs +10 -4
  56. data/bridge/sdk-core/core/src/worker/workflow/machines/upsert_search_attributes_state_machine.rs +14 -11
  57. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines/local_acts.rs +6 -17
  58. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_machines.rs +395 -299
  59. data/bridge/sdk-core/core/src/worker/workflow/machines/workflow_task_state_machine.rs +12 -20
  60. data/bridge/sdk-core/core/src/worker/workflow/managed_run/managed_wf_test.rs +33 -18
  61. data/bridge/sdk-core/core/src/worker/workflow/managed_run.rs +1032 -374
  62. data/bridge/sdk-core/core/src/worker/workflow/mod.rs +525 -392
  63. data/bridge/sdk-core/core/src/worker/workflow/run_cache.rs +40 -57
  64. data/bridge/sdk-core/core/src/worker/workflow/wft_extraction.rs +125 -0
  65. data/bridge/sdk-core/core/src/worker/workflow/wft_poller.rs +3 -6
  66. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/saved_wf_inputs.rs +117 -0
  67. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream/tonic_status_serde.rs +24 -0
  68. data/bridge/sdk-core/core/src/worker/workflow/workflow_stream.rs +456 -681
  69. data/bridge/sdk-core/core-api/Cargo.toml +6 -4
  70. data/bridge/sdk-core/core-api/src/errors.rs +1 -34
  71. data/bridge/sdk-core/core-api/src/lib.rs +7 -45
  72. data/bridge/sdk-core/core-api/src/telemetry.rs +141 -0
  73. data/bridge/sdk-core/core-api/src/worker.rs +27 -1
  74. data/bridge/sdk-core/etc/deps.svg +115 -140
  75. data/bridge/sdk-core/etc/regen-depgraph.sh +5 -0
  76. data/bridge/sdk-core/fsm/rustfsm_procmacro/src/lib.rs +18 -15
  77. data/bridge/sdk-core/fsm/rustfsm_procmacro/tests/trybuild/no_handle_conversions_require_into_fail.stderr +1 -1
  78. data/bridge/sdk-core/fsm/rustfsm_trait/src/lib.rs +8 -3
  79. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-16_history.bin +0 -0
  80. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-23_history.bin +0 -0
  81. data/bridge/sdk-core/histories/evict_while_la_running_no_interference-85_history.bin +0 -0
  82. data/bridge/sdk-core/protos/api_upstream/buf.yaml +0 -3
  83. data/bridge/sdk-core/protos/api_upstream/build/go.mod +7 -0
  84. data/bridge/sdk-core/protos/api_upstream/build/go.sum +5 -0
  85. data/bridge/sdk-core/protos/api_upstream/{temporal/api/enums/v1/cluster.proto → build/tools.go} +7 -18
  86. data/bridge/sdk-core/protos/api_upstream/go.mod +6 -0
  87. data/bridge/sdk-core/protos/api_upstream/temporal/api/batch/v1/message.proto +12 -9
  88. data/bridge/sdk-core/protos/api_upstream/temporal/api/command/v1/message.proto +15 -26
  89. data/bridge/sdk-core/protos/api_upstream/temporal/api/common/v1/message.proto +13 -2
  90. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/batch_operation.proto +3 -2
  91. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/command_type.proto +4 -9
  92. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/common.proto +3 -2
  93. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/event_type.proto +10 -8
  94. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/failed_cause.proto +28 -2
  95. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/namespace.proto +2 -2
  96. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/query.proto +2 -2
  97. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/reset.proto +2 -2
  98. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/schedule.proto +2 -2
  99. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/task_queue.proto +2 -2
  100. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/update.proto +24 -19
  101. data/bridge/sdk-core/protos/api_upstream/temporal/api/enums/v1/workflow.proto +2 -2
  102. data/bridge/sdk-core/protos/api_upstream/temporal/api/errordetails/v1/message.proto +2 -2
  103. data/bridge/sdk-core/protos/api_upstream/temporal/api/failure/v1/message.proto +2 -2
  104. data/bridge/sdk-core/protos/api_upstream/temporal/api/filter/v1/message.proto +2 -2
  105. data/bridge/sdk-core/protos/api_upstream/temporal/api/history/v1/message.proto +62 -26
  106. data/bridge/sdk-core/protos/api_upstream/temporal/api/namespace/v1/message.proto +4 -2
  107. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/request_response.proto +24 -61
  108. data/bridge/sdk-core/protos/api_upstream/temporal/api/operatorservice/v1/service.proto +2 -21
  109. data/bridge/sdk-core/protos/api_upstream/temporal/api/protocol/v1/message.proto +57 -0
  110. data/bridge/sdk-core/protos/api_upstream/temporal/api/query/v1/message.proto +2 -2
  111. data/bridge/sdk-core/protos/api_upstream/temporal/api/replication/v1/message.proto +2 -2
  112. data/bridge/sdk-core/protos/api_upstream/temporal/api/schedule/v1/message.proto +110 -31
  113. data/bridge/sdk-core/protos/api_upstream/temporal/api/sdk/v1/task_complete_metadata.proto +63 -0
  114. data/bridge/sdk-core/protos/api_upstream/temporal/api/taskqueue/v1/message.proto +4 -4
  115. data/bridge/sdk-core/protos/api_upstream/temporal/api/update/v1/message.proto +71 -6
  116. data/bridge/sdk-core/protos/api_upstream/temporal/api/version/v1/message.proto +2 -2
  117. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflow/v1/message.proto +3 -2
  118. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/request_response.proto +111 -36
  119. data/bridge/sdk-core/protos/api_upstream/temporal/api/workflowservice/v1/service.proto +19 -5
  120. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_result/activity_result.proto +1 -0
  121. data/bridge/sdk-core/protos/local/temporal/sdk/core/activity_task/activity_task.proto +1 -0
  122. data/bridge/sdk-core/protos/local/temporal/sdk/core/child_workflow/child_workflow.proto +1 -0
  123. data/bridge/sdk-core/protos/local/temporal/sdk/core/common/common.proto +1 -0
  124. data/bridge/sdk-core/protos/local/temporal/sdk/core/core_interface.proto +1 -0
  125. data/bridge/sdk-core/protos/local/temporal/sdk/core/external_data/external_data.proto +1 -0
  126. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_activation/workflow_activation.proto +9 -0
  127. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_commands/workflow_commands.proto +9 -1
  128. data/bridge/sdk-core/protos/local/temporal/sdk/core/workflow_completion/workflow_completion.proto +6 -0
  129. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/request_response.proto +2 -2
  130. data/bridge/sdk-core/protos/testsrv_upstream/temporal/api/testservice/v1/service.proto +2 -2
  131. data/bridge/sdk-core/sdk/Cargo.toml +4 -3
  132. data/bridge/sdk-core/sdk/src/interceptors.rs +36 -3
  133. data/bridge/sdk-core/sdk/src/lib.rs +94 -25
  134. data/bridge/sdk-core/sdk/src/workflow_context.rs +13 -2
  135. data/bridge/sdk-core/sdk/src/workflow_future.rs +10 -13
  136. data/bridge/sdk-core/sdk-core-protos/Cargo.toml +5 -2
  137. data/bridge/sdk-core/sdk-core-protos/build.rs +36 -2
  138. data/bridge/sdk-core/sdk-core-protos/src/history_builder.rs +164 -104
  139. data/bridge/sdk-core/sdk-core-protos/src/history_info.rs +27 -23
  140. data/bridge/sdk-core/sdk-core-protos/src/lib.rs +252 -74
  141. data/bridge/sdk-core/sdk-core-protos/src/task_token.rs +12 -2
  142. data/bridge/sdk-core/test-utils/Cargo.toml +4 -1
  143. data/bridge/sdk-core/test-utils/src/canned_histories.rs +106 -296
  144. data/bridge/sdk-core/test-utils/src/histfetch.rs +1 -1
  145. data/bridge/sdk-core/test-utils/src/lib.rs +161 -50
  146. data/bridge/sdk-core/test-utils/src/wf_input_saver.rs +50 -0
  147. data/bridge/sdk-core/test-utils/src/workflows.rs +29 -0
  148. data/bridge/sdk-core/tests/fuzzy_workflow.rs +130 -0
  149. data/bridge/sdk-core/tests/{load_tests.rs → heavy_tests.rs} +125 -51
  150. data/bridge/sdk-core/tests/integ_tests/ephemeral_server_tests.rs +25 -3
  151. data/bridge/sdk-core/tests/integ_tests/heartbeat_tests.rs +10 -5
  152. data/bridge/sdk-core/tests/integ_tests/metrics_tests.rs +239 -0
  153. data/bridge/sdk-core/tests/integ_tests/polling_tests.rs +4 -60
  154. data/bridge/sdk-core/tests/integ_tests/queries_tests.rs +5 -128
  155. data/bridge/sdk-core/tests/integ_tests/visibility_tests.rs +83 -25
  156. data/bridge/sdk-core/tests/integ_tests/workflow_tests/activities.rs +93 -69
  157. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_external.rs +1 -0
  158. data/bridge/sdk-core/tests/integ_tests/workflow_tests/cancel_wf.rs +6 -13
  159. data/bridge/sdk-core/tests/integ_tests/workflow_tests/child_workflows.rs +1 -0
  160. data/bridge/sdk-core/tests/integ_tests/workflow_tests/continue_as_new.rs +6 -2
  161. data/bridge/sdk-core/tests/integ_tests/workflow_tests/determinism.rs +3 -10
  162. data/bridge/sdk-core/tests/integ_tests/workflow_tests/local_activities.rs +151 -116
  163. data/bridge/sdk-core/tests/integ_tests/workflow_tests/modify_wf_properties.rs +54 -0
  164. data/bridge/sdk-core/tests/integ_tests/workflow_tests/patches.rs +7 -28
  165. data/bridge/sdk-core/tests/integ_tests/workflow_tests/replay.rs +115 -24
  166. data/bridge/sdk-core/tests/integ_tests/workflow_tests/resets.rs +1 -0
  167. data/bridge/sdk-core/tests/integ_tests/workflow_tests/signals.rs +18 -14
  168. data/bridge/sdk-core/tests/integ_tests/workflow_tests/stickyness.rs +6 -20
  169. data/bridge/sdk-core/tests/integ_tests/workflow_tests/timers.rs +10 -21
  170. data/bridge/sdk-core/tests/integ_tests/workflow_tests/upsert_search_attrs.rs +6 -4
  171. data/bridge/sdk-core/tests/integ_tests/workflow_tests.rs +27 -18
  172. data/bridge/sdk-core/tests/main.rs +8 -16
  173. data/bridge/sdk-core/tests/runner.rs +75 -36
  174. data/bridge/sdk-core/tests/wf_input_replay.rs +32 -0
  175. data/bridge/src/connection.rs +117 -82
  176. data/bridge/src/lib.rs +356 -42
  177. data/bridge/src/runtime.rs +10 -3
  178. data/bridge/src/test_server.rs +153 -0
  179. data/bridge/src/worker.rs +133 -9
  180. data/lib/gen/temporal/api/batch/v1/message_pb.rb +8 -6
  181. data/lib/gen/temporal/api/command/v1/message_pb.rb +10 -16
  182. data/lib/gen/temporal/api/common/v1/message_pb.rb +5 -1
  183. data/lib/gen/temporal/api/enums/v1/batch_operation_pb.rb +2 -1
  184. data/lib/gen/temporal/api/enums/v1/command_type_pb.rb +3 -3
  185. data/lib/gen/temporal/api/enums/v1/common_pb.rb +2 -1
  186. data/lib/gen/temporal/api/enums/v1/event_type_pb.rb +5 -4
  187. data/lib/gen/temporal/api/enums/v1/failed_cause_pb.rb +9 -1
  188. data/lib/gen/temporal/api/enums/v1/namespace_pb.rb +1 -1
  189. data/lib/gen/temporal/api/enums/v1/query_pb.rb +1 -1
  190. data/lib/gen/temporal/api/enums/v1/reset_pb.rb +1 -1
  191. data/lib/gen/temporal/api/enums/v1/schedule_pb.rb +1 -1
  192. data/lib/gen/temporal/api/enums/v1/task_queue_pb.rb +1 -1
  193. data/lib/gen/temporal/api/enums/v1/update_pb.rb +7 -10
  194. data/lib/gen/temporal/api/enums/v1/workflow_pb.rb +1 -1
  195. data/lib/gen/temporal/api/errordetails/v1/message_pb.rb +1 -1
  196. data/lib/gen/temporal/api/failure/v1/message_pb.rb +1 -1
  197. data/lib/gen/temporal/api/filter/v1/message_pb.rb +1 -1
  198. data/lib/gen/temporal/api/history/v1/message_pb.rb +34 -25
  199. data/lib/gen/temporal/api/namespace/v1/message_pb.rb +2 -1
  200. data/lib/gen/temporal/api/operatorservice/v1/request_response_pb.rb +14 -51
  201. data/lib/gen/temporal/api/operatorservice/v1/service_pb.rb +1 -1
  202. data/lib/gen/temporal/api/protocol/v1/message_pb.rb +30 -0
  203. data/lib/gen/temporal/api/query/v1/message_pb.rb +1 -1
  204. data/lib/gen/temporal/api/replication/v1/message_pb.rb +1 -1
  205. data/lib/gen/temporal/api/schedule/v1/message_pb.rb +22 -1
  206. data/lib/gen/temporal/api/sdk/v1/task_complete_metadata_pb.rb +23 -0
  207. data/lib/gen/temporal/api/taskqueue/v1/message_pb.rb +2 -2
  208. data/lib/gen/temporal/api/testservice/v1/request_response_pb.rb +49 -0
  209. data/lib/gen/temporal/api/testservice/v1/service_pb.rb +21 -0
  210. data/lib/gen/temporal/api/update/v1/message_pb.rb +49 -3
  211. data/lib/gen/temporal/api/version/v1/message_pb.rb +1 -1
  212. data/lib/gen/temporal/api/workflow/v1/message_pb.rb +2 -1
  213. data/lib/gen/temporal/api/workflowservice/v1/request_response_pb.rb +47 -20
  214. data/lib/gen/temporal/api/workflowservice/v1/service_pb.rb +1 -1
  215. data/lib/gen/temporal/sdk/core/activity_result/activity_result_pb.rb +13 -9
  216. data/lib/gen/temporal/sdk/core/activity_task/activity_task_pb.rb +10 -6
  217. data/lib/gen/temporal/sdk/core/child_workflow/child_workflow_pb.rb +13 -9
  218. data/lib/gen/temporal/sdk/core/common/common_pb.rb +7 -3
  219. data/lib/gen/temporal/sdk/core/core_interface_pb.rb +9 -3
  220. data/lib/gen/temporal/sdk/core/external_data/external_data_pb.rb +7 -3
  221. data/lib/gen/temporal/sdk/core/workflow_activation/workflow_activation_pb.rb +28 -21
  222. data/lib/gen/temporal/sdk/core/workflow_commands/workflow_commands_pb.rb +32 -24
  223. data/lib/gen/temporal/sdk/core/workflow_completion/workflow_completion_pb.rb +12 -5
  224. data/lib/temporalio/activity/context.rb +102 -0
  225. data/lib/temporalio/activity/info.rb +67 -0
  226. data/lib/temporalio/activity.rb +85 -0
  227. data/lib/temporalio/bridge/connect_options.rb +15 -0
  228. data/lib/temporalio/bridge/error.rb +8 -0
  229. data/lib/temporalio/bridge/retry_config.rb +24 -0
  230. data/lib/temporalio/bridge/tls_options.rb +19 -0
  231. data/lib/temporalio/bridge.rb +14 -0
  232. data/lib/{temporal → temporalio}/client/implementation.rb +57 -56
  233. data/lib/{temporal → temporalio}/client/workflow_handle.rb +35 -35
  234. data/lib/{temporal → temporalio}/client.rb +19 -32
  235. data/lib/temporalio/connection/retry_config.rb +44 -0
  236. data/lib/temporalio/connection/service.rb +20 -0
  237. data/lib/temporalio/connection/test_service.rb +92 -0
  238. data/lib/temporalio/connection/tls_options.rb +51 -0
  239. data/lib/temporalio/connection/workflow_service.rb +731 -0
  240. data/lib/temporalio/connection.rb +86 -0
  241. data/lib/{temporal → temporalio}/data_converter.rb +76 -35
  242. data/lib/{temporal → temporalio}/error/failure.rb +6 -6
  243. data/lib/{temporal → temporalio}/error/workflow_failure.rb +4 -2
  244. data/lib/{temporal → temporalio}/errors.rb +19 -1
  245. data/lib/{temporal → temporalio}/failure_converter/base.rb +5 -5
  246. data/lib/{temporal → temporalio}/failure_converter/basic.rb +58 -52
  247. data/lib/temporalio/failure_converter.rb +7 -0
  248. data/lib/temporalio/interceptor/activity_inbound.rb +22 -0
  249. data/lib/temporalio/interceptor/activity_outbound.rb +24 -0
  250. data/lib/{temporal → temporalio}/interceptor/chain.rb +7 -6
  251. data/lib/{temporal → temporalio}/interceptor/client.rb +27 -2
  252. data/lib/temporalio/interceptor.rb +22 -0
  253. data/lib/{temporal → temporalio}/payload_codec/base.rb +5 -5
  254. data/lib/{temporal → temporalio}/payload_converter/base.rb +3 -3
  255. data/lib/{temporal → temporalio}/payload_converter/bytes.rb +4 -3
  256. data/lib/{temporal → temporalio}/payload_converter/composite.rb +7 -5
  257. data/lib/{temporal → temporalio}/payload_converter/encoding_base.rb +4 -4
  258. data/lib/{temporal → temporalio}/payload_converter/json.rb +4 -3
  259. data/lib/{temporal → temporalio}/payload_converter/nil.rb +4 -3
  260. data/lib/temporalio/payload_converter.rb +14 -0
  261. data/lib/{temporal → temporalio}/retry_policy.rb +17 -7
  262. data/lib/{temporal → temporalio}/retry_state.rb +1 -1
  263. data/lib/temporalio/runtime.rb +25 -0
  264. data/lib/temporalio/testing/time_skipping_handle.rb +32 -0
  265. data/lib/temporalio/testing/time_skipping_interceptor.rb +23 -0
  266. data/lib/temporalio/testing/workflow_environment.rb +112 -0
  267. data/lib/temporalio/testing.rb +175 -0
  268. data/lib/{temporal → temporalio}/timeout_type.rb +2 -2
  269. data/lib/temporalio/version.rb +3 -0
  270. data/lib/temporalio/worker/activity_runner.rb +114 -0
  271. data/lib/temporalio/worker/activity_worker.rb +164 -0
  272. data/lib/temporalio/worker/reactor.rb +46 -0
  273. data/lib/temporalio/worker/runner.rb +63 -0
  274. data/lib/temporalio/worker/sync_worker.rb +124 -0
  275. data/lib/temporalio/worker/thread_pool_executor.rb +51 -0
  276. data/lib/temporalio/worker.rb +204 -0
  277. data/lib/temporalio/workflow/async.rb +46 -0
  278. data/lib/{temporal → temporalio}/workflow/execution_info.rb +4 -4
  279. data/lib/{temporal → temporalio}/workflow/execution_status.rb +1 -1
  280. data/lib/temporalio/workflow/future.rb +138 -0
  281. data/lib/{temporal → temporalio}/workflow/id_reuse_policy.rb +6 -6
  282. data/lib/temporalio/workflow/info.rb +76 -0
  283. data/lib/{temporal → temporalio}/workflow/query_reject_condition.rb +5 -5
  284. data/lib/temporalio.rb +12 -3
  285. data/temporalio.gemspec +11 -6
  286. metadata +137 -64
  287. data/bridge/sdk-core/Cargo.lock +0 -2606
  288. data/bridge/sdk-core/bridge-ffi/Cargo.toml +0 -24
  289. data/bridge/sdk-core/bridge-ffi/LICENSE.txt +0 -23
  290. data/bridge/sdk-core/bridge-ffi/build.rs +0 -25
  291. data/bridge/sdk-core/bridge-ffi/include/sdk-core-bridge.h +0 -249
  292. data/bridge/sdk-core/bridge-ffi/src/lib.rs +0 -825
  293. data/bridge/sdk-core/bridge-ffi/src/wrappers.rs +0 -211
  294. data/bridge/sdk-core/core/src/log_export.rs +0 -62
  295. data/bridge/sdk-core/core/src/worker/workflow/machines/mutable_side_effect_state_machine.rs +0 -127
  296. data/bridge/sdk-core/core/src/worker/workflow/machines/side_effect_state_machine.rs +0 -71
  297. data/bridge/sdk-core/protos/api_upstream/temporal/api/cluster/v1/message.proto +0 -83
  298. data/bridge/sdk-core/protos/local/temporal/sdk/core/bridge/bridge.proto +0 -210
  299. data/bridge/sdk-core/sdk/src/conversions.rs +0 -8
  300. data/lib/bridge.so +0 -0
  301. data/lib/gen/temporal/api/cluster/v1/message_pb.rb +0 -67
  302. data/lib/gen/temporal/api/enums/v1/cluster_pb.rb +0 -26
  303. data/lib/gen/temporal/sdk/core/bridge/bridge_pb.rb +0 -222
  304. data/lib/temporal/bridge.rb +0 -14
  305. data/lib/temporal/connection.rb +0 -736
  306. data/lib/temporal/failure_converter.rb +0 -8
  307. data/lib/temporal/payload_converter.rb +0 -14
  308. data/lib/temporal/runtime.rb +0 -22
  309. data/lib/temporal/version.rb +0 -3
  310. data/lib/temporal.rb +0 -8
@@ -1,13 +1,18 @@
1
1
  use crate::{
2
- abstractions::{MeteredSemaphore, OwnedMeteredSemPermit},
2
+ abstractions::{dbg_panic, MeteredSemaphore, OwnedMeteredSemPermit, UsedMeteredSemPermit},
3
3
  protosext::ValidScheduleLA,
4
4
  retry_logic::RetryPolicyExt,
5
+ worker::workflow::HeartbeatTimeoutMsg,
5
6
  MetricsContext, TaskToken,
6
7
  };
7
- use parking_lot::Mutex;
8
+ use futures::{stream::BoxStream, Stream};
9
+ use futures_util::{future, future::AbortRegistration, stream, StreamExt};
10
+ use parking_lot::{Mutex, MutexGuard};
8
11
  use std::{
9
- collections::HashMap,
12
+ collections::{hash_map::Entry, HashMap},
10
13
  fmt::{Debug, Formatter},
14
+ pin::Pin,
15
+ task::{Context, Poll},
11
16
  time::{Duration, Instant, SystemTime},
12
17
  };
13
18
  use temporal_sdk_core_protos::{
@@ -25,6 +30,7 @@ use tokio::{
25
30
  task::JoinHandle,
26
31
  time::sleep,
27
32
  };
33
+ use tokio_stream::wrappers::UnboundedReceiverStream;
28
34
  use tokio_util::sync::CancellationToken;
29
35
 
30
36
  #[allow(clippy::large_enum_variant)] // Timeouts are relatively rare
@@ -45,10 +51,14 @@ pub(crate) struct LocalInFlightActInfo {
45
51
  pub la_info: NewLocalAct,
46
52
  pub dispatch_time: Instant,
47
53
  pub attempt: u32,
48
- _permit: OwnedMeteredSemPermit,
54
+ _permit: UsedMeteredSemPermit,
49
55
  }
50
56
 
51
57
  #[derive(Debug, Clone)]
58
+ #[cfg_attr(
59
+ feature = "save_wf_inputs",
60
+ derive(serde::Serialize, serde::Deserialize)
61
+ )]
52
62
  pub(crate) enum LocalActivityExecutionResult {
53
63
  Completed(Success),
54
64
  Failed(ActFail),
@@ -65,6 +75,10 @@ impl LocalActivityExecutionResult {
65
75
  }
66
76
 
67
77
  #[derive(Debug, Clone)]
78
+ #[cfg_attr(
79
+ feature = "save_wf_inputs",
80
+ derive(serde::Serialize, serde::Deserialize)
81
+ )]
68
82
  pub(crate) struct LocalActivityResolution {
69
83
  pub seq: u32,
70
84
  pub result: LocalActivityExecutionResult,
@@ -96,6 +110,17 @@ impl Debug for NewLocalAct {
96
110
  pub(crate) enum LocalActRequest {
97
111
  New(NewLocalAct),
98
112
  Cancel(ExecutingLAId),
113
+ #[from(ignore)]
114
+ CancelAllInRun(String),
115
+ StartHeartbeatTimeout {
116
+ send_on_elapse: HeartbeatTimeoutMsg,
117
+ deadline: Instant,
118
+ abort_reg: AbortRegistration,
119
+ },
120
+ /// Tell the LA manager that a workflow task was responded to (completed or failed) for a
121
+ /// certain run id
122
+ #[from(ignore)]
123
+ IndicateWorkflowTaskCompleted(String),
99
124
  }
100
125
 
101
126
  #[derive(Debug, Clone, Eq, PartialEq, Hash)]
@@ -107,28 +132,43 @@ pub(crate) struct ExecutingLAId {
107
132
  pub(crate) struct LocalActivityManager {
108
133
  /// Just so we can provide activity tasks the same namespace as the worker
109
134
  namespace: String,
110
- /// Constrains number of currently executing local activities
111
- semaphore: MeteredSemaphore,
112
135
  /// Sink for new activity execution requests
113
136
  act_req_tx: UnboundedSender<NewOrRetry>,
114
137
  /// Cancels need a different queue since they should be taken first, and don't take a permit
115
138
  cancels_req_tx: UnboundedSender<CancelOrTimeout>,
139
+ /// For the emission of heartbeat timeouts, back into the workflow machines. This channel
140
+ /// needs to come in from above us, because we cannot rely on callers getting the next
141
+ /// activation as a way to deliver heartbeats.
142
+ heartbeat_timeout_tx: UnboundedSender<HeartbeatTimeoutMsg>,
116
143
  /// Wakes every time a complete is processed
117
144
  complete_notify: Notify,
145
+ /// Set once workflows have finished shutting down, and thus we know we will no longer receive
146
+ /// any requests to spawn new LAs
147
+ workflows_have_shut_down: CancellationToken,
118
148
 
119
149
  rcvs: tokio::sync::Mutex<RcvChans>,
120
150
  shutdown_complete_tok: CancellationToken,
121
151
  dat: Mutex<LAMData>,
122
152
  }
123
153
 
154
+ struct LocalActivityInfo {
155
+ task_token: TaskToken,
156
+ /// Tasks for the current backoff until the next retry, if any.
157
+ backing_off_task: Option<JoinHandle<()>>,
158
+ /// Tasks / info about timeouts associated with this LA. May be empty for very brief periods
159
+ /// while the LA id has been generated, but it has not yet been scheduled.
160
+ timeout_bag: Option<TimeoutBag>,
161
+ /// True once the first workflow task this LA started in has elapsed
162
+ first_wft_has_ended: bool,
163
+ /// Attempts at executing this LA during the current WFT
164
+ attempts_in_wft: usize,
165
+ }
166
+
124
167
  struct LAMData {
168
+ /// Maps local activity identifiers to information about them
169
+ la_info: HashMap<ExecutingLAId, LocalActivityInfo>,
125
170
  /// Activities that have been issued to lang but not yet completed
126
171
  outstanding_activity_tasks: HashMap<TaskToken, LocalInFlightActInfo>,
127
- id_to_tt: HashMap<ExecutingLAId, TaskToken>,
128
- /// Tasks for activities which are currently backing off. May be used to cancel retrying them.
129
- backing_off_tasks: HashMap<ExecutingLAId, JoinHandle<()>>,
130
- /// Tasks for timing out activities which are currently in the queue or dispatched.
131
- timeout_tasks: HashMap<ExecutingLAId, TimeoutBag>,
132
172
  next_tt_num: u32,
133
173
  }
134
174
 
@@ -143,43 +183,47 @@ impl LocalActivityManager {
143
183
  pub(crate) fn new(
144
184
  max_concurrent: usize,
145
185
  namespace: String,
186
+ heartbeat_timeout_tx: UnboundedSender<HeartbeatTimeoutMsg>,
146
187
  metrics_context: MetricsContext,
147
188
  ) -> Self {
148
189
  let (act_req_tx, act_req_rx) = unbounded_channel();
149
190
  let (cancels_req_tx, cancels_req_rx) = unbounded_channel();
150
191
  let shutdown_complete_tok = CancellationToken::new();
192
+ let semaphore = MeteredSemaphore::new(
193
+ max_concurrent,
194
+ metrics_context,
195
+ MetricsContext::available_task_slots,
196
+ );
151
197
  Self {
152
198
  namespace,
153
- semaphore: MeteredSemaphore::new(
154
- max_concurrent,
155
- metrics_context,
156
- MetricsContext::available_task_slots,
157
- ),
199
+ rcvs: tokio::sync::Mutex::new(RcvChans::new(
200
+ act_req_rx,
201
+ semaphore,
202
+ cancels_req_rx,
203
+ shutdown_complete_tok.clone(),
204
+ )),
158
205
  act_req_tx,
159
206
  cancels_req_tx,
207
+ heartbeat_timeout_tx,
160
208
  complete_notify: Notify::new(),
161
- rcvs: tokio::sync::Mutex::new(RcvChans {
162
- act_req_rx,
163
- cancels_req_rx,
164
- shutdown: shutdown_complete_tok.clone(),
165
- }),
166
209
  shutdown_complete_tok,
167
210
  dat: Mutex::new(LAMData {
168
211
  outstanding_activity_tasks: Default::default(),
169
- id_to_tt: Default::default(),
170
- backing_off_tasks: Default::default(),
171
- timeout_tasks: Default::default(),
212
+ la_info: Default::default(),
172
213
  next_tt_num: 0,
173
214
  }),
215
+ workflows_have_shut_down: Default::default(),
174
216
  }
175
217
  }
176
218
 
177
219
  #[cfg(test)]
178
220
  fn test(max_concurrent: usize) -> Self {
221
+ let (hb_tx, _hb_rx) = unbounded_channel();
179
222
  Self::new(
180
223
  max_concurrent,
181
224
  "fake_ns".to_string(),
182
- MetricsContext::default(),
225
+ hb_tx,
226
+ MetricsContext::no_op(),
183
227
  )
184
228
  }
185
229
 
@@ -190,76 +234,116 @@ impl LocalActivityManager {
190
234
 
191
235
  #[cfg(test)]
192
236
  fn num_in_backoff(&self) -> usize {
193
- self.dat.lock().backing_off_tasks.len()
237
+ self.dat
238
+ .lock()
239
+ .la_info
240
+ .values()
241
+ .filter(|lai| lai.backing_off_task.is_some())
242
+ .count()
194
243
  }
195
244
 
196
245
  pub(crate) fn enqueue(
197
246
  &self,
198
247
  reqs: impl IntoIterator<Item = LocalActRequest>,
199
248
  ) -> Vec<LocalActivityResolution> {
249
+ if self.workflows_have_shut_down.is_cancelled() {
250
+ dbg_panic!("Tried to enqueue local activity after workflows were shut down");
251
+ return vec![];
252
+ }
200
253
  let mut immediate_resolutions = vec![];
201
254
  for req in reqs {
202
- debug!(local_activity = ?req, "Queuing local activity");
203
255
  match req {
204
256
  LocalActRequest::New(act) => {
257
+ debug!(local_activity=?act, "Queuing local activity");
205
258
  let id = ExecutingLAId {
206
259
  run_id: act.workflow_exec_info.run_id.clone(),
207
260
  seq_num: act.schedule_cmd.seq,
208
261
  };
209
262
  let mut dlock = self.dat.lock();
210
- if dlock.id_to_tt.contains_key(&id) {
211
- // Do not queue local activities which are in fact already executing.
212
- // This can happen during evictions.
213
- debug!("Tried to queue already-executing local activity {:?}", &id);
214
- continue;
215
- }
216
- // Pre-generate and insert the task token now, before we may or may not dispatch
217
- // the activity, so we can enforce idempotency. Prevents two identical LAs
218
- // ending up in the queue at once.
219
263
  let tt = dlock.gen_next_token();
220
- dlock.id_to_tt.insert(id.clone(), tt);
221
-
222
- // Set up timeouts for the new activity
223
- match TimeoutBag::new(&act, self.cancels_req_tx.clone()) {
224
- Ok(tb) => {
225
- dlock.timeout_tasks.insert(id, tb);
226
-
227
- self.act_req_tx
228
- .send(NewOrRetry::New(act))
229
- .expect("Receive half of LA request channel cannot be dropped");
264
+ match dlock.la_info.entry(id) {
265
+ Entry::Occupied(o) => {
266
+ // Do not queue local activities which are in fact already executing.
267
+ // This can happen during evictions.
268
+ debug!(
269
+ "Tried to queue already-executing local activity {:?}",
270
+ o.key()
271
+ );
272
+ continue;
273
+ }
274
+ Entry::Vacant(ve) => {
275
+ // Insert the task token now, before we may or may not dispatch the
276
+ // activity, so we can enforce idempotency. Prevents two identical LAs
277
+ // ending up in the queue at once.
278
+ let lai = ve.insert(LocalActivityInfo {
279
+ task_token: tt,
280
+ backing_off_task: None,
281
+ timeout_bag: None,
282
+ first_wft_has_ended: false,
283
+ attempts_in_wft: 0,
284
+ });
285
+
286
+ // Set up timeouts for the new activity
287
+ match TimeoutBag::new(&act, self.cancels_req_tx.clone()) {
288
+ Ok(tb) => {
289
+ lai.timeout_bag = Some(tb);
290
+
291
+ self.act_req_tx.send(NewOrRetry::New(act)).expect(
292
+ "Receive half of LA request channel cannot be dropped",
293
+ );
294
+ }
295
+ Err(res) => immediate_resolutions.push(res),
296
+ }
230
297
  }
231
- Err(res) => immediate_resolutions.push(res),
232
298
  }
233
299
  }
300
+ LocalActRequest::StartHeartbeatTimeout {
301
+ send_on_elapse,
302
+ deadline,
303
+ abort_reg,
304
+ } => {
305
+ let chan = self.heartbeat_timeout_tx.clone();
306
+ tokio::spawn(future::Abortable::new(
307
+ async move {
308
+ tokio::time::sleep_until(deadline.into()).await;
309
+ let _ = chan.send(send_on_elapse);
310
+ },
311
+ abort_reg,
312
+ ));
313
+ }
234
314
  LocalActRequest::Cancel(id) => {
315
+ debug!(id=?id, "Cancelling local activity");
235
316
  let mut dlock = self.dat.lock();
236
-
237
- // First check if this ID is currently backing off, if so abort the backoff
238
- // task
239
- if let Some(t) = dlock.backing_off_tasks.remove(&id) {
240
- t.abort();
241
- immediate_resolutions.push(LocalActivityResolution {
242
- seq: id.seq_num,
243
- result: LocalActivityExecutionResult::Cancelled(
244
- Cancellation::from_details(None),
245
- ),
246
- runtime: Duration::from_secs(0),
247
- attempt: 0,
248
- backoff: None,
249
- original_schedule_time: None,
250
- });
251
- continue;
317
+ if let Some(lai) = dlock.la_info.get_mut(&id) {
318
+ if let Some(immediate_res) = self.cancel_one_la(id.seq_num, lai) {
319
+ immediate_resolutions.push(immediate_res);
320
+ }
252
321
  }
253
-
254
- if let Some(tt) = dlock.id_to_tt.get(&id) {
255
- self.cancels_req_tx
256
- .send(CancelOrTimeout::Cancel(ActivityTask {
257
- task_token: tt.0.clone(),
258
- variant: Some(activity_task::Variant::Cancel(Cancel {
259
- reason: ActivityCancelReason::Cancelled as i32,
260
- })),
261
- }))
262
- .expect("Receive half of LA cancel channel cannot be dropped");
322
+ }
323
+ LocalActRequest::CancelAllInRun(run_id) => {
324
+ debug!(run_id=%run_id, "Cancelling all local activities for run");
325
+ let mut dlock = self.dat.lock();
326
+ // Even if we've got 100k+ LAs this should only take a ms or two. Not worth
327
+ // adding another map to keep in sync.
328
+ let las_for_run = dlock
329
+ .la_info
330
+ .iter_mut()
331
+ .filter(|(id, _)| id.run_id == run_id);
332
+ for (laid, lainf) in las_for_run {
333
+ if let Some(immediate_res) = self.cancel_one_la(laid.seq_num, lainf) {
334
+ immediate_resolutions.push(immediate_res);
335
+ }
336
+ }
337
+ }
338
+ LocalActRequest::IndicateWorkflowTaskCompleted(run_id) => {
339
+ let mut dlock = self.dat.lock();
340
+ let las_for_run = dlock
341
+ .la_info
342
+ .iter_mut()
343
+ .filter(|(id, _)| id.run_id == run_id);
344
+ for (_, lainf) in las_for_run {
345
+ lainf.first_wft_has_ended = true;
346
+ lainf.attempts_in_wft = 0;
263
347
  }
264
348
  }
265
349
  }
@@ -270,7 +354,7 @@ impl LocalActivityManager {
270
354
  /// Returns the next pending local-activity related action, or None if shutdown has initiated
271
355
  /// and there are no more remaining actions to take.
272
356
  pub(crate) async fn next_pending(&self) -> Option<DispatchOrTimeoutLA> {
273
- let (new_or_retry, permit) = match self.rcvs.lock().await.next(&self.semaphore).await? {
357
+ let (new_or_retry, permit) = match self.rcvs.lock().await.next().await? {
274
358
  NewOrCancel::Cancel(c) => {
275
359
  return match c {
276
360
  CancelOrTimeout::Cancel(c) => Some(DispatchOrTimeoutLA::Dispatch(c)),
@@ -283,12 +367,13 @@ impl LocalActivityManager {
283
367
  let tt = self
284
368
  .dat
285
369
  .lock()
286
- .id_to_tt
370
+ .la_info
287
371
  .get(&ExecutingLAId {
288
372
  run_id: run_id.clone(),
289
373
  seq_num: resolution.seq,
290
374
  })
291
- .map(Clone::clone);
375
+ .as_ref()
376
+ .map(|lai| lai.task_token.clone());
292
377
  if let Some(task_token) = tt {
293
378
  self.complete(&task_token, &resolution.result);
294
379
  Some(ActivityTask {
@@ -323,18 +408,21 @@ impl LocalActivityManager {
323
408
  }
324
409
  NewOrRetry::Retry { in_flight, attempt } => (in_flight, attempt),
325
410
  };
326
- let orig = new_la.clone();
411
+ let la_info_for_in_flight_map = new_la.clone();
327
412
  let id = ExecutingLAId {
328
413
  run_id: new_la.workflow_exec_info.run_id.clone(),
329
414
  seq_num: new_la.schedule_cmd.seq,
330
415
  };
416
+ let orig_sched_time = new_la.schedule_cmd.original_schedule_time;
331
417
  let sa = new_la.schedule_cmd;
332
418
 
333
419
  let mut dat = self.dat.lock();
334
420
  // If this request originated from a local backoff task, clear the entry for it. We
335
421
  // don't await the handle because we know it must already be done, and there's no
336
422
  // meaningful value.
337
- dat.backing_off_tasks.remove(&id);
423
+ dat.la_info
424
+ .get_mut(&id)
425
+ .map(|lai| lai.backing_off_task.take());
338
426
 
339
427
  // If this task sat in the queue for too long, return a timeout for it instead
340
428
  if let Some(s2s) = sa.schedule_to_start_timeout.as_ref() {
@@ -348,30 +436,27 @@ impl LocalActivityManager {
348
436
  runtime: sat_for,
349
437
  attempt,
350
438
  backoff: None,
351
- original_schedule_time: Some(new_la.schedule_time),
439
+ original_schedule_time: orig_sched_time,
352
440
  },
353
441
  task: None,
354
442
  });
355
443
  }
356
444
  }
357
445
 
358
- let tt = dat
359
- .id_to_tt
360
- .get(&id)
361
- .expect("Task token must exist")
362
- .clone();
446
+ let la_info = dat.la_info.get_mut(&id).expect("Activity must exist");
447
+ let tt = la_info.task_token.clone();
448
+ if let Some(to) = la_info.timeout_bag.as_mut() {
449
+ to.mark_started();
450
+ }
363
451
  dat.outstanding_activity_tasks.insert(
364
452
  tt.clone(),
365
453
  LocalInFlightActInfo {
366
- la_info: orig,
454
+ la_info: la_info_for_in_flight_map,
367
455
  dispatch_time: Instant::now(),
368
456
  attempt,
369
- _permit: permit,
457
+ _permit: permit.into_used(),
370
458
  },
371
459
  );
372
- if let Some(to) = dat.timeout_tasks.get_mut(&id) {
373
- to.mark_started();
374
- }
375
460
 
376
461
  let (schedule_to_close, start_to_close) = sa.close_timeouts.into_sched_and_start();
377
462
  Some(DispatchOrTimeoutLA::Dispatch(ActivityTask {
@@ -406,11 +491,23 @@ impl LocalActivityManager {
406
491
  ) -> LACompleteAction {
407
492
  let mut dlock = self.dat.lock();
408
493
  if let Some(info) = dlock.outstanding_activity_tasks.remove(task_token) {
494
+ if self.workflows_have_shut_down.is_cancelled() {
495
+ // If workflows are already shut down, the results of all this don't matter.
496
+ // Just say we're done if there's nothing outstanding any more.
497
+ self.set_shutdown_complete_if_ready(&mut dlock);
498
+ }
499
+
409
500
  let exec_id = ExecutingLAId {
410
501
  run_id: info.la_info.workflow_exec_info.run_id.clone(),
411
502
  seq_num: info.la_info.schedule_cmd.seq,
412
503
  };
413
- dlock.id_to_tt.remove(&exec_id);
504
+ let maybe_old_lai = dlock.la_info.remove(&exec_id);
505
+ if let Some(ref oldlai) = maybe_old_lai {
506
+ if let Some(ref bot) = oldlai.backing_off_task {
507
+ dbg_panic!("Just-resolved LA should not have backoff task");
508
+ bot.abort();
509
+ }
510
+ }
414
511
 
415
512
  match status {
416
513
  LocalActivityExecutionResult::Completed(_)
@@ -446,8 +543,6 @@ impl LocalActivityManager {
446
543
  }
447
544
  // Immediately create a new task token for the to-be-retried LA
448
545
  let tt = dlock.gen_next_token();
449
- dlock.id_to_tt.insert(exec_id.clone(), tt);
450
-
451
546
  // Send the retry request after waiting the backoff duration
452
547
  let send_chan = self.act_req_tx.clone();
453
548
  let jh = tokio::spawn(async move {
@@ -460,7 +555,22 @@ impl LocalActivityManager {
460
555
  })
461
556
  .expect("Receive half of LA request channel cannot be dropped");
462
557
  });
463
- dlock.backing_off_tasks.insert(exec_id, jh);
558
+ dlock.la_info.insert(
559
+ exec_id,
560
+ LocalActivityInfo {
561
+ task_token: tt,
562
+ backing_off_task: Some(jh),
563
+ first_wft_has_ended: maybe_old_lai
564
+ .as_ref()
565
+ .map(|old| old.first_wft_has_ended)
566
+ .unwrap_or_default(),
567
+ attempts_in_wft: maybe_old_lai
568
+ .as_ref()
569
+ .map(|old| old.attempts_in_wft + 1)
570
+ .unwrap_or(1),
571
+ timeout_bag: maybe_old_lai.and_then(|old| old.timeout_bag),
572
+ },
573
+ );
464
574
 
465
575
  LACompleteAction::WillBeRetried
466
576
  } else {
@@ -473,11 +583,70 @@ impl LocalActivityManager {
473
583
  }
474
584
  }
475
585
 
476
- pub(crate) async fn shutdown_and_wait_all_finished(&self) {
477
- while !self.dat.lock().outstanding_activity_tasks.is_empty() {
586
+ pub(crate) fn workflows_have_shutdown(&self) {
587
+ self.workflows_have_shut_down.cancel();
588
+ self.set_shutdown_complete_if_ready(&mut self.dat.lock());
589
+ }
590
+
591
+ pub(crate) async fn wait_all_outstanding_tasks_finished(&self) {
592
+ while !self.set_shutdown_complete_if_ready(&mut self.dat.lock()) {
478
593
  self.complete_notify.notified().await;
479
594
  }
480
- self.shutdown_complete_tok.cancel();
595
+ }
596
+
597
+ /// Try to close the activity stream as soon as worker shutdown is initiated.
598
+ /// This is required for activity-only workers where since workflows are not polled and the activity poller might
599
+ /// get "stuck".
600
+ pub(crate) fn shutdown_initiated(&self) {
601
+ self.set_shutdown_complete_if_ready(&mut self.dat.lock());
602
+ }
603
+
604
+ pub(crate) fn get_nonfirst_attempt_count(&self, for_run_id: &str) -> usize {
605
+ let dlock = self.dat.lock();
606
+ dlock
607
+ .la_info
608
+ .iter()
609
+ .filter(|(id, info)| id.run_id == for_run_id && info.first_wft_has_ended)
610
+ .map(|(_, info)| info.attempts_in_wft)
611
+ .sum()
612
+ }
613
+
614
+ fn set_shutdown_complete_if_ready(&self, dlock: &mut MutexGuard<LAMData>) -> bool {
615
+ let nothing_outstanding = dlock.outstanding_activity_tasks.is_empty();
616
+ if nothing_outstanding {
617
+ self.shutdown_complete_tok.cancel();
618
+ }
619
+ nothing_outstanding
620
+ }
621
+
622
+ fn cancel_one_la(
623
+ &self,
624
+ seq: u32,
625
+ lai: &mut LocalActivityInfo,
626
+ ) -> Option<LocalActivityResolution> {
627
+ // First check if this ID is currently backing off, if so abort the backoff
628
+ // task
629
+ if let Some(t) = lai.backing_off_task.take() {
630
+ t.abort();
631
+ return Some(LocalActivityResolution {
632
+ seq,
633
+ result: LocalActivityExecutionResult::Cancelled(Cancellation::from_details(None)),
634
+ runtime: Duration::from_secs(0),
635
+ attempt: 0,
636
+ backoff: None,
637
+ original_schedule_time: None,
638
+ });
639
+ }
640
+
641
+ self.cancels_req_tx
642
+ .send(CancelOrTimeout::Cancel(ActivityTask {
643
+ task_token: lai.task_token.0.clone(),
644
+ variant: Some(activity_task::Variant::Cancel(Cancel {
645
+ reason: ActivityCancelReason::Cancelled as i32,
646
+ })),
647
+ }))
648
+ .expect("Receive half of LA cancel channel cannot be dropped");
649
+ None
481
650
  }
482
651
  }
483
652
 
@@ -515,37 +684,51 @@ enum CancelOrTimeout {
515
684
  },
516
685
  }
517
686
 
687
+ #[allow(clippy::large_enum_variant)]
518
688
  enum NewOrCancel {
519
689
  New(NewOrRetry, OwnedMeteredSemPermit),
520
690
  Cancel(CancelOrTimeout),
521
691
  }
522
692
 
693
+ #[pin_project::pin_project]
523
694
  struct RcvChans {
524
- /// Activities that need to be executed by lang
525
- act_req_rx: UnboundedReceiver<NewOrRetry>,
526
- /// Cancels to send to lang or apply internally
527
- cancels_req_rx: UnboundedReceiver<CancelOrTimeout>,
528
- shutdown: CancellationToken,
695
+ #[pin]
696
+ inner: BoxStream<'static, NewOrCancel>,
529
697
  }
530
698
 
531
699
  impl RcvChans {
532
- async fn next(&mut self, new_sem: &MeteredSemaphore) -> Option<NewOrCancel> {
533
- tokio::select! {
534
- cancel = async { self.cancels_req_rx.recv().await } => {
535
- Some(NewOrCancel::Cancel(cancel.expect("Send halves of LA manager are not dropped")))
536
- }
537
- (maybe_new_or_retry, perm) = async {
538
- // Wait for a permit to take a task and forget it. Permits are removed until a
539
- // completion.
540
- let perm = new_sem.acquire_owned().await.expect("is never closed");
541
- (self.act_req_rx.recv().await, perm)
542
- } => Some(NewOrCancel::New(
543
- maybe_new_or_retry.expect("Send halves of LA manager are not dropped"), perm
544
- )),
545
- _ = self.shutdown.cancelled() => None
700
+ fn new(
701
+ new_reqs: UnboundedReceiver<NewOrRetry>,
702
+ new_sem: MeteredSemaphore,
703
+ cancels: UnboundedReceiver<CancelOrTimeout>,
704
+ shutdown_completed: CancellationToken,
705
+ ) -> Self {
706
+ let cancel_stream = UnboundedReceiverStream::new(cancels).map(NewOrCancel::Cancel);
707
+ let new_stream = UnboundedReceiverStream::new(new_reqs)
708
+ // Get a permit for each new activity request
709
+ .zip(stream::unfold(new_sem, |new_sem| async move {
710
+ let permit = new_sem
711
+ .acquire_owned()
712
+ .await
713
+ .expect("Local activity semaphore is never closed");
714
+ Some((permit, new_sem))
715
+ }))
716
+ .map(|(req, permit)| NewOrCancel::New(req, permit));
717
+ Self {
718
+ inner: tokio_stream::StreamExt::merge(cancel_stream, new_stream)
719
+ .take_until(async move { shutdown_completed.cancelled().await })
720
+ .boxed(),
546
721
  }
547
722
  }
548
723
  }
724
+ impl Stream for RcvChans {
725
+ type Item = NewOrCancel;
726
+
727
+ fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
728
+ let this = self.project();
729
+ this.inner.poll_next(cx)
730
+ }
731
+ }
549
732
 
550
733
  struct TimeoutBag {
551
734
  sched_to_close_handle: JoinHandle<()>,
@@ -566,17 +749,21 @@ impl TimeoutBag {
566
749
  let (schedule_to_close, start_to_close) =
567
750
  new_la.schedule_cmd.close_timeouts.into_sched_and_start();
568
751
 
752
+ let sched_time = new_la
753
+ .schedule_cmd
754
+ .original_schedule_time
755
+ .unwrap_or(new_la.schedule_time);
569
756
  let resolution = LocalActivityResolution {
570
757
  seq: new_la.schedule_cmd.seq,
571
758
  result: LocalActivityExecutionResult::timeout(TimeoutType::ScheduleToClose),
572
759
  runtime: Default::default(),
573
760
  attempt: new_la.schedule_cmd.attempt,
574
761
  backoff: None,
575
- original_schedule_time: Some(new_la.schedule_time),
762
+ original_schedule_time: new_la.schedule_cmd.original_schedule_time,
576
763
  };
577
764
  // Remove any time already elapsed since the scheduling time
578
765
  let schedule_to_close = schedule_to_close
579
- .map(|s2c| s2c.saturating_sub(new_la.schedule_time.elapsed().unwrap_or_default()));
766
+ .map(|s2c| s2c.saturating_sub(sched_time.elapsed().unwrap_or_default()));
580
767
  if let Some(ref s2c) = schedule_to_close {
581
768
  if s2c.is_zero() {
582
769
  return Err(resolution);
@@ -639,18 +826,19 @@ impl Drop for TimeoutBag {
639
826
  mod tests {
640
827
  use super::*;
641
828
  use crate::{prost_dur, protosext::LACloseTimeouts};
829
+ use futures_util::FutureExt;
642
830
  use temporal_sdk_core_protos::temporal::api::{
643
831
  common::v1::RetryPolicy,
644
832
  failure::v1::{failure::FailureInfo, ApplicationFailureInfo, Failure},
645
833
  };
646
- use tokio::{sync::mpsc::error::TryRecvError, task::yield_now};
834
+ use tokio::task::yield_now;
647
835
 
648
836
  impl DispatchOrTimeoutLA {
649
837
  fn unwrap(self) -> ActivityTask {
650
838
  match self {
651
839
  DispatchOrTimeoutLA::Dispatch(t) => t,
652
- DispatchOrTimeoutLA::Timeout { .. } => {
653
- panic!("Timeout returned when expected a task")
840
+ _ => {
841
+ panic!("Non-dispatched action returned")
654
842
  }
655
843
  }
656
844
  }
@@ -1025,18 +1213,66 @@ mod tests {
1025
1213
  lam.next_pending().await.unwrap().unwrap();
1026
1214
  assert_eq!(lam.num_outstanding(), 1);
1027
1215
  // There should be nothing else in the queue
1028
- assert_eq!(
1029
- lam.rcvs.lock().await.act_req_rx.try_recv().unwrap_err(),
1030
- TryRecvError::Empty
1031
- );
1216
+ assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
1032
1217
 
1033
1218
  // Verify that if we now enqueue the same act again, after the task is outstanding, we still
1034
1219
  // don't add it.
1035
1220
  lam.enqueue([new_la.into()]);
1036
1221
  assert_eq!(lam.num_outstanding(), 1);
1037
- assert_eq!(
1038
- lam.rcvs.lock().await.act_req_rx.try_recv().unwrap_err(),
1039
- TryRecvError::Empty
1040
- );
1222
+ assert!(lam.rcvs.lock().await.next().now_or_never().is_none());
1223
+ }
1224
+
1225
+ #[tokio::test]
1226
+ async fn nonfirst_la_attempt_count_is_accurate() {
1227
+ let run_id = "run_id";
1228
+ let lam = LocalActivityManager::test(10);
1229
+ let new_la = NewLocalAct {
1230
+ schedule_cmd: ValidScheduleLA {
1231
+ seq: 1,
1232
+ activity_id: 1.to_string(),
1233
+ retry_policy: RetryPolicy {
1234
+ initial_interval: Some(prost_dur!(from_millis(1))),
1235
+ backoff_coefficient: 1.0,
1236
+ ..Default::default()
1237
+ },
1238
+ local_retry_threshold: Duration::from_secs(500),
1239
+ ..Default::default()
1240
+ },
1241
+ workflow_type: "".to_string(),
1242
+ workflow_exec_info: WorkflowExecution {
1243
+ workflow_id: "".to_string(),
1244
+ run_id: run_id.to_string(),
1245
+ },
1246
+ schedule_time: SystemTime::now(),
1247
+ };
1248
+ lam.enqueue([new_la.clone().into()]);
1249
+ let spinfail = || async {
1250
+ for _ in 1..=10 {
1251
+ let next = lam.next_pending().await.unwrap().unwrap();
1252
+ let tt = TaskToken(next.task_token);
1253
+ lam.complete(
1254
+ &tt,
1255
+ &LocalActivityExecutionResult::Failed(Default::default()),
1256
+ );
1257
+ }
1258
+ };
1259
+
1260
+ // Fail a bunch of times
1261
+ spinfail().await;
1262
+ // Nonfirst attempt count should still be zero
1263
+ let count = lam.get_nonfirst_attempt_count(run_id);
1264
+ assert_eq!(count, 0);
1265
+
1266
+ for _ in 1..=2 {
1267
+ // This should work over multiple WFTs
1268
+ // say the first wft was completed
1269
+ lam.enqueue([LocalActRequest::IndicateWorkflowTaskCompleted(
1270
+ run_id.to_string(),
1271
+ )]);
1272
+ // Do some more attempts
1273
+ spinfail().await;
1274
+ let count = lam.get_nonfirst_attempt_count(run_id);
1275
+ assert_eq!(count, 10);
1276
+ }
1041
1277
  }
1042
1278
  }