@rivetkit/engine 1.0.0 → 2.2.1-pr.4600.b74ff3b

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1038) hide show
  1. package/CLAUDE.md +38 -0
  2. package/artifacts/config-schema.json +1140 -0
  3. package/artifacts/errors/actor.kv_storage_quota_exceeded.json +5 -0
  4. package/artifacts/errors/actor.no_runner_config_configured.json +5 -0
  5. package/artifacts/errors/guard.actor_runner_failed.json +5 -0
  6. package/artifacts/errors/guard.invalid_request.json +5 -0
  7. package/artifacts/errors/guard.invalid_request_body.json +5 -0
  8. package/artifacts/errors/guard.invalid_response_body.json +5 -0
  9. package/artifacts/errors/guard.missing_query_parameter.json +5 -0
  10. package/artifacts/errors/guard.query_ambiguous_runner_configs.json +5 -0
  11. package/artifacts/errors/guard.query_duplicate_param.json +5 -0
  12. package/artifacts/errors/guard.query_empty_actor_name.json +5 -0
  13. package/artifacts/errors/guard.query_get_disallowed_params.json +5 -0
  14. package/artifacts/errors/guard.query_invalid_base64_input.json +5 -0
  15. package/artifacts/errors/guard.query_invalid_cbor_input.json +5 -0
  16. package/artifacts/errors/guard.query_invalid_params.json +5 -0
  17. package/artifacts/errors/guard.query_invalid_percent_encoding.json +5 -0
  18. package/artifacts/errors/guard.query_missing_runner_name.json +5 -0
  19. package/artifacts/errors/guard.query_no_runner_configs.json +5 -0
  20. package/artifacts/errors/guard.query_param_missing_equals.json +5 -0
  21. package/artifacts/errors/guard.query_path_token_syntax.json +5 -0
  22. package/artifacts/errors/guard.query_unknown_param.json +5 -0
  23. package/artifacts/errors/guard.request_body_too_large.json +5 -0
  24. package/artifacts/errors/guard.response_body_too_large.json +5 -0
  25. package/artifacts/errors/serverless_runner_pool.failed_to_fetch_metadata.json +5 -0
  26. package/artifacts/errors/serverless_runner_pool.not_found.json +5 -0
  27. package/artifacts/errors/{api.rate_limited.json → test.api_rate_limited.json} +2 -2
  28. package/artifacts/errors/{namespace.invalid_name.json → test.namespace_invalid_name.json} +2 -2
  29. package/artifacts/errors/ws.going_away.json +5 -0
  30. package/artifacts/openapi.json +458 -6
  31. package/docker/builder-base/linux-gnu.Dockerfile +21 -0
  32. package/docker/builder-base/linux-musl.Dockerfile +53 -0
  33. package/docker/builder-base/osxcross.Dockerfile +42 -0
  34. package/docker/builder-base/windows-mingw.Dockerfile +41 -0
  35. package/docker/builder-base/windows-msvc.Dockerfile +25 -0
  36. package/docker/dev/docker-compose.yml +43 -18
  37. package/docker/dev/grafana/dashboards/api.json +1077 -1239
  38. package/docker/dev/grafana/dashboards/cache.json +911 -1074
  39. package/docker/dev/grafana/dashboards/epoxy.json +1606 -0
  40. package/docker/dev/grafana/dashboards/futures.json +242 -229
  41. package/docker/dev/grafana/dashboards/gasoline.json +2663 -2476
  42. package/docker/dev/grafana/dashboards/guard.json +1433 -1273
  43. package/docker/dev/grafana/dashboards/operation.json +871 -0
  44. package/docker/dev/grafana/dashboards/pegboard.json +1274 -0
  45. package/docker/dev/grafana/dashboards/tokio.json +930 -1004
  46. package/docker/dev/grafana/dashboards/traces.json +35 -13
  47. package/docker/dev/grafana/provisioning/datasources/datasources.yaml +8 -0
  48. package/docker/{dev-multinode/otel-collector-server → dev/otel-collector}/config.yaml +18 -13
  49. package/docker/dev/prometheus/prometheus.yml +4 -0
  50. package/docker/dev/rivet-engine/config.jsonc +9 -16
  51. package/docker/dev-host/docker-compose.yml +38 -16
  52. package/docker/dev-host/grafana/dashboards/api.json +1077 -1239
  53. package/docker/dev-host/grafana/dashboards/cache.json +911 -1074
  54. package/docker/dev-host/grafana/dashboards/epoxy.json +1606 -0
  55. package/docker/dev-host/grafana/dashboards/futures.json +242 -229
  56. package/docker/dev-host/grafana/dashboards/gasoline.json +2663 -2476
  57. package/docker/dev-host/grafana/dashboards/guard.json +1433 -1273
  58. package/docker/dev-host/grafana/dashboards/operation.json +871 -0
  59. package/docker/dev-host/grafana/dashboards/pegboard.json +1274 -0
  60. package/docker/dev-host/grafana/dashboards/tokio.json +930 -1004
  61. package/docker/dev-host/grafana/dashboards/traces.json +35 -13
  62. package/docker/dev-host/grafana/provisioning/datasources/datasources.yaml +8 -0
  63. package/docker/dev-host/{otel-collector-server → otel-collector}/config.yaml +18 -13
  64. package/docker/dev-host/prometheus/prometheus.yml +4 -0
  65. package/docker/dev-host/rivet-engine/config.jsonc +9 -16
  66. package/docker/dev-multidc/core/grafana/dashboards/api.json +1077 -1239
  67. package/docker/dev-multidc/core/grafana/dashboards/cache.json +911 -1074
  68. package/docker/dev-multidc/core/grafana/dashboards/epoxy.json +1606 -0
  69. package/docker/dev-multidc/core/grafana/dashboards/futures.json +242 -229
  70. package/docker/dev-multidc/core/grafana/dashboards/gasoline.json +2663 -2476
  71. package/docker/dev-multidc/core/grafana/dashboards/guard.json +1433 -1273
  72. package/docker/dev-multidc/core/grafana/dashboards/operation.json +871 -0
  73. package/docker/dev-multidc/core/grafana/dashboards/pegboard.json +1274 -0
  74. package/docker/dev-multidc/core/grafana/dashboards/tokio.json +930 -1004
  75. package/docker/dev-multidc/core/grafana/dashboards/traces.json +35 -13
  76. package/docker/dev-multidc/core/grafana/provisioning/datasources/datasources.yaml +8 -0
  77. package/docker/dev-multidc/core/prometheus/prometheus.yml +4 -0
  78. package/docker/dev-multidc/datacenters/dc-a/{otel-collector-server → otel-collector}/config.yaml +18 -13
  79. package/docker/dev-multidc/datacenters/dc-a/rivet-engine/config.jsonc +23 -22
  80. package/docker/{dev-multidc-multinode/datacenters/dc-b/otel-collector-server → dev-multidc/datacenters/dc-b/otel-collector}/config.yaml +18 -13
  81. package/docker/dev-multidc/datacenters/dc-b/rivet-engine/config.jsonc +23 -22
  82. package/docker/dev-multidc/datacenters/dc-c/{otel-collector-server → otel-collector}/config.yaml +18 -13
  83. package/docker/dev-multidc/datacenters/dc-c/rivet-engine/config.jsonc +23 -22
  84. package/docker/dev-multidc/docker-compose.yml +71 -64
  85. package/docker/dev-multidc-multinode/core/grafana/dashboards/api.json +1077 -1239
  86. package/docker/dev-multidc-multinode/core/grafana/dashboards/cache.json +911 -1074
  87. package/docker/dev-multidc-multinode/core/grafana/dashboards/epoxy.json +1606 -0
  88. package/docker/dev-multidc-multinode/core/grafana/dashboards/futures.json +242 -229
  89. package/docker/dev-multidc-multinode/core/grafana/dashboards/gasoline.json +2663 -2476
  90. package/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json +1433 -1273
  91. package/docker/dev-multidc-multinode/core/grafana/dashboards/operation.json +871 -0
  92. package/docker/dev-multidc-multinode/core/grafana/dashboards/pegboard.json +1274 -0
  93. package/docker/dev-multidc-multinode/core/grafana/dashboards/tokio.json +930 -1004
  94. package/docker/dev-multidc-multinode/core/grafana/dashboards/traces.json +35 -13
  95. package/docker/dev-multidc-multinode/core/grafana/provisioning/datasources/datasources.yaml +8 -0
  96. package/docker/dev-multidc-multinode/core/prometheus/prometheus.yml +4 -0
  97. package/docker/dev-multidc-multinode/datacenters/dc-a/{otel-collector-server → otel-collector}/config.yaml +28 -13
  98. package/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/0/config.jsonc +23 -22
  99. package/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/1/config.jsonc +23 -22
  100. package/docker/dev-multidc-multinode/datacenters/dc-a/rivet-engine/2/config.jsonc +23 -22
  101. package/docker/{dev-multidc/datacenters/dc-b/otel-collector-server → dev-multidc-multinode/datacenters/dc-b/otel-collector}/config.yaml +28 -13
  102. package/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/0/config.jsonc +23 -22
  103. package/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/1/config.jsonc +23 -22
  104. package/docker/dev-multidc-multinode/datacenters/dc-b/rivet-engine/2/config.jsonc +23 -22
  105. package/docker/dev-multidc-multinode/datacenters/dc-c/{otel-collector-server → otel-collector}/config.yaml +28 -13
  106. package/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/0/config.jsonc +23 -22
  107. package/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/1/config.jsonc +23 -22
  108. package/docker/dev-multidc-multinode/datacenters/dc-c/rivet-engine/2/config.jsonc +23 -22
  109. package/docker/dev-multidc-multinode/docker-compose.yml +113 -88
  110. package/docker/dev-multinode/docker-compose.yml +57 -26
  111. package/docker/dev-multinode/grafana/dashboards/api.json +1077 -1239
  112. package/docker/dev-multinode/grafana/dashboards/cache.json +911 -1074
  113. package/docker/dev-multinode/grafana/dashboards/epoxy.json +1606 -0
  114. package/docker/dev-multinode/grafana/dashboards/futures.json +242 -229
  115. package/docker/dev-multinode/grafana/dashboards/gasoline.json +2663 -2476
  116. package/docker/dev-multinode/grafana/dashboards/guard.json +1433 -1273
  117. package/docker/dev-multinode/grafana/dashboards/operation.json +871 -0
  118. package/docker/dev-multinode/grafana/dashboards/pegboard.json +1274 -0
  119. package/docker/dev-multinode/grafana/dashboards/tokio.json +930 -1004
  120. package/docker/dev-multinode/grafana/dashboards/traces.json +35 -13
  121. package/docker/dev-multinode/grafana/provisioning/datasources/datasources.yaml +8 -0
  122. package/docker/{dev/otel-collector-server → dev-multinode/otel-collector}/config.yaml +28 -13
  123. package/docker/dev-multinode/prometheus/prometheus.yml +4 -0
  124. package/docker/dev-multinode/rivet-engine/0/config.jsonc +9 -16
  125. package/docker/dev-multinode/rivet-engine/1/config.jsonc +9 -16
  126. package/docker/dev-multinode/rivet-engine/2/config.jsonc +9 -16
  127. package/docker/engine/linux-aarch64.Dockerfile +9 -49
  128. package/docker/engine/linux-x86_64.Dockerfile +7 -57
  129. package/docker/engine/macos-aarch64.Dockerfile +8 -54
  130. package/docker/engine/macos-x86_64.Dockerfile +9 -55
  131. package/docker/engine/windows.Dockerfile +5 -53
  132. package/docker/template/grafana-dashboards/api.json +1077 -1239
  133. package/docker/template/grafana-dashboards/cache.json +911 -1074
  134. package/docker/template/grafana-dashboards/epoxy.json +1606 -0
  135. package/docker/template/grafana-dashboards/futures.json +242 -229
  136. package/docker/template/grafana-dashboards/gasoline.json +2663 -2476
  137. package/docker/template/grafana-dashboards/guard.json +1433 -1273
  138. package/docker/template/grafana-dashboards/operation.json +871 -0
  139. package/docker/template/grafana-dashboards/pegboard.json +1274 -0
  140. package/docker/template/grafana-dashboards/tokio.json +930 -1004
  141. package/docker/template/grafana-dashboards/traces.json +35 -13
  142. package/docker/template/node_modules/.bin/js-yaml +4 -4
  143. package/docker/template/node_modules/.bin/tsc +4 -4
  144. package/docker/template/node_modules/.bin/tsserver +4 -4
  145. package/docker/template/node_modules/.bin/tsx +4 -4
  146. package/docker/template/src/docker-compose.ts +42 -29
  147. package/docker/template/src/main.ts +4 -4
  148. package/docker/template/src/services/core/grafana.ts +14 -1
  149. package/docker/template/src/services/core/prometheus.ts +20 -0
  150. package/docker/template/src/services/edge/{otel-collector-server.ts → otel-collector.ts} +55 -24
  151. package/docker/template/src/services/edge/rivet-engine.ts +4 -16
  152. package/docker/template/src/services/edge/runner.ts +2 -3
  153. package/docker/universal/Dockerfile +5 -3
  154. package/package.json +2 -5
  155. package/packages/api-builder/src/global_context.rs +1 -1
  156. package/packages/api-builder/src/metrics.rs +28 -24
  157. package/packages/api-builder/src/middleware.rs +30 -48
  158. package/packages/api-builder/src/router.rs +13 -1
  159. package/packages/api-peer/Cargo.toml +7 -9
  160. package/packages/api-peer/src/actors/delete.rs +56 -57
  161. package/packages/api-peer/src/actors/get_or_create.rs +139 -0
  162. package/packages/api-peer/src/actors/kv_get.rs +40 -28
  163. package/packages/api-peer/src/actors/list.rs +31 -14
  164. package/packages/api-peer/src/actors/list_names.rs +6 -6
  165. package/packages/api-peer/src/actors/mod.rs +3 -0
  166. package/packages/api-peer/src/actors/reschedule.rs +55 -0
  167. package/packages/api-peer/src/actors/sleep.rs +55 -0
  168. package/packages/api-peer/src/envoys.rs +57 -0
  169. package/packages/api-peer/src/internal.rs +441 -24
  170. package/packages/api-peer/src/lib.rs +2 -1
  171. package/packages/api-peer/src/namespaces.rs +24 -9
  172. package/packages/api-peer/src/router.rs +31 -7
  173. package/packages/api-peer/src/runner_configs.rs +66 -19
  174. package/packages/api-peer/src/runners.rs +30 -32
  175. package/packages/api-public/Cargo.toml +2 -0
  176. package/packages/api-public/src/actors/create.rs +8 -17
  177. package/packages/api-public/src/actors/delete.rs +11 -35
  178. package/packages/api-public/src/actors/get_or_create.rs +23 -95
  179. package/packages/api-public/src/actors/kv_get.rs +12 -29
  180. package/packages/api-public/src/actors/list.rs +56 -78
  181. package/packages/api-public/src/actors/list_names.rs +15 -14
  182. package/packages/api-public/src/actors/mod.rs +2 -0
  183. package/packages/api-public/src/actors/reschedule.rs +65 -0
  184. package/packages/api-public/src/actors/sleep.rs +64 -0
  185. package/packages/api-public/src/actors/utils.rs +12 -60
  186. package/packages/api-public/src/ctx.rs +14 -6
  187. package/packages/api-public/src/datacenters.rs +5 -5
  188. package/packages/api-public/src/envoys.rs +57 -0
  189. package/packages/api-public/src/errors.rs +0 -7
  190. package/packages/api-public/src/health.rs +51 -44
  191. package/packages/api-public/src/lib.rs +2 -1
  192. package/packages/api-public/src/metadata.rs +44 -14
  193. package/packages/api-public/src/namespaces.rs +11 -11
  194. package/packages/api-public/src/router.rs +22 -5
  195. package/packages/api-public/src/runner_configs/delete.rs +13 -10
  196. package/packages/api-public/src/runner_configs/list.rs +5 -2
  197. package/packages/api-public/src/runner_configs/refresh_metadata.rs +1 -1
  198. package/packages/api-public/src/runner_configs/serverless_health_check.rs +2 -2
  199. package/packages/api-public/src/runner_configs/upsert.rs +12 -9
  200. package/packages/api-public/src/runner_configs/utils.rs +35 -175
  201. package/packages/api-public/src/runners.rs +17 -45
  202. package/packages/{dump-openapi → api-public-openapi-gen}/Cargo.toml +1 -1
  203. package/packages/api-types/src/actors/create.rs +1 -0
  204. package/packages/api-types/src/actors/delete.rs +20 -0
  205. package/packages/api-types/src/actors/get_or_create.rs +30 -0
  206. package/packages/api-types/src/actors/kv_get.rs +25 -0
  207. package/packages/api-types/src/actors/list.rs +8 -1
  208. package/packages/api-types/src/actors/mod.rs +5 -0
  209. package/packages/api-types/src/actors/reschedule.rs +26 -0
  210. package/packages/api-types/src/actors/sleep.rs +26 -0
  211. package/packages/api-types/src/datacenters/list.rs +2 -2
  212. package/packages/api-types/src/envoys/list.rs +24 -0
  213. package/packages/api-types/src/envoys/mod.rs +1 -0
  214. package/packages/api-types/src/lib.rs +1 -0
  215. package/packages/api-types/src/namespaces/list.rs +4 -0
  216. package/packages/api-types/src/namespaces/runner_configs.rs +23 -2
  217. package/packages/api-types/src/runner_configs/list.rs +6 -1
  218. package/packages/api-types/src/runner_configs/mod.rs +12 -0
  219. package/packages/api-types/src/runners/list.rs +4 -0
  220. package/packages/api-types/src/runners/list_names.rs +21 -0
  221. package/packages/api-types/src/runners/mod.rs +1 -0
  222. package/packages/api-util/src/lib.rs +44 -21
  223. package/packages/bootstrap/Cargo.toml +7 -4
  224. package/packages/bootstrap/src/backfill.rs +53 -0
  225. package/packages/bootstrap/src/lib.rs +43 -7
  226. package/packages/cache/Cargo.toml +3 -1
  227. package/packages/cache/src/driver.rs +43 -151
  228. package/packages/cache/src/getter_ctx.rs +48 -70
  229. package/packages/cache/src/inner.rs +28 -18
  230. package/packages/cache/src/key.rs +17 -3
  231. package/packages/cache/src/lib.rs +0 -2
  232. package/packages/cache/src/metrics.rs +43 -31
  233. package/packages/cache/src/req_config.rs +219 -156
  234. package/packages/cache/tests/fetch.rs +91 -0
  235. package/packages/cache/tests/in_flight.rs +361 -0
  236. package/packages/cache/tests/ttl.rs +314 -0
  237. package/packages/cache-purge/src/lib.rs +1 -1
  238. package/packages/config/Cargo.toml +1 -0
  239. package/packages/config/src/config/cache.rs +10 -3
  240. package/packages/config/src/config/clickhouse.rs +0 -30
  241. package/packages/config/src/config/{db.rs → db/mod.rs} +3 -18
  242. package/packages/config/src/config/db/postgres.rs +59 -0
  243. package/packages/config/src/config/guard.rs +19 -0
  244. package/packages/config/src/config/metrics.rs +22 -0
  245. package/packages/config/src/config/mod.rs +44 -10
  246. package/packages/config/src/config/pegboard.rs +242 -16
  247. package/packages/config/src/config/pubsub.rs +11 -0
  248. package/packages/config/src/config/runtime.rs +58 -0
  249. package/packages/config/src/config/telemetry.rs +1 -0
  250. package/packages/config/src/config/topology.rs +78 -19
  251. package/packages/config/src/defaults.rs +3 -0
  252. package/packages/config/src/lib.rs +10 -1
  253. package/packages/config-schema-gen/Cargo.toml +11 -0
  254. package/packages/config-schema-gen/build.rs +26 -0
  255. package/packages/config-schema-gen/src/lib.rs +2 -0
  256. package/packages/engine/Cargo.toml +11 -2
  257. package/packages/engine/src/commands/db/mod.rs +0 -10
  258. package/packages/engine/src/commands/epoxy.rs +395 -0
  259. package/packages/engine/src/commands/mod.rs +1 -1
  260. package/packages/engine/src/commands/start.rs +43 -63
  261. package/packages/engine/src/commands/udb/cli.rs +148 -4
  262. package/packages/engine/src/commands/wf/mod.rs +83 -12
  263. package/packages/engine/src/commands/wf/signal.rs +38 -0
  264. package/packages/engine/src/lib.rs +6 -3
  265. package/packages/engine/src/main.rs +1 -1
  266. package/packages/engine/src/run_config.rs +6 -7
  267. package/packages/engine/src/util/db.rs +1 -25
  268. package/packages/engine/src/util/wf/mod.rs +39 -5
  269. package/packages/engine/tests/common/actors.rs +50 -332
  270. package/packages/engine/tests/common/api/mod.rs +7 -0
  271. package/packages/engine/tests/common/api/peer.rs +364 -0
  272. package/packages/engine/tests/common/api/public.rs +473 -0
  273. package/packages/engine/tests/common/ctx.rs +15 -3
  274. package/packages/engine/tests/common/mod.rs +8 -5
  275. package/packages/engine/tests/common/test_envoy.rs +87 -0
  276. package/packages/engine/tests/common/test_helpers.rs +218 -130
  277. package/packages/engine/tests/common/test_runner.rs +273 -0
  278. package/packages/engine/tests/envoy/actors_lifecycle.rs +1277 -0
  279. package/packages/engine/tests/envoy/mod.rs +1 -0
  280. package/packages/engine/tests/mod.rs +3 -0
  281. package/packages/engine/tests/runner/actors_alarm.rs +1453 -0
  282. package/packages/engine/tests/runner/actors_kv_crud.rs +996 -0
  283. package/packages/engine/tests/runner/actors_kv_delete_range.rs +126 -0
  284. package/packages/engine/tests/runner/actors_kv_drop.rs +255 -0
  285. package/packages/engine/tests/runner/actors_kv_list.rs +1061 -0
  286. package/packages/engine/tests/runner/actors_kv_misc.rs +882 -0
  287. package/packages/engine/tests/runner/actors_lifecycle.rs +1284 -0
  288. package/packages/engine/tests/runner/actors_scheduling_errors.rs +1005 -0
  289. package/packages/engine/tests/runner/api_actors_create.rs +422 -0
  290. package/packages/engine/tests/runner/api_actors_delete.rs +487 -0
  291. package/packages/engine/tests/runner/api_actors_get_or_create.rs +634 -0
  292. package/packages/engine/tests/runner/api_actors_list.rs +1771 -0
  293. package/packages/engine/tests/runner/api_actors_list_names.rs +691 -0
  294. package/packages/engine/tests/runner/api_namespaces_create.rs +428 -0
  295. package/packages/engine/tests/runner/api_namespaces_list.rs +760 -0
  296. package/packages/engine/tests/runner/api_runner_configs_list.rs +646 -0
  297. package/packages/engine/tests/runner/api_runner_configs_upsert.rs +651 -0
  298. package/packages/engine/tests/runner/api_runners_list.rs +166 -0
  299. package/packages/engine/tests/runner/api_runners_list_names.rs +386 -0
  300. package/packages/engine/tests/runner/mod.rs +20 -0
  301. package/packages/engine/tests/runner/runner_drain_on_version.rs +620 -0
  302. package/packages/env/Cargo.toml +0 -4
  303. package/packages/env/src/lib.rs +0 -18
  304. package/packages/epoxy/Cargo.toml +3 -2
  305. package/packages/epoxy/README.md +554 -93
  306. package/packages/epoxy/src/consts.rs +4 -36
  307. package/packages/epoxy/src/http_client.rs +59 -26
  308. package/packages/epoxy/src/http_routes.rs +73 -10
  309. package/packages/epoxy/src/keys/keys.rs +260 -11
  310. package/packages/epoxy/src/keys/mod.rs +11 -1
  311. package/packages/epoxy/src/keys/replica.rs +5 -260
  312. package/packages/epoxy/src/lib.rs +2 -1
  313. package/packages/epoxy/src/metrics.rs +118 -0
  314. package/packages/epoxy/src/ops/kv/get_local.rs +15 -24
  315. package/packages/epoxy/src/ops/kv/get_optimistic.rs +102 -64
  316. package/packages/epoxy/src/ops/kv/mod.rs +1 -0
  317. package/packages/epoxy/src/ops/kv/purge_local.rs +18 -9
  318. package/packages/epoxy/src/ops/kv/read_value.rs +92 -0
  319. package/packages/epoxy/src/ops/mod.rs +0 -1
  320. package/packages/epoxy/src/ops/propose.rs +1079 -194
  321. package/packages/epoxy/src/replica/ballot.rs +162 -102
  322. package/packages/epoxy/src/replica/changelog.rs +147 -0
  323. package/packages/epoxy/src/replica/commit_kv.rs +69 -66
  324. package/packages/epoxy/src/replica/message_request.rs +33 -48
  325. package/packages/epoxy/src/replica/messages/accept.rs +82 -41
  326. package/packages/epoxy/src/replica/messages/commit.rs +21 -33
  327. package/packages/epoxy/src/replica/messages/mod.rs +0 -8
  328. package/packages/epoxy/src/replica/messages/prepare.rs +68 -69
  329. package/packages/epoxy/src/replica/mod.rs +1 -6
  330. package/packages/epoxy/src/replica/update_config.rs +3 -1
  331. package/packages/epoxy/src/types.rs +30 -54
  332. package/packages/epoxy/src/utils.rs +149 -16
  333. package/packages/epoxy/src/workflows/backfill.rs +233 -0
  334. package/packages/epoxy/src/workflows/coordinator/mod.rs +33 -7
  335. package/packages/epoxy/src/workflows/coordinator/reconfigure.rs +44 -0
  336. package/packages/epoxy/src/workflows/coordinator/replica_status_change.rs +4 -3
  337. package/packages/epoxy/src/workflows/mod.rs +1 -1
  338. package/packages/epoxy/src/workflows/replica/mod.rs +4 -6
  339. package/packages/epoxy/src/workflows/replica/setup.rs +130 -771
  340. package/packages/epoxy/tests/backfill.rs +65 -0
  341. package/packages/epoxy/tests/backfill_snapshot.rs +233 -0
  342. package/packages/epoxy/tests/common/mod.rs +77 -21
  343. package/packages/epoxy/tests/common/utils.rs +366 -10
  344. package/packages/epoxy/tests/consensus_regressions.rs +285 -0
  345. package/packages/epoxy/tests/kv.rs +128 -167
  346. package/packages/epoxy/tests/kv_get_optimistic.rs +257 -157
  347. package/packages/epoxy/tests/migration.rs +75 -0
  348. package/packages/epoxy/tests/proposal.rs +133 -28
  349. package/packages/epoxy/tests/reconfigure.rs +92 -474
  350. package/packages/error/tests/basic.rs +8 -8
  351. package/packages/gasoline/Cargo.toml +1 -0
  352. package/packages/gasoline/src/builder/common/message.rs +19 -47
  353. package/packages/gasoline/src/builder/common/signal.rs +37 -21
  354. package/packages/gasoline/src/builder/common/workflow.rs +19 -15
  355. package/packages/gasoline/src/builder/workflow/lupe.rs +295 -0
  356. package/packages/gasoline/src/builder/workflow/message.rs +24 -47
  357. package/packages/gasoline/src/builder/workflow/mod.rs +1 -0
  358. package/packages/gasoline/src/builder/workflow/signal.rs +68 -22
  359. package/packages/gasoline/src/builder/workflow/sub_workflow.rs +6 -15
  360. package/packages/gasoline/src/ctx/activity.rs +46 -6
  361. package/packages/gasoline/src/ctx/common.rs +26 -23
  362. package/packages/gasoline/src/ctx/listen.rs +33 -50
  363. package/packages/gasoline/src/ctx/message.rs +76 -64
  364. package/packages/gasoline/src/ctx/operation.rs +15 -5
  365. package/packages/gasoline/src/ctx/standalone.rs +32 -4
  366. package/packages/gasoline/src/ctx/test.rs +31 -6
  367. package/packages/gasoline/src/ctx/versioned_workflow.rs +33 -7
  368. package/packages/gasoline/src/ctx/workflow.rs +194 -384
  369. package/packages/gasoline/src/db/debug.rs +49 -9
  370. package/packages/gasoline/src/db/kv/debug.rs +905 -15
  371. package/packages/gasoline/src/db/kv/keys/history.rs +434 -9
  372. package/packages/gasoline/src/db/kv/keys/metric.rs +70 -47
  373. package/packages/gasoline/src/db/kv/keys/signal.rs +19 -3
  374. package/packages/gasoline/src/db/kv/keys/workflow.rs +349 -3
  375. package/packages/gasoline/src/db/kv/mod.rs +975 -514
  376. package/packages/gasoline/src/db/kv/system.rs +155 -18
  377. package/packages/gasoline/src/db/mod.rs +29 -7
  378. package/packages/gasoline/src/error.rs +26 -21
  379. package/packages/gasoline/src/executable.rs +3 -1
  380. package/packages/gasoline/src/history/cursor.rs +436 -336
  381. package/packages/gasoline/src/history/event.rs +24 -15
  382. package/packages/gasoline/src/listen.rs +2 -14
  383. package/packages/gasoline/src/message.rs +1 -1
  384. package/packages/gasoline/src/metrics.rs +260 -143
  385. package/packages/gasoline/src/prelude.rs +1 -1
  386. package/packages/gasoline/src/registry.rs +6 -2
  387. package/packages/gasoline/src/signal.rs +34 -31
  388. package/packages/gasoline/src/utils/mod.rs +1 -18
  389. package/packages/gasoline/src/utils/topic.rs +35 -0
  390. package/packages/gasoline/src/worker.rs +71 -14
  391. package/packages/gasoline/src/workflow.rs +13 -0
  392. package/packages/gasoline/tests/workflows/eviction_test.rs +2 -2
  393. package/packages/gasoline-macros/src/lib.rs +74 -12
  394. package/packages/gasoline-runtime/Cargo.toml +18 -0
  395. package/packages/gasoline-runtime/src/lib.rs +12 -0
  396. package/packages/gasoline-runtime/src/workflows/mod.rs +1 -0
  397. package/packages/gasoline-runtime/src/workflows/pruner.rs +55 -0
  398. package/packages/guard/Cargo.toml +16 -8
  399. package/packages/guard/src/cache/mod.rs +63 -43
  400. package/packages/guard/src/cache/pegboard_gateway.rs +144 -0
  401. package/packages/guard/src/errors.rs +105 -0
  402. package/packages/guard/src/lib.rs +5 -15
  403. package/packages/guard/src/metrics.rs +12 -0
  404. package/packages/guard/src/routing/actor_path.rs +409 -0
  405. package/packages/guard/src/routing/api_public.rs +6 -14
  406. package/packages/guard/src/routing/envoy.rs +98 -0
  407. package/packages/guard/src/routing/mod.rs +152 -206
  408. package/packages/guard/src/routing/pegboard_gateway/mod.rs +572 -0
  409. package/packages/guard/src/routing/pegboard_gateway/resolve_actor_query.rs +236 -0
  410. package/packages/guard/src/routing/runner.rs +24 -54
  411. package/packages/guard/src/routing/ws_health.rs +61 -0
  412. package/packages/guard/src/shared_state.rs +11 -2
  413. package/packages/guard/tests/parse_actor_path.rs +418 -165
  414. package/packages/guard-core/Cargo.toml +3 -10
  415. package/packages/guard-core/src/custom_serve.rs +4 -10
  416. package/packages/guard-core/src/errors.rs +20 -4
  417. package/packages/guard-core/src/lib.rs +6 -4
  418. package/packages/guard-core/src/metrics.rs +66 -53
  419. package/packages/guard-core/src/proxy_service.rs +618 -1520
  420. package/packages/guard-core/src/request_context.rs +149 -169
  421. package/packages/guard-core/src/response_body.rs +65 -0
  422. package/packages/guard-core/src/route.rs +76 -0
  423. package/packages/guard-core/src/server.rs +60 -26
  424. package/packages/guard-core/src/task_group.rs +4 -0
  425. package/packages/guard-core/src/utils.rs +296 -0
  426. package/packages/guard-core/src/websocket_handle.rs +3 -3
  427. package/packages/guard-core/tests/common/mod.rs +0 -1
  428. package/packages/guard-core/tests/custom_serve.rs +4 -6
  429. package/packages/guard-core/tests/simple_websocket.rs +19 -19
  430. package/packages/guard-core/tests/streaming_response.rs +4 -9
  431. package/packages/metrics/Cargo.toml +3 -2
  432. package/packages/metrics/src/buckets.rs +5 -11
  433. package/packages/metrics/src/lib.rs +6 -3
  434. package/packages/metrics/src/providers.rs +2 -42
  435. package/packages/metrics/src/registry.rs +7 -0
  436. package/packages/metrics/src/server.rs +57 -0
  437. package/packages/namespace/Cargo.toml +0 -3
  438. package/packages/namespace/src/keys/metric.rs +301 -0
  439. package/packages/namespace/src/keys/mod.rs +1 -1
  440. package/packages/namespace/src/ops/get_global.rs +7 -4
  441. package/packages/namespace/src/ops/get_local.rs +32 -16
  442. package/packages/namespace/src/ops/mod.rs +0 -1
  443. package/packages/namespace/src/ops/resolve_for_name_global.rs +7 -4
  444. package/packages/namespace/src/ops/resolve_for_name_local.rs +39 -19
  445. package/packages/namespace/src/workflows/namespace.rs +3 -3
  446. package/packages/pegboard/Cargo.toml +22 -0
  447. package/packages/pegboard/src/actor_kv/entry.rs +47 -0
  448. package/packages/pegboard/src/actor_kv/metrics.rs +19 -0
  449. package/packages/pegboard/src/actor_kv/mod.rs +530 -0
  450. package/packages/pegboard/src/actor_kv/preload.rs +363 -0
  451. package/packages/{actor-kv/src → pegboard/src/actor_kv}/utils.rs +36 -35
  452. package/packages/pegboard/src/errors.rs +39 -5
  453. package/packages/pegboard/src/keys/actor.rs +285 -2
  454. package/packages/{actor-kv/src/entry.rs → pegboard/src/keys/actor_kv.rs} +73 -39
  455. package/packages/pegboard/src/keys/backfill.rs +49 -0
  456. package/packages/pegboard/src/keys/envoy.rs +1070 -0
  457. package/packages/pegboard/src/keys/epoxy/ns.rs +1 -1
  458. package/packages/pegboard/src/keys/mod.rs +4 -6
  459. package/packages/pegboard/src/keys/ns.rs +493 -14
  460. package/packages/pegboard/src/keys/runner.rs +281 -0
  461. package/packages/{namespace → pegboard}/src/keys/runner_config.rs +53 -0
  462. package/packages/pegboard/src/lib.rs +15 -2
  463. package/packages/pegboard/src/metrics.rs +57 -16
  464. package/packages/pegboard/src/ops/actor/create.rs +123 -53
  465. package/packages/pegboard/src/ops/actor/get.rs +14 -45
  466. package/packages/pegboard/src/ops/actor/get_for_gateway.rs +16 -0
  467. package/packages/pegboard/src/ops/actor/get_for_key.rs +3 -0
  468. package/packages/pegboard/src/ops/actor/get_for_kv.rs +43 -0
  469. package/packages/pegboard/src/ops/actor/get_for_runner.rs +99 -0
  470. package/packages/pegboard/src/ops/actor/get_reservation_for_key.rs +1 -0
  471. package/packages/pegboard/src/ops/actor/list_for_ns.rs +10 -38
  472. package/packages/pegboard/src/ops/actor/list_names.rs +3 -3
  473. package/packages/pegboard/src/ops/actor/mod.rs +3 -1
  474. package/packages/pegboard/src/ops/actor/util.rs +263 -0
  475. package/packages/pegboard/src/ops/envoy/drain.rs +101 -0
  476. package/packages/pegboard/src/ops/envoy/evict_actors.rs +54 -0
  477. package/packages/pegboard/src/ops/envoy/expire.rs +92 -0
  478. package/packages/pegboard/src/ops/envoy/get.rs +135 -0
  479. package/packages/pegboard/src/ops/envoy/list.rs +131 -0
  480. package/packages/pegboard/src/ops/envoy/mod.rs +6 -0
  481. package/packages/pegboard/src/ops/envoy/update_ping.rs +92 -0
  482. package/packages/pegboard/src/ops/mod.rs +3 -0
  483. package/packages/pegboard/src/ops/runner/drain.rs +110 -0
  484. package/packages/pegboard/src/ops/runner/list_names.rs +3 -3
  485. package/packages/pegboard/src/ops/runner/list_runner_config_enabled_dcs.rs +199 -0
  486. package/packages/pegboard/src/ops/runner/list_runner_config_epoxy_replica_ids.rs +51 -0
  487. package/packages/pegboard/src/ops/runner/mod.rs +3 -1
  488. package/packages/pegboard/src/ops/runner/update_alloc_idx.rs +17 -5
  489. package/packages/{namespace → pegboard}/src/ops/runner_config/delete.rs +18 -9
  490. package/packages/pegboard/src/ops/runner_config/ensure_normal_if_missing.rs +62 -0
  491. package/packages/{namespace → pegboard}/src/ops/runner_config/get.rs +15 -5
  492. package/packages/pegboard/src/ops/runner_config/get_error.rs +146 -0
  493. package/packages/{namespace → pegboard}/src/ops/runner_config/list.rs +13 -12
  494. package/packages/pegboard/src/ops/runner_config/mod.rs +7 -0
  495. package/packages/pegboard/src/ops/runner_config/refresh_metadata.rs +124 -0
  496. package/packages/pegboard/src/ops/runner_config/upsert.rs +206 -0
  497. package/packages/pegboard/src/ops/serverless_metadata/fetch.rs +223 -0
  498. package/packages/pegboard/src/ops/serverless_metadata/mod.rs +1 -0
  499. package/packages/pegboard/src/pubsub_subjects.rs +52 -0
  500. package/packages/pegboard/src/utils.rs +36 -2
  501. package/packages/pegboard/src/workflows/actor/destroy.rs +135 -99
  502. package/packages/pegboard/src/workflows/actor/keys.rs +59 -5
  503. package/packages/pegboard/src/workflows/actor/metrics.rs +345 -0
  504. package/packages/pegboard/src/workflows/actor/mod.rs +848 -204
  505. package/packages/pegboard/src/workflows/actor/runtime.rs +785 -212
  506. package/packages/pegboard/src/workflows/actor/setup.rs +61 -0
  507. package/packages/pegboard/src/workflows/actor2/keys.rs +337 -0
  508. package/packages/pegboard/src/workflows/actor2/metrics.rs +334 -0
  509. package/packages/pegboard/src/workflows/actor2/mod.rs +1251 -0
  510. package/packages/pegboard/src/workflows/actor2/runtime.rs +1005 -0
  511. package/packages/pegboard/src/workflows/actor_runner_name_selector_backfill.rs +266 -0
  512. package/packages/pegboard/src/workflows/metrics_aggregator.rs +282 -0
  513. package/packages/pegboard/src/workflows/mod.rs +8 -0
  514. package/packages/pegboard/src/workflows/runner.rs +62 -56
  515. package/packages/pegboard/src/workflows/runner2.rs +978 -0
  516. package/packages/pegboard/src/workflows/runner_pool.rs +298 -0
  517. package/packages/pegboard/src/workflows/runner_pool_error_tracker.rs +173 -0
  518. package/packages/pegboard/src/workflows/runner_pool_metadata_poller.rs +237 -0
  519. package/packages/pegboard/src/workflows/serverless/backfill.rs +120 -0
  520. package/packages/pegboard/src/workflows/serverless/conn.rs +702 -0
  521. package/packages/pegboard/src/workflows/serverless/mod.rs +3 -0
  522. package/packages/pegboard/src/workflows/serverless/receiver.rs +87 -0
  523. package/packages/pegboard/tests/actor_v1_pre_migration.rs +77 -0
  524. package/packages/{actor-kv/tests/list_edge_cases.rs → pegboard/tests/kv_list_edge_cases.rs} +74 -59
  525. package/packages/{actor-kv → pegboard}/tests/kv_operations.rs +77 -48
  526. package/packages/pegboard-envoy/Cargo.toml +43 -0
  527. package/packages/pegboard-envoy/src/actor_event_demuxer.rs +165 -0
  528. package/packages/pegboard-envoy/src/conn.rs +417 -0
  529. package/packages/pegboard-envoy/src/errors.rs +38 -0
  530. package/packages/pegboard-envoy/src/lib.rs +250 -0
  531. package/packages/pegboard-envoy/src/metrics.rs +44 -0
  532. package/packages/pegboard-envoy/src/ping_task.rs +61 -0
  533. package/packages/pegboard-envoy/src/tunnel_to_ws_task.rs +183 -0
  534. package/packages/pegboard-envoy/src/utils.rs +68 -0
  535. package/packages/pegboard-envoy/src/ws_to_tunnel_task.rs +536 -0
  536. package/packages/pegboard-envoy/tests/support/ws_to_tunnel_task.rs +82 -0
  537. package/packages/pegboard-gateway/Cargo.toml +2 -0
  538. package/packages/pegboard-gateway/src/keepalive_task.rs +1 -1
  539. package/packages/pegboard-gateway/src/lib.rs +506 -128
  540. package/packages/pegboard-gateway/src/metrics.rs +7 -11
  541. package/packages/pegboard-gateway/src/metrics_task.rs +80 -0
  542. package/packages/pegboard-gateway/src/ping_task.rs +9 -2
  543. package/packages/pegboard-gateway/src/shared_state.rs +110 -74
  544. package/packages/pegboard-gateway/src/tunnel_to_ws_task.rs +21 -7
  545. package/packages/pegboard-gateway/src/ws_to_tunnel_task.rs +12 -6
  546. package/packages/pegboard-gateway2/Cargo.toml +37 -0
  547. package/packages/pegboard-gateway2/src/keepalive_task.rs +61 -0
  548. package/packages/pegboard-gateway2/src/lib.rs +1044 -0
  549. package/packages/pegboard-gateway2/src/metrics.rs +10 -0
  550. package/packages/pegboard-gateway2/src/metrics_task.rs +80 -0
  551. package/packages/pegboard-gateway2/src/ping_task.rs +30 -0
  552. package/packages/pegboard-gateway2/src/shared_state.rs +601 -0
  553. package/packages/pegboard-gateway2/src/tunnel_to_ws_task.rs +99 -0
  554. package/packages/pegboard-gateway2/src/ws_to_tunnel_task.rs +71 -0
  555. package/packages/{pegboard-serverless → pegboard-outbound}/Cargo.toml +9 -9
  556. package/packages/pegboard-outbound/src/lib.rs +487 -0
  557. package/packages/pegboard-outbound/src/metrics.rs +17 -0
  558. package/packages/pegboard-runner/Cargo.toml +11 -5
  559. package/packages/pegboard-runner/src/actor_event_demuxer.rs +163 -0
  560. package/packages/pegboard-runner/src/conn.rs +358 -122
  561. package/packages/pegboard-runner/src/errors.rs +5 -0
  562. package/packages/pegboard-runner/src/lib.rs +62 -36
  563. package/packages/pegboard-runner/src/metrics.rs +44 -0
  564. package/packages/pegboard-runner/src/ping_task.rs +60 -13
  565. package/packages/pegboard-runner/src/tunnel_to_ws_task.rs +249 -110
  566. package/packages/pegboard-runner/src/ws_to_tunnel_task.rs +738 -113
  567. package/packages/pegboard-runner/tests/support/ws_to_tunnel_task.rs +150 -0
  568. package/packages/pools/Cargo.toml +1 -2
  569. package/packages/pools/src/db/clickhouse.rs +7 -6
  570. package/packages/pools/src/db/udb.rs +16 -3
  571. package/packages/pools/src/db/ups.rs +27 -5
  572. package/packages/pools/src/error.rs +0 -3
  573. package/packages/pools/src/lib.rs +0 -2
  574. package/packages/pools/src/metrics.rs +33 -28
  575. package/packages/pools/src/pools.rs +15 -39
  576. package/packages/pools/src/prelude.rs +1 -1
  577. package/packages/postgres-util/Cargo.toml +13 -0
  578. package/packages/postgres-util/src/lib.rs +84 -0
  579. package/packages/runner-protocol/build.rs +157 -0
  580. package/packages/runner-protocol/src/lib.rs +16 -0
  581. package/packages/runner-protocol/src/util.rs +14 -0
  582. package/packages/runner-protocol/src/versioned.rs +4345 -0
  583. package/packages/runtime/src/lib.rs +46 -46
  584. package/packages/runtime/src/metrics.rs +39 -30
  585. package/packages/runtime/src/term_signal.rs +25 -12
  586. package/packages/runtime/src/traces.rs +5 -8
  587. package/packages/service-manager/src/lib.rs +66 -15
  588. package/packages/test-deps/src/datacenter.rs +22 -8
  589. package/packages/test-deps/src/lib.rs +47 -25
  590. package/packages/test-deps-docker/src/database.rs +45 -36
  591. package/packages/test-snapshot-gen/Cargo.toml +39 -0
  592. package/packages/test-snapshot-gen/snapshots/.gitkeep +0 -0
  593. package/packages/test-snapshot-gen/snapshots/epoxy-v1/metadata.json +3 -0
  594. package/packages/test-snapshot-gen/snapshots/epoxy-v1/replica-1/000008.log +0 -0
  595. package/packages/test-snapshot-gen/snapshots/epoxy-v1/replica-1/000009.sst +3 -0
  596. package/packages/test-snapshot-gen/snapshots/epoxy-v1/replica-1/CURRENT +3 -0
  597. package/packages/test-snapshot-gen/snapshots/epoxy-v1/replica-1/MANIFEST-000005 +3 -0
  598. package/packages/test-snapshot-gen/snapshots/epoxy-v1/replica-1/OPTIONS-000007 +3 -0
  599. package/packages/test-snapshot-gen/snapshots/epoxy-v1/replica-2/000008.log +0 -0
  600. package/packages/test-snapshot-gen/snapshots/epoxy-v1/replica-2/000009.sst +3 -0
  601. package/packages/test-snapshot-gen/snapshots/epoxy-v1/replica-2/CURRENT +3 -0
  602. package/packages/test-snapshot-gen/snapshots/epoxy-v1/replica-2/MANIFEST-000005 +3 -0
  603. package/packages/test-snapshot-gen/snapshots/epoxy-v1/replica-2/OPTIONS-000007 +3 -0
  604. package/packages/test-snapshot-gen/snapshots/pb-actor-v1-pre-migration/metadata.json +3 -0
  605. package/packages/test-snapshot-gen/snapshots/pb-actor-v1-pre-migration/replica-1/000008.log +0 -0
  606. package/packages/test-snapshot-gen/snapshots/pb-actor-v1-pre-migration/replica-1/000009.sst +3 -0
  607. package/packages/test-snapshot-gen/snapshots/pb-actor-v1-pre-migration/replica-1/CURRENT +3 -0
  608. package/packages/test-snapshot-gen/snapshots/pb-actor-v1-pre-migration/replica-1/MANIFEST-000005 +3 -0
  609. package/packages/test-snapshot-gen/snapshots/pb-actor-v1-pre-migration/replica-1/OPTIONS-000007 +3 -0
  610. package/packages/test-snapshot-gen/snapshots/pb-actor-v1-pre-migration/replica-2/000008.log +0 -0
  611. package/packages/test-snapshot-gen/snapshots/pb-actor-v1-pre-migration/replica-2/000009.sst +3 -0
  612. package/packages/test-snapshot-gen/snapshots/pb-actor-v1-pre-migration/replica-2/CURRENT +3 -0
  613. package/packages/test-snapshot-gen/snapshots/pb-actor-v1-pre-migration/replica-2/MANIFEST-000005 +3 -0
  614. package/packages/test-snapshot-gen/snapshots/pb-actor-v1-pre-migration/replica-2/OPTIONS-000007 +3 -0
  615. package/packages/test-snapshot-gen/src/lib.rs +328 -0
  616. package/packages/test-snapshot-gen/src/main.rs +145 -0
  617. package/packages/test-snapshot-gen/src/scenarios/epoxy_keys.rs +60 -0
  618. package/packages/test-snapshot-gen/src/scenarios/mod.rs +27 -0
  619. package/packages/test-snapshot-gen/src/scenarios/pb_actor_v1_pre_migration.rs +56 -0
  620. package/packages/test-snapshot-gen/src/test_cluster.rs +234 -0
  621. package/packages/tracing-reconfigure/src/lib.rs +1 -1
  622. package/packages/tracing-utils/src/lib.rs +12 -20
  623. package/packages/types/src/actor/error.rs +59 -0
  624. package/packages/types/src/actor/mod.rs +2 -0
  625. package/packages/types/src/actors.rs +5 -0
  626. package/packages/types/src/envoys.rs +21 -0
  627. package/packages/types/src/keys/backfill.rs +5 -0
  628. package/packages/types/src/keys/mod.rs +1 -0
  629. package/packages/types/src/lib.rs +2 -1
  630. package/packages/types/src/runner_configs.rs +43 -14
  631. package/packages/universaldb/Cargo.toml +4 -0
  632. package/packages/universaldb/src/database.rs +50 -5
  633. package/packages/universaldb/src/driver/mod.rs +12 -2
  634. package/packages/universaldb/src/driver/postgres/database.rs +88 -27
  635. package/packages/universaldb/src/driver/postgres/mod.rs +1 -1
  636. package/packages/universaldb/src/driver/postgres/transaction.rs +4 -7
  637. package/packages/universaldb/src/driver/postgres/transaction_task.rs +30 -52
  638. package/packages/universaldb/src/driver/rocksdb/database.rs +13 -7
  639. package/packages/universaldb/src/driver/rocksdb/transaction_conflict_tracker.rs +5 -5
  640. package/packages/universaldb/src/driver/rocksdb/transaction_task.rs +2 -1
  641. package/packages/universaldb/src/metrics.rs +39 -23
  642. package/packages/universaldb/src/prelude.rs +1 -1
  643. package/packages/universaldb/src/transaction.rs +9 -2
  644. package/packages/universaldb/src/utils/cherry_pick.rs +46 -46
  645. package/packages/universaldb/src/utils/keys.rs +21 -2
  646. package/packages/universaldb/src/utils/mod.rs +8 -0
  647. package/packages/universaldb/src/utils/subspace.rs +9 -4
  648. package/packages/universaldb/tests/integration.rs +5 -3
  649. package/packages/universaldb/tests/integration_gas.rs +5 -3
  650. package/packages/universaldb/tests/rocksdb.rs +152 -19
  651. package/packages/universalpubsub/Cargo.toml +8 -2
  652. package/packages/universalpubsub/benches/simple.rs +28 -8
  653. package/packages/universalpubsub/src/chunking.rs +27 -5
  654. package/packages/universalpubsub/src/driver/memory/mod.rs +131 -20
  655. package/packages/universalpubsub/src/driver/mod.rs +5 -0
  656. package/packages/universalpubsub/src/driver/nats/mod.rs +8 -0
  657. package/packages/universalpubsub/src/driver/postgres/mod.rs +505 -96
  658. package/packages/universalpubsub/src/lib.rs +3 -0
  659. package/packages/universalpubsub/src/metrics.rs +60 -0
  660. package/packages/universalpubsub/src/pubsub.rs +227 -87
  661. package/packages/universalpubsub/src/subject.rs +32 -0
  662. package/packages/universalpubsub/tests/chunking.rs +298 -0
  663. package/packages/universalpubsub/tests/integration.rs +148 -7
  664. package/packages/universalpubsub/tests/reconnect.rs +8 -6
  665. package/packages/util/Cargo.toml +1 -3
  666. package/packages/util/build.rs +6 -0
  667. package/packages/util/src/lib.rs +7 -2
  668. package/packages/util/src/metric.rs +1 -0
  669. package/packages/util/src/serde.rs +1 -516
  670. package/packages/{internal → util-serde}/Cargo.toml +4 -5
  671. package/packages/util-serde/src/lib.rs +517 -0
  672. package/packages/workflow-worker/Cargo.toml +4 -4
  673. package/packages/workflow-worker/src/lib.rs +3 -2
  674. package/sdks/go/api-full/client/client.go +17 -4
  675. package/sdks/go/api-full/metadata/client.go +50 -0
  676. package/sdks/go/api-full/namespaces/client.go +3 -0
  677. package/sdks/go/api-full/namespaces.go +6 -4
  678. package/sdks/go/api-full/runners/client.go +3 -0
  679. package/sdks/go/api-full/runners.go +8 -6
  680. package/sdks/go/api-full/types.go +107 -23
  681. package/sdks/rust/api-full/rust/.openapi-generator/FILES +6 -0
  682. package/sdks/rust/api-full/rust/Cargo.toml +1 -1
  683. package/sdks/rust/api-full/rust/README.md +5 -2
  684. package/sdks/rust/api-full/rust/docs/Actor.md +1 -0
  685. package/sdks/rust/api-full/rust/docs/ActorsDeleteApi.md +1 -1
  686. package/sdks/rust/api-full/rust/docs/ActorsKvGetApi.md +2 -1
  687. package/sdks/rust/api-full/rust/docs/ActorsListApi.md +3 -2
  688. package/sdks/rust/api-full/rust/docs/MetadataApi.md +34 -0
  689. package/sdks/rust/api-full/rust/docs/MetadataGetResponse.md +18 -0
  690. package/sdks/rust/api-full/rust/docs/NamespacesApi.md +3 -2
  691. package/sdks/rust/api-full/rust/docs/RunnerConfig.md +1 -0
  692. package/sdks/rust/api-full/rust/docs/RunnerConfigKindOneOf1Serverless.md +1 -0
  693. package/sdks/rust/api-full/rust/docs/RunnerConfigResponse.md +15 -0
  694. package/sdks/rust/api-full/rust/docs/RunnerConfigsListApi.md +3 -2
  695. package/sdks/rust/api-full/rust/docs/RunnerConfigsListResponseRunnerConfigsValue.md +1 -1
  696. package/sdks/rust/api-full/rust/docs/RunnersApi.md +3 -2
  697. package/sdks/rust/api-full/rust/src/apis/actors_create_api.rs +1 -1
  698. package/sdks/rust/api-full/rust/src/apis/actors_delete_api.rs +3 -5
  699. package/sdks/rust/api-full/rust/src/apis/actors_get_or_create_api.rs +1 -1
  700. package/sdks/rust/api-full/rust/src/apis/actors_kv_get_api.rs +4 -2
  701. package/sdks/rust/api-full/rust/src/apis/actors_list_api.rs +9 -2
  702. package/sdks/rust/api-full/rust/src/apis/actors_list_names_api.rs +1 -1
  703. package/sdks/rust/api-full/rust/src/apis/configuration.rs +2 -2
  704. package/sdks/rust/api-full/rust/src/apis/datacenters_api.rs +1 -1
  705. package/sdks/rust/api-full/rust/src/apis/health_api.rs +1 -1
  706. package/sdks/rust/api-full/rust/src/apis/metadata_api.rs +62 -0
  707. package/sdks/rust/api-full/rust/src/apis/mod.rs +1 -0
  708. package/sdks/rust/api-full/rust/src/apis/namespaces_api.rs +9 -2
  709. package/sdks/rust/api-full/rust/src/apis/runner_configs_delete_api.rs +1 -1
  710. package/sdks/rust/api-full/rust/src/apis/runner_configs_list_api.rs +9 -2
  711. package/sdks/rust/api-full/rust/src/apis/runner_configs_refresh_metadata_api.rs +1 -1
  712. package/sdks/rust/api-full/rust/src/apis/runner_configs_serverless_health_check_api.rs +1 -1
  713. package/sdks/rust/api-full/rust/src/apis/runner_configs_upsert_api.rs +1 -1
  714. package/sdks/rust/api-full/rust/src/apis/runners_api.rs +9 -2
  715. package/sdks/rust/api-full/rust/src/models/actor.rs +5 -1
  716. package/sdks/rust/api-full/rust/src/models/actor_name.rs +1 -1
  717. package/sdks/rust/api-full/rust/src/models/actors_create_request.rs +1 -1
  718. package/sdks/rust/api-full/rust/src/models/actors_create_response.rs +1 -1
  719. package/sdks/rust/api-full/rust/src/models/actors_get_or_create_request.rs +1 -1
  720. package/sdks/rust/api-full/rust/src/models/actors_get_or_create_response.rs +1 -1
  721. package/sdks/rust/api-full/rust/src/models/actors_kv_get_response.rs +1 -1
  722. package/sdks/rust/api-full/rust/src/models/actors_list_names_response.rs +1 -1
  723. package/sdks/rust/api-full/rust/src/models/actors_list_response.rs +1 -1
  724. package/sdks/rust/api-full/rust/src/models/crash_policy.rs +1 -1
  725. package/sdks/rust/api-full/rust/src/models/datacenter.rs +1 -1
  726. package/sdks/rust/api-full/rust/src/models/datacenter_health.rs +1 -1
  727. package/sdks/rust/api-full/rust/src/models/datacenters_list_response.rs +1 -1
  728. package/sdks/rust/api-full/rust/src/models/health_fanout_response.rs +1 -1
  729. package/sdks/rust/api-full/rust/src/models/health_response.rs +1 -1
  730. package/sdks/rust/api-full/rust/src/models/health_status.rs +1 -1
  731. package/sdks/rust/api-full/rust/src/models/metadata_get_response.rs +48 -0
  732. package/sdks/rust/api-full/rust/src/models/mod.rs +4 -0
  733. package/sdks/rust/api-full/rust/src/models/namespace.rs +1 -1
  734. package/sdks/rust/api-full/rust/src/models/namespace_list_response.rs +1 -1
  735. package/sdks/rust/api-full/rust/src/models/namespaces_create_request.rs +1 -1
  736. package/sdks/rust/api-full/rust/src/models/namespaces_create_response.rs +1 -1
  737. package/sdks/rust/api-full/rust/src/models/pagination.rs +1 -1
  738. package/sdks/rust/api-full/rust/src/models/runner.rs +1 -1
  739. package/sdks/rust/api-full/rust/src/models/runner_config.rs +4 -1
  740. package/sdks/rust/api-full/rust/src/models/runner_config_kind.rs +1 -1
  741. package/sdks/rust/api-full/rust/src/models/runner_config_kind_one_of.rs +1 -1
  742. package/sdks/rust/api-full/rust/src/models/runner_config_kind_one_of_1.rs +1 -1
  743. package/sdks/rust/api-full/rust/src/models/runner_config_kind_one_of_1_serverless.rs +5 -1
  744. package/sdks/rust/api-full/rust/src/models/runner_config_response.rs +39 -0
  745. package/sdks/rust/api-full/rust/src/models/runner_config_variant.rs +1 -1
  746. package/sdks/rust/api-full/rust/src/models/runner_configs_list_response.rs +1 -1
  747. package/sdks/rust/api-full/rust/src/models/runner_configs_list_response_runner_configs_value.rs +3 -3
  748. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_health_check_request.rs +1 -1
  749. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_health_check_response.rs +1 -1
  750. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_health_check_response_one_of.rs +1 -1
  751. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_health_check_response_one_of_1.rs +1 -1
  752. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_health_check_response_one_of_1_failure.rs +1 -1
  753. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_health_check_response_one_of_success.rs +1 -1
  754. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_metadata_error.rs +1 -1
  755. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_metadata_error_one_of.rs +1 -1
  756. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_metadata_error_one_of_1.rs +1 -1
  757. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_metadata_error_one_of_2.rs +1 -1
  758. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_metadata_error_one_of_3.rs +1 -1
  759. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_metadata_error_one_of_3_non_success_status.rs +1 -1
  760. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_metadata_error_one_of_4.rs +1 -1
  761. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_metadata_error_one_of_4_invalid_response_json.rs +1 -1
  762. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_metadata_error_one_of_5.rs +1 -1
  763. package/sdks/rust/api-full/rust/src/models/runner_configs_serverless_metadata_error_one_of_5_invalid_response_schema.rs +1 -1
  764. package/sdks/rust/api-full/rust/src/models/runner_configs_upsert_request_body.rs +1 -1
  765. package/sdks/rust/api-full/rust/src/models/runner_configs_upsert_response.rs +1 -1
  766. package/sdks/rust/api-full/rust/src/models/runners_list_names_response.rs +1 -1
  767. package/sdks/rust/api-full/rust/src/models/runners_list_response.rs +1 -1
  768. package/sdks/rust/api-full/src/apis/actors_api.rs +8 -4
  769. package/sdks/rust/api-full/src/apis/ns_api.rs +8 -4
  770. package/sdks/rust/data/src/converted.rs +7 -4
  771. package/sdks/rust/data/src/lib.rs +2 -2
  772. package/sdks/rust/data/src/versioned/mod.rs +47 -4
  773. package/sdks/rust/data/src/versioned/namespace_runner_config.rs +256 -6
  774. package/sdks/rust/envoy-client/Cargo.toml +25 -0
  775. package/sdks/rust/envoy-client/src/actor.rs +992 -0
  776. package/sdks/rust/envoy-client/src/commands.rs +88 -0
  777. package/sdks/rust/envoy-client/src/config.rs +159 -0
  778. package/sdks/rust/envoy-client/src/connection.rs +288 -0
  779. package/sdks/rust/envoy-client/src/context.rs +24 -0
  780. package/sdks/rust/envoy-client/src/envoy.rs +432 -0
  781. package/sdks/rust/envoy-client/src/events.rs +62 -0
  782. package/sdks/rust/envoy-client/src/handle.rs +355 -0
  783. package/sdks/rust/envoy-client/src/kv.rs +132 -0
  784. package/sdks/rust/envoy-client/src/latency_channel.rs +27 -0
  785. package/sdks/rust/envoy-client/src/lib.rs +15 -0
  786. package/sdks/rust/envoy-client/src/stringify.rs +322 -0
  787. package/sdks/rust/envoy-client/src/tunnel.rs +265 -0
  788. package/sdks/rust/envoy-client/src/utils.rs +172 -0
  789. package/sdks/rust/envoy-protocol/Cargo.toml +22 -0
  790. package/sdks/rust/envoy-protocol/build.rs +200 -0
  791. package/sdks/rust/envoy-protocol/src/generated.rs +1 -0
  792. package/sdks/rust/envoy-protocol/src/lib.rs +8 -0
  793. package/sdks/rust/envoy-protocol/src/versioned.rs +208 -0
  794. package/sdks/rust/epoxy-protocol/Cargo.toml +0 -2
  795. package/sdks/rust/epoxy-protocol/build.rs +7 -0
  796. package/sdks/rust/epoxy-protocol/src/lib.rs +2 -5
  797. package/sdks/rust/epoxy-protocol/src/protocol.rs +128 -0
  798. package/sdks/rust/test-envoy/Cargo.toml +23 -0
  799. package/sdks/rust/test-envoy/Dockerfile +22 -0
  800. package/sdks/rust/test-envoy/src/behaviors.rs +141 -0
  801. package/sdks/rust/test-envoy/src/lib.rs +11 -0
  802. package/sdks/rust/test-envoy/src/main.rs +4 -0
  803. package/sdks/rust/test-envoy/src/server.rs +269 -0
  804. package/sdks/schemas/README.md +1 -2
  805. package/sdks/schemas/data/namespace.runner_config.v3.bare +24 -0
  806. package/sdks/schemas/data/namespace.runner_config.v4.bare +25 -0
  807. package/sdks/schemas/data/namespace.runner_config.v5.bare +26 -0
  808. package/sdks/schemas/data/pegboard.namespace.runner_alloc_idx.v2.bare +8 -0
  809. package/sdks/schemas/envoy-protocol/v1.bare +459 -0
  810. package/sdks/schemas/epoxy-protocol/v2.bare +220 -0
  811. package/sdks/schemas/runner-protocol/v4.bare +438 -0
  812. package/sdks/schemas/runner-protocol/v5.bare +430 -0
  813. package/sdks/schemas/runner-protocol/v6.bare +432 -0
  814. package/sdks/schemas/runner-protocol/v7.bare +438 -0
  815. package/sdks/typescript/api-full/.turbo/turbo-build.log +28 -27
  816. package/sdks/typescript/api-full/build.js +7 -1
  817. package/sdks/typescript/api-full/package.json +66 -57
  818. package/sdks/typescript/api-full/rivetkit-engine-api-full-25.5.3.tgz +0 -0
  819. package/sdks/typescript/api-full/src/Client.ts +41 -10
  820. package/sdks/typescript/api-full/src/api/client/requests/ActorsDeleteRequest.ts +4 -2
  821. package/sdks/typescript/api-full/src/api/client/requests/ActorsKvGetRequest.ts +13 -0
  822. package/sdks/typescript/api-full/src/api/client/requests/ActorsListRequest.ts +6 -0
  823. package/sdks/typescript/api-full/src/api/client/requests/RunnerConfigsListRequest.ts +4 -0
  824. package/sdks/typescript/api-full/src/api/client/requests/index.ts +1 -0
  825. package/sdks/typescript/api-full/src/api/resources/index.ts +1 -0
  826. package/sdks/typescript/api-full/src/api/resources/metadata/client/Client.ts +97 -0
  827. package/sdks/typescript/api-full/src/api/resources/metadata/client/index.ts +1 -0
  828. package/sdks/typescript/api-full/src/api/resources/metadata/index.ts +1 -0
  829. package/sdks/typescript/api-full/src/api/resources/namespaces/client/Client.ts +12 -2
  830. package/sdks/typescript/api-full/src/api/resources/namespaces/client/requests/NamespacesListRequest.ts +6 -0
  831. package/sdks/typescript/api-full/src/api/resources/runners/client/Client.ts +12 -2
  832. package/sdks/typescript/api-full/src/api/resources/runners/client/requests/RunnersListRequest.ts +6 -0
  833. package/sdks/typescript/api-full/src/api/types/Actor.ts +2 -0
  834. package/sdks/typescript/api-full/src/api/types/MetadataGetResponse.ts +14 -0
  835. package/sdks/typescript/api-full/src/api/types/RunnerConfig.ts +1 -0
  836. package/sdks/typescript/api-full/src/api/types/RunnerConfigKindServerlessServerless.ts +2 -0
  837. package/sdks/typescript/api-full/src/api/types/RunnerConfigResponse.ts +9 -0
  838. package/sdks/typescript/api-full/src/api/types/RunnerConfigServerless.ts +2 -0
  839. package/sdks/typescript/api-full/src/api/types/RunnerConfigsListResponseRunnerConfigsValue.ts +1 -1
  840. package/sdks/typescript/api-full/src/api/types/index.ts +2 -0
  841. package/sdks/typescript/api-full/src/core/fetcher/stream-wrappers/Node18UniversalStreamWrapper.ts +6 -4
  842. package/sdks/typescript/api-full/src/core/fetcher/stream-wrappers/UndiciStreamWrapper.ts +4 -3
  843. package/sdks/typescript/api-full/src/serialization/types/Actor.ts +2 -0
  844. package/sdks/typescript/api-full/src/serialization/types/MetadataGetResponse.ts +34 -0
  845. package/sdks/typescript/api-full/src/serialization/types/RunnerConfig.ts +5 -0
  846. package/sdks/typescript/api-full/src/serialization/types/RunnerConfigKindServerlessServerless.ts +2 -0
  847. package/sdks/typescript/api-full/src/serialization/types/RunnerConfigResponse.ts +26 -0
  848. package/sdks/typescript/api-full/src/serialization/types/RunnerConfigServerless.ts +2 -0
  849. package/sdks/typescript/api-full/src/serialization/types/RunnerConfigsListResponseRunnerConfigsValue.ts +3 -3
  850. package/sdks/typescript/api-full/src/serialization/types/index.ts +2 -0
  851. package/sdks/typescript/api-full/turbo.json +9 -0
  852. package/sdks/typescript/envoy-protocol/.turbo/turbo-build.log +23 -0
  853. package/sdks/typescript/{runner-protocol → envoy-protocol}/dist/index.cjs +466 -281
  854. package/sdks/typescript/envoy-protocol/dist/index.cjs.map +1 -0
  855. package/sdks/typescript/envoy-protocol/dist/index.d.cts +699 -0
  856. package/sdks/typescript/envoy-protocol/dist/index.d.ts +699 -0
  857. package/sdks/typescript/{runner-protocol → envoy-protocol}/dist/index.js +530 -345
  858. package/sdks/typescript/envoy-protocol/dist/index.js.map +1 -0
  859. package/sdks/typescript/{runner-protocol → envoy-protocol}/node_modules/.bin/tsc +4 -4
  860. package/{tests/load → sdks/typescript/envoy-protocol}/node_modules/.bin/tsserver +4 -4
  861. package/sdks/typescript/envoy-protocol/node_modules/.bin/tsup +21 -0
  862. package/sdks/typescript/envoy-protocol/node_modules/.bin/tsup-node +21 -0
  863. package/sdks/typescript/envoy-protocol/package.json +36 -0
  864. package/sdks/typescript/envoy-protocol/src/index.ts +2331 -0
  865. package/sdks/typescript/envoy-protocol/tsconfig.json +9 -0
  866. package/sdks/typescript/envoy-protocol/tsup.config.ts +4 -0
  867. package/sdks/typescript/runner/package.json +8 -2
  868. package/sdks/typescript/runner/src/actor.ts +38 -0
  869. package/sdks/typescript/runner/src/mod.ts +435 -229
  870. package/sdks/typescript/runner/src/stringify.ts +36 -33
  871. package/sdks/typescript/runner/src/tunnel.ts +52 -56
  872. package/sdks/typescript/runner/src/utils.ts +19 -0
  873. package/sdks/typescript/runner/src/websocket-tunnel-adapter.ts +98 -435
  874. package/sdks/typescript/runner-protocol/package.json +11 -9
  875. package/sdks/typescript/runner-protocol/src/index.ts +224 -156
  876. package/sdks/typescript/runner-protocol/tsconfig.json +1 -9
  877. package/sdks/typescript/test-runner/.turbo/turbo-build.log +5 -5
  878. package/sdks/typescript/test-runner/dist/index.js +53 -44
  879. package/sdks/typescript/test-runner/dist/index.js.map +1 -1
  880. package/sdks/typescript/test-runner/node_modules/.bin/pino +2 -2
  881. package/sdks/typescript/test-runner/node_modules/.bin/tsc +4 -4
  882. package/sdks/typescript/test-runner/node_modules/.bin/tsserver +4 -4
  883. package/sdks/typescript/test-runner/node_modules/.bin/tsup +4 -4
  884. package/sdks/typescript/test-runner/node_modules/.bin/tsup-node +4 -4
  885. package/sdks/typescript/test-runner/node_modules/.bin/tsx +4 -4
  886. package/sdks/typescript/test-runner/node_modules/.bin/vitest +4 -4
  887. package/sdks/typescript/test-runner/package.json +3 -3
  888. package/sdks/typescript/test-runner/src/index.ts +65 -42
  889. package/sdks/typescript/test-runner/src/log.ts +4 -18
  890. package/artifacts/errors/actor.no_runners_available.json +0 -5
  891. package/artifacts/errors/guard.actor_destroyed.json +0 -5
  892. package/artifacts/errors/guard.actor_not_found.json +0 -5
  893. package/contrib-docs/ACTOR_KEY_RESERVATION.md +0 -101
  894. package/contrib-docs/API.md +0 -11
  895. package/contrib-docs/DOCKER.md +0 -5
  896. package/contrib-docs/ERRORS.md +0 -13
  897. package/contrib-docs/GUARD.md +0 -76
  898. package/contrib-docs/PEGBOARD_TUNNEL_RETRIES.md +0 -83
  899. package/contrib-docs/RUNNER_LIFECYCLE.md +0 -172
  900. package/contrib-docs/SDKS.md +0 -9
  901. package/contrib-docs/TEST_DEPENDENCIES.md +0 -43
  902. package/contrib-docs/design-choicse/EMBEDDED_KV.md +0 -80
  903. package/contrib-docs/operate/TRACING_RECONFIGURE.md +0 -78
  904. package/docker/dev/otel-collector-client/config.yaml +0 -39
  905. package/docker/dev-host/otel-collector-client/config.yaml +0 -39
  906. package/docker/dev-multidc/datacenters/dc-a/otel-collector-client/config.yaml +0 -39
  907. package/docker/dev-multidc/datacenters/dc-b/otel-collector-client/config.yaml +0 -39
  908. package/docker/dev-multidc/datacenters/dc-c/otel-collector-client/config.yaml +0 -39
  909. package/docker/dev-multidc-multinode/datacenters/dc-a/otel-collector-client/config.yaml +0 -39
  910. package/docker/dev-multidc-multinode/datacenters/dc-b/otel-collector-client/config.yaml +0 -39
  911. package/docker/dev-multidc-multinode/datacenters/dc-c/otel-collector-client/config.yaml +0 -39
  912. package/docker/dev-multinode/otel-collector-client/config.yaml +0 -39
  913. package/docker/template/src/services/edge/otel-collector-client.ts +0 -64
  914. package/packages/actor-kv/Cargo.toml +0 -31
  915. package/packages/actor-kv/src/key.rs +0 -81
  916. package/packages/actor-kv/src/lib.rs +0 -357
  917. package/packages/cache/src/rate_limit.rs +0 -109
  918. package/packages/cache/tests/integration.rs +0 -582
  919. package/packages/clickhouse-inserter/Cargo.toml +0 -17
  920. package/packages/clickhouse-inserter/src/error.rs +0 -16
  921. package/packages/clickhouse-inserter/src/lib.rs +0 -179
  922. package/packages/clickhouse-user-query/Cargo.toml +0 -16
  923. package/packages/clickhouse-user-query/examples/case_sensitivity_demo.rs +0 -100
  924. package/packages/clickhouse-user-query/examples/group_by_example.rs +0 -53
  925. package/packages/clickhouse-user-query/examples/string_contains_demo.rs +0 -96
  926. package/packages/clickhouse-user-query/src/builder.rs +0 -445
  927. package/packages/clickhouse-user-query/src/error.rs +0 -37
  928. package/packages/clickhouse-user-query/src/lib.rs +0 -61
  929. package/packages/clickhouse-user-query/src/query.rs +0 -143
  930. package/packages/clickhouse-user-query/src/schema.rs +0 -78
  931. package/packages/clickhouse-user-query/tests/builder_tests.rs +0 -619
  932. package/packages/clickhouse-user-query/tests/case_sensitivity_tests.rs +0 -307
  933. package/packages/clickhouse-user-query/tests/integration_tests.rs +0 -540
  934. package/packages/clickhouse-user-query/tests/query_tests.rs +0 -263
  935. package/packages/clickhouse-user-query/tests/schema_tests.rs +0 -44
  936. package/packages/config/src/config/vector.rs +0 -18
  937. package/packages/engine/src/commands/udb_keys.rs +0 -200
  938. package/packages/engine/tests/actors_create.rs +0 -524
  939. package/packages/engine/tests/actors_delete.rs +0 -243
  940. package/packages/engine/tests/actors_general.rs +0 -191
  941. package/packages/engine/tests/actors_get.rs +0 -230
  942. package/packages/engine/tests/actors_get_by_id.rs +0 -170
  943. package/packages/engine/tests/actors_get_or_create.rs +0 -294
  944. package/packages/engine/tests/actors_get_or_create_by_id.rs +0 -147
  945. package/packages/engine/tests/actors_lifecycle.rs +0 -165
  946. package/packages/engine/tests/actors_list.rs +0 -798
  947. package/packages/engine/tests/actors_list_names.rs +0 -353
  948. package/packages/engine/tests/common/ns.rs +0 -36
  949. package/packages/engine/tests/common/runner.rs +0 -134
  950. package/packages/engine/tests/runners_dupe_key.rs +0 -27
  951. package/packages/engine/tests/runners_version.rs +0 -50
  952. package/packages/env/build.rs +0 -8
  953. package/packages/epoxy/spec/KEYS.md +0 -33
  954. package/packages/epoxy/spec/PROPOSAL.md +0 -125
  955. package/packages/epoxy/spec/RECONFIGURE.md +0 -40
  956. package/packages/epoxy/src/ops/explicit_prepare.rs +0 -342
  957. package/packages/epoxy/src/replica/decide_path.rs +0 -51
  958. package/packages/epoxy/src/replica/lead_consensus.rs +0 -65
  959. package/packages/epoxy/src/replica/log.rs +0 -84
  960. package/packages/epoxy/src/replica/messages/accepted.rs +0 -35
  961. package/packages/epoxy/src/replica/messages/committed.rs +0 -41
  962. package/packages/epoxy/src/replica/messages/download_instances.rs +0 -69
  963. package/packages/epoxy/src/replica/messages/pre_accept.rs +0 -69
  964. package/packages/epoxy/src/replica/utils.rs +0 -111
  965. package/packages/epoxy/src/workflows/purger.rs +0 -81
  966. package/packages/guard/src/cache/actor.rs +0 -43
  967. package/packages/guard/src/middleware.rs +0 -42
  968. package/packages/guard/src/routing/pegboard_gateway.rs +0 -260
  969. package/packages/guard-core/src/analytics.rs +0 -46
  970. package/packages/internal/README.md +0 -1
  971. package/packages/internal/src/lib.rs +0 -1
  972. package/packages/internal/src/ops/bump_serverless_autoscaler_global.rs +0 -64
  973. package/packages/internal/src/ops/cache/mod.rs +0 -1
  974. package/packages/internal/src/ops/cache/purge_global.rs +0 -81
  975. package/packages/internal/src/ops/mod.rs +0 -2
  976. package/packages/namespace/src/ops/runner_config/mod.rs +0 -4
  977. package/packages/namespace/src/ops/runner_config/upsert.rs +0 -148
  978. package/packages/pegboard/src/ops/actor/get_runner.rs +0 -64
  979. package/packages/pegboard/src/ops/runner/find_dc_with_runner.rs +0 -222
  980. package/packages/pegboard-serverless/src/lib.rs +0 -523
  981. package/packages/types/src/msgs/mod.rs +0 -1
  982. package/packages/types/src/msgs/pegboard.rs +0 -5
  983. package/sdks/rust/epoxy-protocol/src/versioned.rs +0 -206
  984. package/sdks/rust/runner-protocol/build.rs +0 -115
  985. package/sdks/rust/runner-protocol/src/lib.rs +0 -10
  986. package/sdks/rust/runner-protocol/src/versioned.rs +0 -1734
  987. package/sdks/schemas/epoxy-protocol/v1.bare +0 -260
  988. package/sdks/typescript/runner/.turbo/turbo-build.log +0 -22
  989. package/sdks/typescript/runner/dist/mod.cjs +0 -2951
  990. package/sdks/typescript/runner/dist/mod.cjs.map +0 -1
  991. package/sdks/typescript/runner/dist/mod.d.cts +0 -326
  992. package/sdks/typescript/runner/dist/mod.d.ts +0 -326
  993. package/sdks/typescript/runner/dist/mod.js +0 -2951
  994. package/sdks/typescript/runner/dist/mod.js.map +0 -1
  995. package/sdks/typescript/runner/node_modules/.bin/pino +0 -21
  996. package/sdks/typescript/runner/node_modules/.bin/tsc +0 -21
  997. package/sdks/typescript/runner/node_modules/.bin/tsserver +0 -21
  998. package/sdks/typescript/runner/node_modules/.bin/tsup +0 -21
  999. package/sdks/typescript/runner/node_modules/.bin/tsup-node +0 -21
  1000. package/sdks/typescript/runner/node_modules/.bin/tsx +0 -21
  1001. package/sdks/typescript/runner/node_modules/.bin/uuid +0 -21
  1002. package/sdks/typescript/runner/node_modules/.bin/vitest +0 -21
  1003. package/sdks/typescript/runner-protocol/.turbo/turbo-build.log +0 -22
  1004. package/sdks/typescript/runner-protocol/dist/index.cjs.map +0 -1
  1005. package/sdks/typescript/runner-protocol/dist/index.d.cts +0 -666
  1006. package/sdks/typescript/runner-protocol/dist/index.d.ts +0 -666
  1007. package/sdks/typescript/runner-protocol/dist/index.js.map +0 -1
  1008. package/sdks/typescript/runner-protocol/node_modules/.bin/tsserver +0 -21
  1009. package/sdks/typescript/runner-protocol/node_modules/.bin/tsup +0 -21
  1010. package/sdks/typescript/runner-protocol/node_modules/.bin/tsup-node +0 -21
  1011. package/sdks/typescript/test-runner/Dockerfile +0 -26
  1012. package/tests/load/README.md +0 -28
  1013. package/tests/load/actor-lifecycle/README.md +0 -26
  1014. package/tests/load/actor-lifecycle/actor.ts +0 -41
  1015. package/tests/load/actor-lifecycle/config.ts +0 -14
  1016. package/tests/load/actor-lifecycle/index.ts +0 -62
  1017. package/tests/load/actor-lifecycle/rivet_api.ts +0 -140
  1018. package/tests/load/actor-lifecycle/types.ts +0 -17
  1019. package/tests/load/node_modules/.bin/biome +0 -21
  1020. package/tests/load/node_modules/.bin/tsc +0 -21
  1021. package/tests/load/package.json +0 -15
  1022. package/tests/load/tsconfig.json +0 -20
  1023. package/tests/smoke/README.md +0 -32
  1024. package/tests/smoke/package.json +0 -19
  1025. package/tests/smoke/scripts/connect.ts +0 -41
  1026. package/tests/smoke/src/server/registry.ts +0 -32
  1027. package/tests/smoke/src/server/server.ts +0 -7
  1028. package/tests/smoke/src/smoke-test/index.ts +0 -161
  1029. package/tests/smoke/src/smoke-test/spawn-actor.ts +0 -109
  1030. package/tests/smoke/tsconfig.json +0 -43
  1031. /package/packages/{dump-openapi → api-public-openapi-gen}/build.rs +0 -0
  1032. /package/packages/{dump-openapi → api-public-openapi-gen}/src/lib.rs +0 -0
  1033. /package/{sdks/rust → packages}/runner-protocol/Cargo.toml +0 -0
  1034. /package/{sdks/rust → packages}/runner-protocol/src/compat.rs +0 -0
  1035. /package/{sdks/rust → packages}/runner-protocol/src/generated.rs +0 -0
  1036. /package/{sdks/rust → packages}/runner-protocol/src/uuid_compat.rs +0 -0
  1037. /package/sdks/rust/{runner-protocol → envoy-protocol}/src/util.rs +0 -0
  1038. /package/{tests/smoke → sdks/typescript/envoy-protocol}/turbo.json +0 -0
@@ -5,11 +5,11 @@ use std::{
5
5
  collections::{HashMap, HashSet},
6
6
  hash::{DefaultHasher, Hash, Hasher},
7
7
  sync::Arc,
8
- time::Instant,
8
+ time::{Duration, Instant},
9
9
  };
10
10
 
11
11
  use anyhow::{Context, Result};
12
- use futures_util::{StreamExt, TryStreamExt, stream::BoxStream};
12
+ use futures_util::{StreamExt, TryStreamExt, future::try_join_all, stream::BoxStream};
13
13
  use rivet_util::Id;
14
14
  use rivet_util::future::CustomInstrumentExt;
15
15
  use serde_json::json;
@@ -23,20 +23,20 @@ use universaldb::{
23
23
  value::Value,
24
24
  };
25
25
 
26
- use rivet_metrics::KeyValue;
27
-
28
26
  use super::{BumpSubSubject, Database, PulledWorkflowData, SignalData, WorkflowData};
29
27
  use crate::{
30
28
  error::{WorkflowError, WorkflowResult},
31
29
  history::{
32
30
  event::{
33
31
  ActivityEvent, Event, EventData, EventType, LoopEvent, MessageSendEvent, RemovedEvent,
34
- SignalEvent, SignalSendEvent, SleepEvent, SleepState, SubWorkflowEvent,
32
+ SignalSendEvent, SignalsEvent, SleepEvent, SleepState, SubWorkflowEvent,
33
+ VersionCheckEvent,
35
34
  },
36
35
  location::Location,
37
36
  },
38
37
  metrics,
39
38
  worker::PING_INTERVAL,
39
+ workflow::PruneVariant,
40
40
  };
41
41
 
42
42
  mod debug;
@@ -48,11 +48,13 @@ mod system;
48
48
  const WORKER_LOST_THRESHOLD_MS: i64 = rivet_util::duration::seconds(30);
49
49
  /// How long before overwriting an existing metrics lock.
50
50
  const METRICS_LOCK_TIMEOUT_MS: i64 = rivet_util::duration::seconds(30);
51
+ const EARLY_TXN_TIMEOUT: Duration = Duration::from_millis(2500);
51
52
 
52
53
  pub struct DatabaseKv {
54
+ config: rivet_config::Config,
53
55
  pools: rivet_pools::Pools,
54
56
  subspace: universaldb::utils::Subspace,
55
- system: Mutex<system::SystemInfo>,
57
+ system: Arc<Mutex<system::SystemInfo>>,
56
58
  }
57
59
 
58
60
  impl DatabaseKv {
@@ -142,13 +144,17 @@ impl DatabaseKv {
142
144
  );
143
145
 
144
146
  let workflow_name_key = keys::workflow::NameKey::new(workflow_id);
147
+ let wake_signal_key =
148
+ keys::workflow::WakeSignalKey::new(workflow_id, signal_name.to_string());
149
+
150
+ let (workflow_name_entry, wake_signal_entry) = tokio::try_join!(
151
+ tx.get(&self.subspace.pack(&workflow_name_key), Serializable),
152
+ tx.get(&self.subspace.pack(&wake_signal_key), Serializable),
153
+ )?;
145
154
 
146
155
  // TODO: This does not check if the workflow is silenced
147
156
  // Check if the workflow exists
148
- let Some(workflow_name_entry) = tx
149
- .get(&self.subspace.pack(&workflow_name_key), Serializable)
150
- .await?
151
- else {
157
+ let Some(workflow_name_entry) = workflow_name_entry else {
152
158
  return Err(WorkflowError::WorkflowNotFound.into());
153
159
  };
154
160
 
@@ -200,15 +206,8 @@ impl DatabaseKv {
200
206
  &workflow_id_key.serialize(workflow_id)?,
201
207
  );
202
208
 
203
- let wake_signal_key =
204
- keys::workflow::WakeSignalKey::new(workflow_id, signal_name.to_string());
205
-
206
209
  // If the workflow currently has a wake signal key for this signal, wake it
207
- if tx
208
- .get(&self.subspace.pack(&wake_signal_key), Serializable)
209
- .await?
210
- .is_some()
211
- {
210
+ if wake_signal_entry.is_some() {
212
211
  let mut wake_condition_key = keys::wake::WorkflowWakeConditionKey::new(
213
212
  workflow_name,
214
213
  workflow_id,
@@ -226,7 +225,15 @@ impl DatabaseKv {
226
225
  update_metric(
227
226
  &tx.with_subspace(self.subspace.clone()),
228
227
  None,
229
- Some(keys::metric::GaugeMetric::SignalPending(
228
+ Some(keys::metric::Metric::SignalPending2(
229
+ signal_name.to_string(),
230
+ )),
231
+ );
232
+ update_wf_metric(
233
+ &tx.with_subspace(self.subspace.clone()),
234
+ workflow_id,
235
+ None,
236
+ Some(keys::workflow::Metric::SignalPending(
230
237
  signal_name.to_string(),
231
238
  )),
232
239
  );
@@ -338,7 +345,7 @@ impl DatabaseKv {
338
345
  update_metric(
339
346
  &tx,
340
347
  None,
341
- Some(keys::metric::GaugeMetric::WorkflowSleeping(
348
+ Some(keys::metric::Metric::WorkflowSleeping(
342
349
  workflow_name.to_string(),
343
350
  )),
344
351
  );
@@ -420,11 +427,16 @@ impl Database for DatabaseKv {
420
427
  std::time::Duration::from_secs(4)
421
428
  }
422
429
 
423
- async fn from_pools(pools: rivet_pools::Pools) -> anyhow::Result<Arc<Self>> {
430
+ async fn new(
431
+ config: rivet_config::Config,
432
+ pools: rivet_pools::Pools,
433
+ ) -> anyhow::Result<Arc<Self>> {
434
+ metrics::DB_INSTANCE.inc();
424
435
  Ok(Arc::new(DatabaseKv {
436
+ config,
425
437
  pools,
426
438
  subspace: universaldb::utils::Subspace::new(&(RIVET, GASOLINE, KV)),
427
- system: Mutex::new(system::SystemInfo::new()),
439
+ system: system::SystemInfo::get(),
428
440
  }))
429
441
  }
430
442
 
@@ -439,6 +451,7 @@ impl Database for DatabaseKv {
439
451
  .map_err(WorkflowError::PoolsGeneric)?
440
452
  .subscribe(&subjects::convert(subject))
441
453
  .await
454
+ .context("failed to subscribe to bump sub")
442
455
  .map_err(|x| WorkflowError::CreateSubscription(x.into()))?;
443
456
 
444
457
  let stream = async_stream::stream! {
@@ -466,9 +479,10 @@ impl Database for DatabaseKv {
466
479
  .map_err(WorkflowError::PoolsGeneric)?
467
480
  .run(|tx| {
468
481
  async move {
482
+ let start = Instant::now();
469
483
  let now = rivet_util::timestamp::now();
470
484
 
471
- let mut last_ping_cache: Vec<(Id, i64)> = Vec::new();
485
+ let mut last_ping_cache = HashMap::<Id, i64>::new();
472
486
  let mut lost_worker_ids = HashSet::new();
473
487
  let mut expired_workflow_count = 0;
474
488
 
@@ -487,7 +501,16 @@ impl Database for DatabaseKv {
487
501
  Snapshot,
488
502
  );
489
503
 
490
- while let Some(lease_key_entry) = stream.try_next().await? {
504
+ loop {
505
+ if start.elapsed() > EARLY_TXN_TIMEOUT {
506
+ tracing::warn!("timed out processing expired leases");
507
+ break;
508
+ }
509
+
510
+ let Some(lease_key_entry) = stream.try_next().await? else {
511
+ break;
512
+ };
513
+
491
514
  let lease_key = self
492
515
  .subspace
493
516
  .unpack::<keys::workflow::LeaseKey>(lease_key_entry.key())?;
@@ -496,8 +519,8 @@ impl Database for DatabaseKv {
496
519
  let last_ping_ts_key = keys::worker::LastPingTsKey::new(worker_id);
497
520
 
498
521
  // Get last ping of worker for this lease
499
- let last_ping_ts = if let Some((_, last_ping_ts)) =
500
- last_ping_cache.iter().find(|(k, _)| k == &worker_id)
522
+ let last_ping_ts = if let Some(last_ping_ts) =
523
+ last_ping_cache.get(&worker_id)
501
524
  {
502
525
  *last_ping_ts
503
526
  } else if let Some(last_ping_entry) = tx
@@ -512,12 +535,12 @@ impl Database for DatabaseKv {
512
535
  let last_ping_ts = last_ping_ts_key.deserialize(&last_ping_entry)?;
513
536
 
514
537
  // Update cache
515
- last_ping_cache.push((worker_id, last_ping_ts));
538
+ last_ping_cache.insert(worker_id, last_ping_ts);
516
539
 
517
540
  last_ping_ts
518
541
  } else {
519
542
  // Update cache
520
- last_ping_cache.push((worker_id, 0));
543
+ last_ping_cache.insert(worker_id, 0);
521
544
 
522
545
  0
523
546
  };
@@ -563,10 +586,10 @@ impl Database for DatabaseKv {
563
586
 
564
587
  update_metric(
565
588
  &tx.with_subspace(self.subspace.clone()),
566
- Some(keys::metric::GaugeMetric::WorkflowActive(
589
+ Some(keys::metric::Metric::WorkflowActive(
567
590
  workflow_name.to_string(),
568
591
  )),
569
- Some(keys::metric::GaugeMetric::WorkflowSleeping(
592
+ Some(keys::metric::Metric::WorkflowSleeping(
570
593
  workflow_name.to_string(),
571
594
  )),
572
595
  );
@@ -583,6 +606,7 @@ impl Database for DatabaseKv {
583
606
  })
584
607
  .custom_instrument(tracing::info_span!("clear_expired_leases_tx"))
585
608
  .await
609
+ .context("failed to clear expired leases")
586
610
  .map_err(WorkflowError::Udb)?;
587
611
 
588
612
  if expired_workflow_count != 0 {
@@ -633,9 +657,12 @@ impl Database for DatabaseKv {
633
657
  })
634
658
  .custom_instrument(tracing::info_span!("acquire_lock_tx"))
635
659
  .await
660
+ .context("failed to acquire metrics lock")
636
661
  .map_err(WorkflowError::Udb)?;
637
662
 
638
663
  if acquired_lock {
664
+ metrics::WORKER_LAST_METRICS_PUBLISH.set(rivet_util::timestamp::now());
665
+
639
666
  let entries = self
640
667
  .pools
641
668
  .udb()
@@ -643,9 +670,8 @@ impl Database for DatabaseKv {
643
670
  .run(|tx| async move {
644
671
  let tx = tx.with_subspace(self.subspace.clone());
645
672
 
646
- let metrics_subspace = self
647
- .subspace
648
- .subspace(&keys::metric::GaugeMetricKey::subspace());
673
+ let metrics_subspace =
674
+ self.subspace.subspace(&keys::metric::MetricKey::subspace());
649
675
  tx.get_ranges_keyvalues(
650
676
  universaldb::RangeOption {
651
677
  mode: StreamingMode::WantAll,
@@ -654,7 +680,7 @@ impl Database for DatabaseKv {
654
680
  Serializable,
655
681
  )
656
682
  .map(|res| match res {
657
- Ok(entry) => tx.read_entry::<keys::metric::GaugeMetricKey>(&entry),
683
+ Ok(entry) => tx.read_entry::<keys::metric::MetricKey>(&entry),
658
684
  Err(err) => Err(err.into()),
659
685
  })
660
686
  .try_collect::<Vec<_>>()
@@ -662,82 +688,78 @@ impl Database for DatabaseKv {
662
688
  })
663
689
  .custom_instrument(tracing::info_span!("read_metrics_tx"))
664
690
  .await
691
+ .context("failed to read metrics")
665
692
  .map_err(WorkflowError::Udb)?;
666
693
 
667
- let mut total_workflow_counts: Vec<(String, usize)> = Vec::new();
694
+ let mut total_workflow_counts: Vec<(String, i64)> = Vec::new();
668
695
 
669
696
  for (key, count) in entries {
670
697
  match key.metric {
671
- keys::metric::GaugeMetric::WorkflowActive(workflow_name) => {
672
- metrics::WORKFLOW_ACTIVE.record(
673
- count as u64,
674
- &[KeyValue::new("workflow_name", workflow_name.clone())],
675
- );
698
+ keys::metric::Metric::WorkflowActive(workflow_name) => {
699
+ metrics::WORKFLOW_ACTIVE
700
+ .with_label_values(&[workflow_name.as_str()])
701
+ .set(count);
676
702
 
677
703
  if let Some(entry) = total_workflow_counts
678
704
  .iter_mut()
679
705
  .find(|(name, _)| name == &workflow_name)
680
706
  {
681
- entry.1 += 1;
707
+ entry.1 += count;
682
708
  } else {
683
- total_workflow_counts.push((workflow_name, 1));
709
+ total_workflow_counts.push((workflow_name, count));
684
710
  }
685
711
  }
686
- keys::metric::GaugeMetric::WorkflowSleeping(workflow_name) => {
687
- metrics::WORKFLOW_SLEEPING.record(
688
- count as u64,
689
- &[KeyValue::new("workflow_name", workflow_name.clone())],
690
- );
712
+ keys::metric::Metric::WorkflowSleeping(workflow_name) => {
713
+ metrics::WORKFLOW_SLEEPING
714
+ .with_label_values(&[workflow_name.as_str()])
715
+ .set(count);
691
716
 
692
717
  if let Some(entry) = total_workflow_counts
693
718
  .iter_mut()
694
719
  .find(|(name, _)| name == &workflow_name)
695
720
  {
696
- entry.1 += 1;
721
+ entry.1 += count;
697
722
  } else {
698
- total_workflow_counts.push((workflow_name, 1));
723
+ total_workflow_counts.push((workflow_name, count));
699
724
  }
700
725
  }
701
- keys::metric::GaugeMetric::WorkflowDead(workflow_name, error) => {
702
- metrics::WORKFLOW_DEAD.record(
703
- count as u64,
704
- &[
705
- KeyValue::new("workflow_name", workflow_name.clone()),
706
- KeyValue::new("error", error),
707
- ],
708
- );
726
+ keys::metric::Metric::WorkflowDead(workflow_name, error) => {
727
+ metrics::WORKFLOW_DEAD
728
+ .with_label_values(&[workflow_name.as_str(), error.as_str()])
729
+ .set(count);
709
730
 
710
731
  if let Some(entry) = total_workflow_counts
711
732
  .iter_mut()
712
733
  .find(|(name, _)| name == &workflow_name)
713
734
  {
714
- entry.1 += 1;
735
+ entry.1 += count;
715
736
  } else {
716
- total_workflow_counts.push((workflow_name, 1));
737
+ total_workflow_counts.push((workflow_name, count));
717
738
  }
718
739
  }
719
- keys::metric::GaugeMetric::WorkflowComplete(workflow_name) => {
740
+ keys::metric::Metric::WorkflowComplete(workflow_name) => {
720
741
  if let Some(entry) = total_workflow_counts
721
742
  .iter_mut()
722
743
  .find(|(name, _)| name == &workflow_name)
723
744
  {
724
- entry.1 += 1;
745
+ entry.1 += count;
725
746
  } else {
726
- total_workflow_counts.push((workflow_name, 1));
747
+ total_workflow_counts.push((workflow_name, count));
727
748
  }
728
749
  }
729
- keys::metric::GaugeMetric::SignalPending(signal_name) => {
750
+ keys::metric::Metric::SignalPending(_) => {}
751
+ keys::metric::Metric::SignalPending2(signal_name) => {
730
752
  metrics::SIGNAL_PENDING
731
- .record(count as u64, &[KeyValue::new("signal_name", signal_name)]);
753
+ .with_label_values(&[signal_name.as_str()])
754
+ .set(count);
732
755
  }
733
756
  }
734
757
  }
735
758
 
736
759
  for (workflow_name, count) in total_workflow_counts {
737
- metrics::WORKFLOW_TOTAL.record(
738
- count as u64,
739
- &[KeyValue::new("workflow_name", workflow_name.clone())],
740
- );
760
+ metrics::WORKFLOW_TOTAL
761
+ .with_label_values(&[workflow_name.as_str()])
762
+ .set(count);
741
763
  }
742
764
 
743
765
  // Clear lock
@@ -752,6 +774,7 @@ impl Database for DatabaseKv {
752
774
  })
753
775
  .custom_instrument(tracing::info_span!("clear_lock_tx"))
754
776
  .await
777
+ .context("failed to release metrics lock")
755
778
  .map_err(WorkflowError::Udb)?;
756
779
  }
757
780
 
@@ -759,11 +782,14 @@ impl Database for DatabaseKv {
759
782
  }
760
783
 
761
784
  #[tracing::instrument(skip_all)]
762
- async fn update_worker_ping(&self, worker_id: Id) -> WorkflowResult<()> {
763
- metrics::WORKER_LAST_PING.record(
764
- rivet_util::timestamp::now() as u64,
765
- &[KeyValue::new("worker_id", worker_id.to_string())],
766
- );
785
+ async fn update_worker_ping(
786
+ &self,
787
+ worker_id: Id,
788
+ update_active_idx: bool,
789
+ ) -> WorkflowResult<()> {
790
+ metrics::WORKER_LAST_PING
791
+ .with_label_values(&[&worker_id.to_string()])
792
+ .set(rivet_util::timestamp::now());
767
793
 
768
794
  self.pools
769
795
  .udb()
@@ -774,24 +800,27 @@ impl Database for DatabaseKv {
774
800
  let last_ping_ts = rivet_util::timestamp::now();
775
801
  let last_ping_ts_key = keys::worker::LastPingTsKey::new(worker_id);
776
802
 
777
- if let Some(last_last_ping_ts) =
778
- tx.read_opt(&last_ping_ts_key, Serializable).await?
779
- {
780
- let active_worker_idx_key =
781
- keys::worker::ActiveWorkerIdxKey::new(last_last_ping_ts, worker_id);
782
- tx.delete(&active_worker_idx_key);
783
- }
784
-
785
803
  tx.write(&last_ping_ts_key, last_ping_ts)?;
786
804
 
787
- let active_worker_idx_key =
788
- keys::worker::ActiveWorkerIdxKey::new(last_ping_ts, worker_id);
789
- tx.write(&active_worker_idx_key, ())?;
805
+ if update_active_idx {
806
+ if let Some(last_last_ping_ts) =
807
+ tx.read_opt(&last_ping_ts_key, Serializable).await?
808
+ {
809
+ let active_worker_idx_key =
810
+ keys::worker::ActiveWorkerIdxKey::new(last_last_ping_ts, worker_id);
811
+ tx.delete(&active_worker_idx_key);
812
+ }
813
+
814
+ let active_worker_idx_key =
815
+ keys::worker::ActiveWorkerIdxKey::new(last_ping_ts, worker_id);
816
+ tx.write(&active_worker_idx_key, ())?;
817
+ }
790
818
 
791
819
  Ok(())
792
820
  })
793
821
  .custom_instrument(tracing::info_span!("update_worker_ping_tx"))
794
822
  .await
823
+ .context("failed to update worker ping")
795
824
  .map_err(WorkflowError::Udb)?;
796
825
 
797
826
  Ok(())
@@ -817,6 +846,7 @@ impl Database for DatabaseKv {
817
846
  })
818
847
  .custom_instrument(tracing::info_span!("mark_worker_inactive_tx"))
819
848
  .await
849
+ .context("failed to mark worker inactive")
820
850
  .map_err(WorkflowError::Udb)?;
821
851
 
822
852
  Ok(())
@@ -850,6 +880,7 @@ impl Database for DatabaseKv {
850
880
  })
851
881
  .custom_instrument(tracing::info_span!("dispatch_workflow_tx"))
852
882
  .await
883
+ .context("failed to dispatch workflow")
853
884
  .map_err(WorkflowError::Udb)?;
854
885
 
855
886
  self.bump(BumpSubSubject::Worker);
@@ -869,6 +900,7 @@ impl Database for DatabaseKv {
869
900
  .map(|workflow_id| {
870
901
  let tx = tx.clone();
871
902
  async move {
903
+ let name_key = keys::workflow::NameKey::new(workflow_id);
872
904
  let input_key = keys::workflow::InputKey::new(workflow_id);
873
905
  let input_subspace = self.subspace.subspace(&input_key);
874
906
  let state_key = keys::workflow::StateKey::new(workflow_id);
@@ -880,11 +912,13 @@ impl Database for DatabaseKv {
880
912
 
881
913
  // Read input and output
882
914
  let (
915
+ name_entry,
883
916
  input_chunks,
884
917
  state_chunks,
885
918
  output_chunks,
886
919
  has_wake_condition_entry,
887
920
  ) = tokio::try_join!(
921
+ tx.get(&self.subspace.pack(&name_key), Serializable),
888
922
  tx.get_ranges_keyvalues(
889
923
  universaldb::RangeOption {
890
924
  mode: StreamingMode::WantAll,
@@ -934,6 +968,9 @@ impl Database for DatabaseKv {
934
968
 
935
969
  Ok(Some(WorkflowData {
936
970
  workflow_id,
971
+ name: name_key.deserialize(
972
+ &name_entry.context("name key should exist")?,
973
+ )?,
937
974
  input,
938
975
  state,
939
976
  output,
@@ -951,11 +988,12 @@ impl Database for DatabaseKv {
951
988
  })
952
989
  .custom_instrument(tracing::info_span!("get_workflow_tx"))
953
990
  .await
991
+ .context("failed to get workflows")
954
992
  .map_err(WorkflowError::Udb)
955
993
  }
956
994
 
957
995
  /// Returns the first incomplete workflow with the given name and tags, first meaning the one with the
958
- /// lowest id value (interpreted as u128) because its in a KV store. There is no way to get any other
996
+ /// lowest id value (by internal representation) because its in a KV store. There is no way to get any other
959
997
  /// workflow besides the first.
960
998
  #[tracing::instrument(skip_all, fields(%workflow_name))]
961
999
  async fn find_workflow(
@@ -972,17 +1010,45 @@ impl Database for DatabaseKv {
972
1010
  .run(|tx| async move { self.find_workflow_inner(workflow_name, tags, &tx).await })
973
1011
  .custom_instrument(tracing::info_span!("find_workflow_tx"))
974
1012
  .await
1013
+ .context("failed to find workflow")
975
1014
  .map_err(WorkflowError::Udb)?;
976
1015
 
977
1016
  let dt = start_instant.elapsed().as_secs_f64();
978
- metrics::FIND_WORKFLOWS_DURATION.record(
979
- dt,
980
- &[KeyValue::new("workflow_name", workflow_name.to_string())],
981
- );
1017
+ metrics::FIND_WORKFLOWS_DURATION
1018
+ .with_label_values(&[workflow_name])
1019
+ .observe(dt);
982
1020
 
983
1021
  Ok(workflow_id)
984
1022
  }
985
1023
 
1024
+ #[tracing::instrument(skip_all)]
1025
+ async fn find_workflows(
1026
+ &self,
1027
+ queries: &[(&str, serde_json::Value)],
1028
+ ) -> WorkflowResult<Vec<Option<Id>>> {
1029
+ let start_instant = Instant::now();
1030
+
1031
+ let workflow_ids = self
1032
+ .pools
1033
+ .udb()
1034
+ .map_err(WorkflowError::PoolsGeneric)?
1035
+ .run(|tx| async move {
1036
+ let futures = queries.iter().map(|(workflow_name, tags)| {
1037
+ self.find_workflow_inner(workflow_name, tags, &tx)
1038
+ });
1039
+ try_join_all(futures).await
1040
+ })
1041
+ .custom_instrument(tracing::info_span!("find_workflows_batch_tx"))
1042
+ .await
1043
+ .context("failed to find workflows")
1044
+ .map_err(WorkflowError::Udb)?;
1045
+
1046
+ let dt = start_instant.elapsed().as_secs_f64();
1047
+ metrics::FIND_WORKFLOWS_BATCH_DURATION.observe(dt);
1048
+
1049
+ Ok(workflow_ids)
1050
+ }
1051
+
986
1052
  #[tracing::instrument(skip_all)]
987
1053
  async fn pull_workflows(
988
1054
  &self,
@@ -1006,16 +1072,28 @@ impl Database for DatabaseKv {
1006
1072
  let tx = tx.with_subspace(self.subspace.clone());
1007
1073
  let now = rivet_util::timestamp::now();
1008
1074
 
1009
- // All wake conditions with a timestamp after this timestamp will be pulled
1075
+ // All wake conditions with a timestamp before this timestamp will be pulled
1010
1076
  let pull_before = now + i64::try_from(self.worker_poll_interval().as_millis())?;
1011
1077
  // Only consider workers that have pinged within 2 ping intervals ago
1012
1078
  let active_workers_after = now - i64::try_from(PING_INTERVAL.as_millis() * 2)?;
1013
1079
 
1014
- // Determine load shedding ratio based on linear mapping on cpu usage. We will gradually
1015
- // pull less workflows as the cpu usage increases
1016
- let cpu_usage = { self.system.lock().await.cpu_usage() };
1017
- let load_shed_ratio_x1000 =
1018
- calc_pull_ratio((cpu_usage.max(100.0) * 10.0) as u64, 500, 1000, 800, 100);
1080
+ let cpu_usage_ratio = {
1081
+ self.system
1082
+ .lock()
1083
+ .await
1084
+ .cpu_usage_ratio(self.config.runtime.worker_cpu_max)
1085
+ };
1086
+ let load_shed_curve = self.config.runtime.worker_load_shedding_curve();
1087
+ let load_shed_ratio_x1000 = calc_pull_ratio(
1088
+ (cpu_usage_ratio * 1000.0) as u64,
1089
+ load_shed_curve[0].0,
1090
+ load_shed_curve[0].1,
1091
+ load_shed_curve[1].0,
1092
+ load_shed_curve[1].1,
1093
+ );
1094
+
1095
+ // Record load shedding ratio metric
1096
+ metrics::LOAD_SHEDDING_RATIO.observe(load_shed_ratio_x1000 as f64 / 1000.0);
1019
1097
 
1020
1098
  let active_worker_subspace_start = tx.pack(
1021
1099
  &keys::worker::ActiveWorkerIdxKey::subspace(active_workers_after),
@@ -1042,32 +1120,53 @@ impl Database for DatabaseKv {
1042
1120
  Ok(key.worker_id)
1043
1121
  })
1044
1122
  .try_collect::<Vec<_>>(),
1045
- futures_util::stream::iter(owned_filter)
1046
- .map(|wf_name| {
1047
- let wake_subspace_start = end_of_key_range(&tx.pack(
1048
- &keys::wake::WorkflowWakeConditionKey::subspace_without_ts(
1049
- wf_name.clone(),
1050
- ),
1051
- ));
1052
- let wake_subspace_end =
1053
- tx.pack(&keys::wake::WorkflowWakeConditionKey::subspace(
1054
- wf_name,
1055
- pull_before,
1123
+ async {
1124
+ let start = Instant::now();
1125
+ let mut buffer = Vec::new();
1126
+ let mut stream = futures_util::stream::iter(owned_filter)
1127
+ .map(|wf_name| {
1128
+ let wake_subspace_start = end_of_key_range(&tx.pack(
1129
+ &keys::wake::WorkflowWakeConditionKey::subspace_without_ts(
1130
+ wf_name.clone(),
1131
+ ),
1056
1132
  ));
1133
+ let wake_subspace_end =
1134
+ tx.pack(&keys::wake::WorkflowWakeConditionKey::subspace(
1135
+ wf_name,
1136
+ pull_before,
1137
+ ));
1057
1138
 
1058
- tx.get_ranges_keyvalues(
1059
- universaldb::RangeOption {
1060
- mode: StreamingMode::WantAll,
1061
- ..(wake_subspace_start, wake_subspace_end).into()
1062
- },
1063
- // This is Snapshot to reduce contention with any new wake conditions
1064
- // being inserted. Conflicts are handled by workflow leases.
1065
- Snapshot,
1066
- )
1067
- })
1068
- .flatten()
1069
- .map(|res| tx.unpack::<keys::wake::WorkflowWakeConditionKey>(res?.key()))
1070
- .try_collect::<Vec<_>>(),
1139
+ tx.get_ranges_keyvalues(
1140
+ universaldb::RangeOption {
1141
+ mode: StreamingMode::WantAll,
1142
+ ..(wake_subspace_start, wake_subspace_end).into()
1143
+ },
1144
+ // This is Snapshot to reduce contention with any new wake conditions
1145
+ // being inserted. Conflicts are handled by workflow leases.
1146
+ Snapshot,
1147
+ )
1148
+ })
1149
+ .flatten()
1150
+ .map(|res| {
1151
+ tx.unpack::<keys::wake::WorkflowWakeConditionKey>(res?.key())
1152
+ });
1153
+
1154
+ loop {
1155
+ if start.elapsed() > EARLY_TXN_TIMEOUT {
1156
+ tracing::warn!("timed out pulling wake conditions");
1157
+ break;
1158
+ }
1159
+
1160
+ let Some(wake_key) = stream.try_next().await? else {
1161
+ break;
1162
+ };
1163
+
1164
+ buffer.push(wake_key);
1165
+ }
1166
+
1167
+ anyhow::Ok(buffer)
1168
+ }
1169
+ .custom_instrument(tracing::debug_span!("read_wake_conditions"))
1071
1170
  )?;
1072
1171
 
1073
1172
  // Sort for consistency across all workers
@@ -1089,70 +1188,94 @@ impl Database for DatabaseKv {
1089
1188
 
1090
1189
  0
1091
1190
  };
1092
- let active_worker_count = active_worker_ids.len() as u64;
1191
+ let active_worker_count = active_worker_ids.len().max(1) as u64;
1093
1192
 
1094
1193
  // Collect name and deadline ts for each wf id
1095
- let mut dedup_workflows: Vec<MinimalPulledWorkflow> = Vec::new();
1194
+ let mut dedup_workflows = HashMap::<Id, MinimalPulledWorkflow>::new();
1195
+ let now = rivet_util::timestamp::now(); // More up to date now than prev var
1096
1196
  for wake_key in &wake_keys {
1097
- if let Some(wf) = dedup_workflows
1098
- .iter_mut()
1099
- .find(|wf| wf.workflow_id == wake_key.workflow_id)
1100
- {
1101
- let key_wake_deadline_ts = wake_key.condition.deadline_ts();
1197
+ // Record time difference between when the wake condition was created and when it was
1198
+ // pulled (here). We ignore deadline wake conditions because their ts value is not
1199
+ // representative of when it was created, but rather when it should wake.
1200
+ if !matches!(
1201
+ wake_key.condition,
1202
+ keys::wake::WakeCondition::Deadline { .. }
1203
+ ) {
1204
+ // TODO: This will record metrics even if the txn fails, which is wrong
1205
+ metrics::WORKFLOW_WAKE_DELTA_DURATION
1206
+ .with_label_values(&[&wake_key.workflow_name])
1207
+ .observe(now.saturating_sub(wake_key.ts).max(0) as f64 / 1000.0);
1208
+ }
1102
1209
 
1103
- // Update wake condition ts if earlier
1104
- if wake_key.ts < wf.earliest_wake_condition_ts {
1105
- wf.earliest_wake_condition_ts = wake_key.ts;
1106
- }
1210
+ let Some(wf) = dedup_workflows.get_mut(&wake_key.workflow_id) else {
1211
+ dedup_workflows.insert(
1212
+ wake_key.workflow_id,
1213
+ MinimalPulledWorkflow {
1214
+ workflow_id: wake_key.workflow_id,
1215
+ workflow_name: wake_key.workflow_name.clone(),
1216
+ wake_deadline_ts: wake_key.condition.deadline_ts(),
1217
+ earliest_wake_condition_ts: wake_key.ts,
1218
+ },
1219
+ );
1107
1220
 
1108
- // Update wake deadline ts if earlier
1109
- if wf.wake_deadline_ts.is_none()
1110
- || key_wake_deadline_ts < wf.wake_deadline_ts
1111
- {
1112
- wf.wake_deadline_ts = key_wake_deadline_ts;
1221
+ // Hard limit of 10k deduped workflows, this gets further limited to 1000 at
1222
+ // `assigned_workflows`
1223
+ if dedup_workflows.len() >= 10000 {
1224
+ break;
1113
1225
  }
1114
1226
 
1115
1227
  continue;
1228
+ };
1229
+
1230
+ let key_wake_deadline_ts = wake_key.condition.deadline_ts();
1231
+
1232
+ // Update wake condition ts if earlier
1233
+ if wake_key.ts < wf.earliest_wake_condition_ts {
1234
+ wf.earliest_wake_condition_ts = wake_key.ts;
1116
1235
  }
1117
1236
 
1118
- dedup_workflows.push(MinimalPulledWorkflow {
1119
- workflow_id: wake_key.workflow_id,
1120
- workflow_name: wake_key.workflow_name.clone(),
1121
- wake_deadline_ts: wake_key.condition.deadline_ts(),
1122
- earliest_wake_condition_ts: wake_key.ts,
1123
- });
1237
+ // Update wake deadline ts if earlier
1238
+ if wf.wake_deadline_ts.is_none()
1239
+ || key_wake_deadline_ts < wf.wake_deadline_ts
1240
+ {
1241
+ wf.wake_deadline_ts = key_wake_deadline_ts;
1242
+ }
1124
1243
  }
1125
1244
 
1126
1245
  // Filter workflows in a way that spreads all current pending workflows across all active
1127
1246
  // workers evenly
1128
- let assigned_workflows = dedup_workflows.into_iter().filter(|wf| {
1129
- let mut hasher = DefaultHasher::new();
1130
-
1131
- // Earliest wake condition ts is consistent for hashing purposes because when it
1132
- // changes it means a worker has leased it
1133
- wf.earliest_wake_condition_ts.hash(&mut hasher);
1134
- let wf_hash = hasher.finish();
1135
-
1136
- let pseudorandom_value_x1000 = {
1137
- // Add a little pizazz to the hash so its a different number than wf_hash but
1138
- // still consistent
1139
- 1234i32.hash(&mut hasher);
1140
- hasher.finish() % 1000 // 0-1000
1141
- };
1142
-
1143
- if pseudorandom_value_x1000 < load_shed_ratio_x1000 {
1144
- return false;
1145
- }
1247
+ let assigned_workflows = dedup_workflows
1248
+ .into_values()
1249
+ .filter(|wf| {
1250
+ let mut hasher = DefaultHasher::new();
1251
+
1252
+ // Earliest wake condition ts is consistent for hashing purposes because when it
1253
+ // changes it means a worker has leased it
1254
+ wf.earliest_wake_condition_ts.hash(&mut hasher);
1255
+ let wf_hash = hasher.finish();
1256
+
1257
+ let pseudorandom_value_x1000 = {
1258
+ // Add a little pizazz to the hash so its a different number than wf_hash but
1259
+ // still consistent
1260
+ 1234i32.hash(&mut hasher);
1261
+ hasher.finish() % 1000 // 0-1000
1262
+ };
1263
+
1264
+ if pseudorandom_value_x1000 > load_shed_ratio_x1000 {
1265
+ return false;
1266
+ }
1146
1267
 
1147
- let wf_worker_idx = wf_hash % active_worker_count;
1268
+ let wf_worker_idx = wf_hash % active_worker_count;
1148
1269
 
1149
- // Every worker pulls workflows that has to the current worker as well as the next
1150
- // worker for redundancy. this results in increased txn conflicts but less chance of
1151
- // orphaned workflows
1152
- let next_worker_idx = (current_worker_idx + 1) % active_worker_count;
1270
+ // Every worker pulls workflows that match the current worker idx as well as the next
1271
+ // worker for redundancy. this results in increased txn conflicts but less chance of
1272
+ // orphaned workflows
1273
+ let next_worker_idx = (current_worker_idx + 1) % active_worker_count;
1153
1274
 
1154
- wf_worker_idx == current_worker_idx || wf_worker_idx == next_worker_idx
1155
- });
1275
+ wf_worker_idx == current_worker_idx || wf_worker_idx == next_worker_idx
1276
+ })
1277
+ // Hard limit of 1000 workflows per pull
1278
+ .take(1000);
1156
1279
 
1157
1280
  // Check leases
1158
1281
  let leased_workflows = futures_util::stream::iter(assigned_workflows)
@@ -1174,10 +1297,10 @@ impl Database for DatabaseKv {
1174
1297
 
1175
1298
  update_metric(
1176
1299
  &tx,
1177
- Some(keys::metric::GaugeMetric::WorkflowSleeping(
1300
+ Some(keys::metric::Metric::WorkflowSleeping(
1178
1301
  wf.workflow_name.clone(),
1179
1302
  )),
1180
- Some(keys::metric::GaugeMetric::WorkflowActive(
1303
+ Some(keys::metric::Metric::WorkflowActive(
1181
1304
  wf.workflow_name.clone(),
1182
1305
  )),
1183
1306
  );
@@ -1190,7 +1313,7 @@ impl Database for DatabaseKv {
1190
1313
  .buffer_unordered(1024)
1191
1314
  .try_filter_map(|x| std::future::ready(Ok(x)))
1192
1315
  .try_collect::<Vec<_>>()
1193
- .instrument(tracing::trace_span!("map_to_leased_workflows"))
1316
+ .custom_instrument(tracing::debug_span!("map_to_leased_workflows"))
1194
1317
  .await?;
1195
1318
 
1196
1319
  // Clear all wake conditions from workflows that we have leased
@@ -1205,49 +1328,6 @@ impl Database for DatabaseKv {
1205
1328
  tx.delete(wake_key);
1206
1329
  }
1207
1330
 
1208
- let leased_workflow_ids = leased_workflows
1209
- .iter()
1210
- .map(|wf| wf.workflow_id)
1211
- .collect::<Vec<_>>();
1212
-
1213
- // Clear secondary indexes so that we don't get any new wake conditions inserted while
1214
- // the workflow is running
1215
- futures_util::stream::iter(leased_workflow_ids)
1216
- .map(|workflow_id| {
1217
- let tx = tx.clone();
1218
- async move {
1219
- // Clear sub workflow secondary idx
1220
- let wake_sub_workflow_key =
1221
- keys::workflow::WakeSubWorkflowKey::new(workflow_id);
1222
- if let Some(entry) = tx
1223
- .get(&self.subspace.pack(&wake_sub_workflow_key), Serializable)
1224
- .await?
1225
- {
1226
- let sub_workflow_id =
1227
- wake_sub_workflow_key.deserialize(&entry)?;
1228
-
1229
- let sub_workflow_wake_key = keys::wake::SubWorkflowWakeKey::new(
1230
- sub_workflow_id,
1231
- workflow_id,
1232
- );
1233
-
1234
- tx.clear(&self.subspace.pack(&sub_workflow_wake_key));
1235
- }
1236
-
1237
- // Clear signals secondary index
1238
- let wake_signals_subspace = self.subspace.subspace(
1239
- &keys::workflow::WakeSignalKey::subspace(workflow_id),
1240
- );
1241
- tx.clear_subspace_range(&wake_signals_subspace);
1242
-
1243
- anyhow::Ok(())
1244
- }
1245
- })
1246
- // TODO: How to get rid of this buffer?
1247
- .buffer_unordered(1024)
1248
- .try_collect::<()>()
1249
- .await?;
1250
-
1251
1331
  // NOTE: We don't read any workflow data in this txn since its only for acquiring leases.
1252
1332
  // The less operations we do in this txn the less contention there is with other workers.
1253
1333
  Ok(leased_workflows)
@@ -1255,14 +1335,17 @@ impl Database for DatabaseKv {
1255
1335
  })
1256
1336
  .custom_instrument(tracing::info_span!("pull_workflows_tx"))
1257
1337
  .await
1338
+ .context("failed to lease workflows")
1258
1339
  .map_err(WorkflowError::Udb)?;
1259
1340
 
1260
1341
  let worker_id_str = worker_id.to_string();
1261
1342
  let dt = start_instant.elapsed().as_secs_f64();
1262
1343
  metrics::LAST_PULL_WORKFLOWS_DURATION
1263
- .record(dt, &[KeyValue::new("worker_id", worker_id_str.clone())]);
1344
+ .with_label_values(&[worker_id_str.as_str()])
1345
+ .set(dt);
1264
1346
  metrics::PULL_WORKFLOWS_DURATION
1265
- .record(dt, &[KeyValue::new("worker_id", worker_id_str.clone())]);
1347
+ .with_label_values(&[worker_id_str.as_str()])
1348
+ .observe(dt);
1266
1349
 
1267
1350
  if leased_workflows.is_empty() {
1268
1351
  return Ok(Vec::new());
@@ -1270,7 +1353,81 @@ impl Database for DatabaseKv {
1270
1353
 
1271
1354
  let start_instant2 = Instant::now();
1272
1355
 
1273
- let pulled_workflows = self
1356
+ // Collect metrics on lease counts
1357
+ let mut leased_metrics = HashMap::<&str, usize>::new();
1358
+ for leased_workflow in &leased_workflows {
1359
+ *leased_metrics
1360
+ .entry(leased_workflow.workflow_name.as_str())
1361
+ .or_default() += 1;
1362
+ }
1363
+
1364
+ for (workflow_name, count) in &leased_metrics {
1365
+ metrics::WORKFLOW_LEASED
1366
+ .with_label_values(&[*workflow_name])
1367
+ .observe(*count as f64);
1368
+ }
1369
+
1370
+ let leased_workflow_ids = leased_workflows
1371
+ .iter()
1372
+ .map(|wf| wf.workflow_id)
1373
+ .collect::<Vec<_>>();
1374
+ let clear_secondary_idx_fut = async move {
1375
+ self.pools
1376
+ .udb()
1377
+ .map_err(WorkflowError::PoolsGeneric)?
1378
+ .run(|tx| {
1379
+ let leased_workflow_ids = leased_workflow_ids.clone();
1380
+
1381
+ async move {
1382
+ // Clear secondary indexes so that we don't get any new wake conditions inserted while
1383
+ // the workflow is running
1384
+ futures_util::stream::iter(leased_workflow_ids)
1385
+ .map(|workflow_id| {
1386
+ let tx = tx.clone();
1387
+ async move {
1388
+ // Clear sub workflow secondary idx
1389
+ let wake_sub_workflow_key =
1390
+ keys::workflow::WakeSubWorkflowKey::new(workflow_id);
1391
+ // NOTE: Snapshot read because we prefer having this txn not conflict rather
1392
+ // than unnecessarily insert wake conditions
1393
+ if let Some(entry) = tx
1394
+ .get(&self.subspace.pack(&wake_sub_workflow_key), Snapshot)
1395
+ .await?
1396
+ {
1397
+ let sub_workflow_id =
1398
+ wake_sub_workflow_key.deserialize(&entry)?;
1399
+
1400
+ let sub_workflow_wake_key =
1401
+ keys::wake::SubWorkflowWakeKey::new(
1402
+ sub_workflow_id,
1403
+ workflow_id,
1404
+ );
1405
+
1406
+ tx.clear(&self.subspace.pack(&sub_workflow_wake_key));
1407
+ }
1408
+
1409
+ // Clear signals secondary index
1410
+ let wake_signals_subspace = self.subspace.subspace(
1411
+ &keys::workflow::WakeSignalKey::subspace(workflow_id),
1412
+ );
1413
+ tx.clear_subspace_range(&wake_signals_subspace);
1414
+
1415
+ anyhow::Ok(())
1416
+ }
1417
+ })
1418
+ // TODO: How to get rid of this buffer?
1419
+ .buffer_unordered(1024)
1420
+ .try_collect::<()>()
1421
+ .await
1422
+ }
1423
+ })
1424
+ .custom_instrument(tracing::info_span!("clear_workflow_secondary_idx_tx"))
1425
+ .await
1426
+ .context("failed to clear workflow secondary indexes")
1427
+ };
1428
+
1429
+ let pulled_workflows_fut = async move {
1430
+ self
1274
1431
  .pools
1275
1432
  .udb()
1276
1433
  .map_err(WorkflowError::PoolsGeneric)?
@@ -1288,8 +1445,10 @@ impl Database for DatabaseKv {
1288
1445
  let ray_id_key = keys::workflow::RayIdKey::new(wf.workflow_id);
1289
1446
  let input_key = keys::workflow::InputKey::new(wf.workflow_id);
1290
1447
  let state_key = keys::workflow::StateKey::new(wf.workflow_id);
1448
+ let output_key = keys::workflow::OutputKey::new(wf.workflow_id);
1291
1449
  let input_subspace = self.subspace.subspace(&input_key);
1292
1450
  let state_subspace = self.subspace.subspace(&state_key);
1451
+ let output_subspace = self.subspace.subspace(&output_key);
1293
1452
  let active_history_subspace = self.subspace.subspace(
1294
1453
  &keys::history::HistorySubspaceKey::new(
1295
1454
  wf.workflow_id,
@@ -1302,36 +1461,39 @@ impl Database for DatabaseKv {
1302
1461
  ray_id_entry,
1303
1462
  input_chunks,
1304
1463
  state_chunks,
1464
+ has_output,
1305
1465
  events,
1306
1466
  ) = tokio::try_join!(
1307
- async {
1308
- tx.get(&self.subspace.pack(&create_ts_key), Serializable)
1309
- .await
1310
- },
1311
- async {
1312
- tx.get(&self.subspace.pack(&ray_id_key), Serializable).await
1313
- },
1314
- async {
1315
- tx.get_ranges_keyvalues(
1316
- universaldb::RangeOption {
1317
- mode: StreamingMode::WantAll,
1318
- ..(&input_subspace).into()
1319
- },
1320
- Serializable,
1321
- )
1322
- .try_collect::<Vec<_>>()
1323
- .await
1324
- },
1467
+ tx.get(&self.subspace.pack(&create_ts_key), Serializable),
1468
+ tx.get(&self.subspace.pack(&ray_id_key), Serializable),
1469
+ tx.get_ranges_keyvalues(
1470
+ universaldb::RangeOption {
1471
+ mode: StreamingMode::WantAll,
1472
+ ..(&input_subspace).into()
1473
+ },
1474
+ Serializable,
1475
+ )
1476
+ .try_collect::<Vec<_>>(),
1477
+ tx.get_ranges_keyvalues(
1478
+ universaldb::RangeOption {
1479
+ mode: StreamingMode::WantAll,
1480
+ ..(&state_subspace).into()
1481
+ },
1482
+ Serializable,
1483
+ )
1484
+ .try_collect::<Vec<_>>(),
1325
1485
  async {
1326
1486
  tx.get_ranges_keyvalues(
1327
1487
  universaldb::RangeOption {
1328
- mode: StreamingMode::WantAll,
1329
- ..(&state_subspace).into()
1488
+ mode: StreamingMode::Exact,
1489
+ limit: Some(1),
1490
+ ..(&output_subspace).into()
1330
1491
  },
1331
1492
  Serializable,
1332
1493
  )
1333
- .try_collect::<Vec<_>>()
1494
+ .try_next()
1334
1495
  .await
1496
+ .map(|entry| entry.is_some())
1335
1497
  },
1336
1498
  async {
1337
1499
  let mut events_by_location: HashMap<Location, Vec<Event>> =
@@ -1363,7 +1525,7 @@ impl Database for DatabaseKv {
1363
1525
  if current_event.location.is_empty() {
1364
1526
  current_event =
1365
1527
  WorkflowHistoryEventBuilder::new(
1366
- partial_key.location,
1528
+ partial_key.location.clone(),
1367
1529
  );
1368
1530
  } else {
1369
1531
  // Insert current event builder to into wf events and
@@ -1371,13 +1533,24 @@ impl Database for DatabaseKv {
1371
1533
  let previous_event = std::mem::replace(
1372
1534
  &mut current_event,
1373
1535
  WorkflowHistoryEventBuilder::new(
1374
- partial_key.location,
1536
+ partial_key.location.clone(),
1375
1537
  ),
1376
1538
  );
1377
- events_by_location
1378
- .entry(previous_event.location.root())
1379
- .or_default()
1380
- .push(Event::try_from(previous_event)?);
1539
+
1540
+ let loc = previous_event.location.clone();
1541
+
1542
+ match Event::try_from(previous_event) {
1543
+ Ok(event) => {
1544
+ events_by_location
1545
+ .entry(loc.root())
1546
+ .or_default()
1547
+ .push(event);
1548
+ }
1549
+ Err(err) => {
1550
+ tracing::error!(workflow_id=?wf.workflow_id, location=?loc, ?err, "failed building workflow history");
1551
+ return Ok(None);
1552
+ }
1553
+ }
1381
1554
  }
1382
1555
  }
1383
1556
 
@@ -1477,26 +1650,111 @@ impl Database for DatabaseKv {
1477
1650
 
1478
1651
  current_event.inner_event_type =
1479
1652
  Some(inner_event_type);
1653
+ } else if let Ok(key) = self
1654
+ .subspace
1655
+ .unpack::<keys::history::InnerVersionKey>(
1656
+ entry.key(),
1657
+ ) {
1658
+ let inner_version =
1659
+ key.deserialize(entry.value())?;
1660
+
1661
+ current_event.inner_version = Some(inner_version);
1662
+ } else if let Ok(key) = self
1663
+ .subspace
1664
+ .unpack::<keys::history::IndexedNameKey>(
1665
+ entry.key(),
1666
+ ) {
1667
+ if current_event.indexed_names.len() != key.index {
1668
+ tracing::error!(
1669
+ ?wf,
1670
+ location=?partial_key.location,
1671
+ expected=%current_event.indexed_names.len(),
1672
+ got=%key.index,
1673
+ "corrupt history, indexed name doesn't exist yet or is out of order"
1674
+ );
1675
+ return Ok(None);
1676
+ }
1677
+
1678
+ let name = key.deserialize(entry.value())?;
1679
+ current_event.indexed_names.push(name);
1680
+ } else if let Ok(key) =
1681
+ self.subspace
1682
+ .unpack::<keys::history::IndexedInputChunkKey>(
1683
+ entry.key(),
1684
+ ) {
1685
+ if let Some(input_chunks) = current_event
1686
+ .indexed_input_chunks
1687
+ .get_mut(key.index)
1688
+ {
1689
+ input_chunks.push(entry);
1690
+ } else if current_event.indexed_input_chunks.len()
1691
+ == key.index
1692
+ {
1693
+ current_event
1694
+ .indexed_input_chunks
1695
+ .push(vec![entry]);
1696
+ } else {
1697
+ tracing::error!(
1698
+ ?wf,
1699
+ location=?partial_key.location,
1700
+ expected=%current_event.indexed_input_chunks.len(),
1701
+ got=%key.index,
1702
+ "corrupt history, indexed chunk doesn't exist yet or is out of order"
1703
+ );
1704
+ return Ok(None);
1705
+ }
1480
1706
  }
1481
1707
 
1482
1708
  // We ignore keys we don't need (like tags)
1483
1709
  }
1484
1710
  // Insert final event
1485
1711
  if !current_event.location.is_empty() {
1486
- events_by_location
1487
- .entry(current_event.location.root())
1488
- .or_default()
1489
- .push(Event::try_from(current_event)?);
1712
+ let loc = current_event.location.clone();
1713
+
1714
+ match Event::try_from(current_event) {
1715
+ Ok(event) => {
1716
+ events_by_location
1717
+ .entry(loc.root())
1718
+ .or_default()
1719
+ .push(event);
1720
+ }
1721
+ Err(err) => {
1722
+ tracing::error!(workflow_id=?wf.workflow_id, location=?loc, ?err, "failed building workflow history");
1723
+ return Ok(None);
1724
+ }
1725
+ }
1490
1726
  }
1491
1727
 
1492
- Ok(events_by_location)
1728
+ Ok(Some(events_by_location))
1493
1729
  }
1494
1730
  )?;
1495
1731
 
1732
+ if has_output {
1733
+ tracing::warn!(workflow_id=?wf.workflow_id, "workflow already completed, ignoring");
1734
+
1735
+ // Clear lease
1736
+ let lease_key = keys::workflow::LeaseKey::new(wf.workflow_id);
1737
+ tx.clear(&self.subspace.pack(&lease_key));
1738
+ let worker_id_key =
1739
+ keys::workflow::WorkerIdKey::new(wf.workflow_id);
1740
+ tx.clear(&self.subspace.pack(&worker_id_key));
1741
+
1742
+ return Ok(None);
1743
+ }
1744
+
1745
+ let Some(events) = events else {
1746
+ return Ok(None);
1747
+ };
1748
+
1749
+ let (Some(create_ts_entry), Some(ray_id_entry)) = (create_ts_entry, ray_id_entry) else {
1750
+ tracing::error!(workflow_id=?wf.workflow_id, "create_ts and ray_id keys should exist");
1751
+ return Ok(None);
1752
+ };
1753
+
1496
1754
  let create_ts = create_ts_key
1497
- .deserialize(&create_ts_entry.context("key should exist")?)?;
1755
+ .deserialize(&create_ts_entry)?;
1498
1756
  let ray_id = ray_id_key
1499
- .deserialize(&ray_id_entry.context("key should exist")?)?;
1757
+ .deserialize(&ray_id_entry)?;
1500
1758
  let input = input_key.combine(input_chunks)?;
1501
1759
  let state = if state_chunks.is_empty() {
1502
1760
  serde_json::value::RawValue::NULL.to_owned()
@@ -1504,7 +1762,7 @@ impl Database for DatabaseKv {
1504
1762
  state_key.combine(state_chunks)?
1505
1763
  };
1506
1764
 
1507
- Result::<_>::Ok(PulledWorkflowData {
1765
+ anyhow::Ok(Some(PulledWorkflowData {
1508
1766
  workflow_id: wf.workflow_id,
1509
1767
  workflow_name: wf.workflow_name,
1510
1768
  create_ts,
@@ -1513,11 +1771,12 @@ impl Database for DatabaseKv {
1513
1771
  state,
1514
1772
  wake_deadline_ts: wf.wake_deadline_ts,
1515
1773
  events,
1516
- })
1774
+ }))
1517
1775
  }
1518
1776
  })
1519
1777
  // TODO: How to get rid of this buffer?
1520
1778
  .buffer_unordered(512)
1779
+ .try_filter_map(|x| std::future::ready(Ok(x)))
1521
1780
  .try_collect::<Vec<_>>()
1522
1781
  .instrument(tracing::trace_span!("map_to_partial_workflow"))
1523
1782
  .await
@@ -1525,20 +1784,27 @@ impl Database for DatabaseKv {
1525
1784
  })
1526
1785
  .custom_instrument(tracing::info_span!("pull_workflow_history_tx"))
1527
1786
  .await
1528
- .map_err(WorkflowError::Udb)?;
1787
+ .context("failed to pull workflow history")
1788
+ };
1789
+
1790
+ let (_, pulled_workflows) =
1791
+ tokio::try_join!(clear_secondary_idx_fut, pulled_workflows_fut,)
1792
+ .map_err(WorkflowError::Udb)?;
1529
1793
 
1530
1794
  let dt2 = start_instant2.elapsed().as_secs_f64();
1531
1795
  let dt = start_instant.elapsed().as_secs_f64();
1532
1796
  metrics::LAST_PULL_WORKFLOWS_FULL_DURATION
1533
- .record(dt, &[KeyValue::new("worker_id", worker_id_str.clone())]);
1797
+ .with_label_values(&[worker_id_str.as_str()])
1798
+ .set(dt);
1534
1799
  metrics::PULL_WORKFLOWS_FULL_DURATION
1535
- .record(dt, &[KeyValue::new("worker_id", worker_id_str.clone())]);
1536
- metrics::LAST_PULL_WORKFLOWS_HISTORY_DURATION.record(
1537
- dt2 as u64,
1538
- &[KeyValue::new("worker_id", worker_id_str.clone())],
1539
- );
1800
+ .with_label_values(&[worker_id_str.as_str()])
1801
+ .observe(dt);
1802
+ metrics::LAST_PULL_WORKFLOWS_HISTORY_DURATION
1803
+ .with_label_values(&[worker_id_str.as_str()])
1804
+ .set(dt2);
1540
1805
  metrics::PULL_WORKFLOWS_HISTORY_DURATION
1541
- .record(dt2, &[KeyValue::new("worker_id", worker_id_str.clone())]);
1806
+ .with_label_values(&[worker_id_str.as_str()])
1807
+ .observe(dt2);
1542
1808
 
1543
1809
  Ok(pulled_workflows)
1544
1810
  }
@@ -1549,15 +1815,18 @@ impl Database for DatabaseKv {
1549
1815
  workflow_id: Id,
1550
1816
  workflow_name: &str,
1551
1817
  output: &serde_json::value::RawValue,
1818
+ prune_variant: PruneVariant,
1552
1819
  ) -> WorkflowResult<()> {
1553
1820
  let start_instant = Instant::now();
1554
1821
 
1555
- let wrote_to_wake_idx = self
1822
+ let (wrote_to_wake_idx, pending_signal_cleared_count) = self
1556
1823
  .pools
1557
1824
  .udb()
1558
1825
  .map_err(WorkflowError::PoolsGeneric)?
1559
1826
  .run(|tx| {
1560
1827
  async move {
1828
+ let tx = tx.with_subspace(self.subspace.clone());
1829
+
1561
1830
  let sub_workflow_wake_subspace = self
1562
1831
  .subspace
1563
1832
  .subspace(&keys::wake::SubWorkflowWakeKey::subspace(workflow_id));
@@ -1575,34 +1844,29 @@ impl Database for DatabaseKv {
1575
1844
  Serializable,
1576
1845
  );
1577
1846
 
1578
- let (wrote_to_wake_idx, tag_keys, wake_deadline_entry) = tokio::try_join!(
1847
+ let (wrote_to_wake_idx, tag_keys, wake_deadline) = tokio::try_join!(
1579
1848
  // Check for other workflows waiting on this one, wake all
1580
1849
  async {
1581
1850
  let mut wrote_to_wake_idx = false;
1582
1851
 
1583
1852
  while let Some(entry) = stream.try_next().await? {
1584
- let sub_workflow_wake_key =
1585
- self.subspace
1586
- .unpack::<keys::wake::SubWorkflowWakeKey>(&entry.key())?;
1587
- let workflow_name =
1588
- sub_workflow_wake_key.deserialize(entry.value())?;
1589
-
1590
- let wake_condition_key = keys::wake::WorkflowWakeConditionKey::new(
1591
- workflow_name,
1592
- sub_workflow_wake_key.workflow_id,
1593
- keys::wake::WakeCondition::SubWorkflow {
1594
- sub_workflow_id: workflow_id,
1595
- },
1596
- );
1853
+ let (sub_workflow_wake_key, workflow_name) =
1854
+ tx.read_entry::<keys::wake::SubWorkflowWakeKey>(&entry)?;
1597
1855
 
1598
1856
  // Add wake condition for workflow
1599
- tx.set(
1600
- &self.subspace.pack(&wake_condition_key),
1601
- &wake_condition_key.serialize(())?,
1602
- );
1857
+ tx.write(
1858
+ &keys::wake::WorkflowWakeConditionKey::new(
1859
+ workflow_name,
1860
+ sub_workflow_wake_key.workflow_id,
1861
+ keys::wake::WakeCondition::SubWorkflow {
1862
+ sub_workflow_id: workflow_id,
1863
+ },
1864
+ ),
1865
+ (),
1866
+ )?;
1603
1867
 
1604
1868
  // Clear secondary index
1605
- tx.clear(entry.key());
1869
+ tx.delete(&sub_workflow_wake_key);
1606
1870
 
1607
1871
  wrote_to_wake_idx = true;
1608
1872
  }
@@ -1618,32 +1882,27 @@ impl Database for DatabaseKv {
1618
1882
  Serializable,
1619
1883
  )
1620
1884
  .map(|res| {
1621
- self.subspace
1622
- .unpack::<keys::workflow::TagKey>(res?.key())
1885
+ tx.unpack::<keys::workflow::TagKey>(res?.key())
1623
1886
  .map_err(anyhow::Error::from)
1624
1887
  })
1625
1888
  .try_collect::<Vec<_>>(),
1626
- tx.get(&self.subspace.pack(&wake_deadline_key), Serializable),
1889
+ tx.read_opt(&wake_deadline_key, Serializable),
1627
1890
  )?;
1628
1891
 
1629
1892
  for key in tag_keys {
1630
- let by_name_and_tag_key = keys::workflow::ByNameAndTagKey::new(
1893
+ tx.delete(&keys::workflow::ByNameAndTagKey::new(
1631
1894
  workflow_name.to_string(),
1632
1895
  key.k,
1633
1896
  key.v,
1634
1897
  workflow_id,
1635
- );
1636
- tx.clear(&self.subspace.pack(&by_name_and_tag_key));
1898
+ ));
1637
1899
  }
1638
1900
 
1639
1901
  // Clear null key
1640
- {
1641
- let by_name_and_tag_key = keys::workflow::ByNameAndTagKey::null(
1642
- workflow_name.to_string(),
1643
- workflow_id,
1644
- );
1645
- tx.clear(&self.subspace.pack(&by_name_and_tag_key));
1646
- }
1902
+ tx.delete(&keys::workflow::ByNameAndTagKey::null(
1903
+ workflow_name.to_string(),
1904
+ workflow_id,
1905
+ ));
1647
1906
 
1648
1907
  // Get and clear the pending deadline wake condition, if any. This could be put in the
1649
1908
  // `pull_workflows` function (where we clear secondary indexes) but we chose to clear it
@@ -1651,22 +1910,16 @@ impl Database for DatabaseKv {
1651
1910
  // it inserting more wake conditions. This reduces the load on `pull_workflows`. The
1652
1911
  // reason this isn't immediately cleared in `pull_workflows` along with the rest of the
1653
1912
  // wake conditions is because it might be in the future.
1654
- if let Some(raw) = wake_deadline_entry {
1655
- let deadline_ts = wake_deadline_key.deserialize(&raw)?;
1656
-
1657
- let wake_condition_key = keys::wake::WorkflowWakeConditionKey::new(
1913
+ if let Some(deadline_ts) = wake_deadline {
1914
+ tx.delete(&keys::wake::WorkflowWakeConditionKey::new(
1658
1915
  workflow_name.to_string(),
1659
1916
  workflow_id,
1660
1917
  keys::wake::WakeCondition::Deadline { deadline_ts },
1661
- );
1662
-
1663
- tx.clear(&self.subspace.pack(&wake_condition_key));
1918
+ ));
1664
1919
  }
1665
1920
 
1666
1921
  // Clear "has wake condition"
1667
- let has_wake_condition_key =
1668
- keys::workflow::HasWakeConditionKey::new(workflow_id);
1669
- tx.clear(&self.subspace.pack(&has_wake_condition_key));
1922
+ tx.delete(&keys::workflow::HasWakeConditionKey::new(workflow_id));
1670
1923
 
1671
1924
  // Write output
1672
1925
  let output_key = keys::workflow::OutputKey::new(workflow_id);
@@ -1674,30 +1927,84 @@ impl Database for DatabaseKv {
1674
1927
  for (i, chunk) in output_key.split_ref(output)?.into_iter().enumerate() {
1675
1928
  let chunk_key = output_key.chunk(i);
1676
1929
 
1677
- tx.set(&self.subspace.pack(&chunk_key), &chunk);
1930
+ tx.set(&tx.pack(&chunk_key), &chunk);
1678
1931
  }
1679
1932
 
1680
1933
  // Clear lease
1681
- let lease_key = keys::workflow::LeaseKey::new(workflow_id);
1682
- tx.clear(&self.subspace.pack(&lease_key));
1683
- let worker_id_key = keys::workflow::WorkerIdKey::new(workflow_id);
1684
- tx.clear(&self.subspace.pack(&worker_id_key));
1934
+ tx.delete(&keys::workflow::LeaseKey::new(workflow_id));
1935
+ tx.delete(&keys::workflow::WorkerIdKey::new(workflow_id));
1936
+
1937
+ // Clear pending signals metric for observability
1938
+ let metrics_subspace = self
1939
+ .subspace
1940
+ .subspace(&keys::workflow::MetricKey::subspace(workflow_id));
1941
+ let mut stream = tx.get_ranges_keyvalues(
1942
+ universaldb::RangeOption {
1943
+ mode: StreamingMode::WantAll,
1944
+ ..(&metrics_subspace).into()
1945
+ },
1946
+ Serializable,
1947
+ );
1948
+
1949
+ let mut pending_signal_cleared_count = 0;
1950
+ loop {
1951
+ let Some(entry) = stream.try_next().await? else {
1952
+ break;
1953
+ };
1954
+
1955
+ let (key, metric_count) =
1956
+ tx.read_entry::<keys::workflow::MetricKey>(&entry)?;
1957
+
1958
+ // Ignore negatives and zero
1959
+ if metric_count as isize <= 0 {
1960
+ continue;
1961
+ }
1962
+
1963
+ match key.metric {
1964
+ keys::workflow::Metric::SignalPending(signal_name) => {
1965
+ update_metric_by(
1966
+ &tx,
1967
+ Some(keys::metric::Metric::SignalPending2(signal_name)),
1968
+ None,
1969
+ metric_count,
1970
+ );
1971
+ pending_signal_cleared_count += metric_count;
1972
+ }
1973
+ }
1974
+ }
1975
+
1976
+ // Insert into prune idx if applicable
1977
+ match prune_variant {
1978
+ PruneVariant::All | PruneVariant::History => {
1979
+ tx.write(
1980
+ &keys::workflow::PruneIdxKey::new(workflow_id, prune_variant),
1981
+ (),
1982
+ )?;
1983
+ }
1984
+ PruneVariant::None => {}
1985
+ }
1986
+
1987
+ tx.write(
1988
+ &keys::workflow::CompleteTsKey::new(workflow_id),
1989
+ rivet_util::timestamp::now(),
1990
+ )?;
1685
1991
 
1686
1992
  update_metric(
1687
- &tx.with_subspace(self.subspace.clone()),
1688
- Some(keys::metric::GaugeMetric::WorkflowActive(
1993
+ &tx,
1994
+ Some(keys::metric::Metric::WorkflowActive(
1689
1995
  workflow_name.to_string(),
1690
1996
  )),
1691
- Some(keys::metric::GaugeMetric::WorkflowComplete(
1997
+ Some(keys::metric::Metric::WorkflowComplete(
1692
1998
  workflow_name.to_string(),
1693
1999
  )),
1694
2000
  );
1695
2001
 
1696
- Ok(wrote_to_wake_idx)
2002
+ Ok((wrote_to_wake_idx, pending_signal_cleared_count))
1697
2003
  }
1698
2004
  })
1699
2005
  .custom_instrument(tracing::info_span!("complete_workflows_tx"))
1700
2006
  .await
2007
+ .context("failed to complete workflow")
1701
2008
  .map_err(WorkflowError::Udb)?;
1702
2009
 
1703
2010
  // Wake worker again in case some other workflow was waiting for this one to complete
@@ -1706,11 +2013,14 @@ impl Database for DatabaseKv {
1706
2013
  self.bump(BumpSubSubject::Worker);
1707
2014
  }
1708
2015
 
2016
+ if pending_signal_cleared_count != 0 {
2017
+ tracing::debug!(count=%pending_signal_cleared_count, "cleared pending signals after workflow completed");
2018
+ }
2019
+
1709
2020
  let dt = start_instant.elapsed().as_secs_f64();
1710
- metrics::COMPLETE_WORKFLOW_DURATION.record(
1711
- dt,
1712
- &[KeyValue::new("workflow_name", workflow_name.to_string())],
1713
- );
2021
+ metrics::COMPLETE_WORKFLOW_DURATION
2022
+ .with_label_values(&[workflow_name])
2023
+ .observe(dt);
1714
2024
 
1715
2025
  Ok(())
1716
2026
  }
@@ -1834,13 +2144,13 @@ impl Database for DatabaseKv {
1834
2144
 
1835
2145
  update_metric(
1836
2146
  &tx.with_subspace(self.subspace.clone()),
1837
- Some(keys::metric::GaugeMetric::WorkflowActive(
2147
+ Some(keys::metric::Metric::WorkflowActive(
1838
2148
  workflow_name.to_string(),
1839
2149
  )),
1840
2150
  Some(if has_wake_condition {
1841
- keys::metric::GaugeMetric::WorkflowSleeping(workflow_name.to_string())
2151
+ keys::metric::Metric::WorkflowSleeping(workflow_name.to_string())
1842
2152
  } else {
1843
- keys::metric::GaugeMetric::WorkflowDead(
2153
+ keys::metric::Metric::WorkflowDead(
1844
2154
  workflow_name.to_string(),
1845
2155
  error.to_string(),
1846
2156
  )
@@ -1852,6 +2162,7 @@ impl Database for DatabaseKv {
1852
2162
  })
1853
2163
  .custom_instrument(tracing::info_span!("commit_workflow_tx"))
1854
2164
  .await
2165
+ .context("failed to commit workflow")
1855
2166
  .map_err(WorkflowError::Udb)?;
1856
2167
 
1857
2168
  // Always wake the worker immediately again. This is an IMPORTANT implementation detail to prevent
@@ -1874,16 +2185,15 @@ impl Database for DatabaseKv {
1874
2185
  self.bump(BumpSubSubject::Worker);
1875
2186
 
1876
2187
  let dt = start_instant.elapsed().as_secs_f64();
1877
- metrics::COMMIT_WORKFLOW_DURATION.record(
1878
- dt,
1879
- &[KeyValue::new("workflow_name", workflow_name.to_string())],
1880
- );
2188
+ metrics::COMMIT_WORKFLOW_DURATION
2189
+ .with_label_values(&[workflow_name])
2190
+ .observe(dt);
1881
2191
 
1882
2192
  Ok(())
1883
2193
  }
1884
2194
 
1885
2195
  #[tracing::instrument(skip_all)]
1886
- async fn pull_next_signal(
2196
+ async fn pull_next_signals(
1887
2197
  &self,
1888
2198
  workflow_id: Id,
1889
2199
  _workflow_name: &str,
@@ -1891,176 +2201,191 @@ impl Database for DatabaseKv {
1891
2201
  location: &Location,
1892
2202
  version: usize,
1893
2203
  _loop_location: Option<&Location>,
1894
- last_try: bool,
1895
- ) -> WorkflowResult<Option<SignalData>> {
2204
+ limit: usize,
2205
+ last_attempt: bool,
2206
+ ) -> WorkflowResult<Vec<SignalData>> {
1896
2207
  let owned_filter = filter
1897
2208
  .into_iter()
1898
2209
  .map(|x| x.to_string())
1899
2210
  .collect::<Vec<_>>();
1900
2211
 
1901
- // Fetch signal from UDB
1902
- let signal =
1903
- self.pools
1904
- .udb()
1905
- .map_err(WorkflowError::PoolsGeneric)?
1906
- .run(|tx| {
1907
- let owned_filter = owned_filter.clone();
2212
+ // Fetch signals from UDB
2213
+ let signals = self
2214
+ .pools
2215
+ .udb()
2216
+ .map_err(WorkflowError::PoolsGeneric)?
2217
+ .run(|tx| {
2218
+ let owned_filter = owned_filter.clone();
1908
2219
 
1909
- async move {
1910
- let signal = {
1911
- // Create a stream for each signal name subspace
1912
- let streams = owned_filter
1913
- .iter()
1914
- .map(|signal_name| {
1915
- let pending_signal_subspace = self.subspace.subspace(
1916
- &keys::workflow::PendingSignalKey::subspace(
1917
- workflow_id,
1918
- signal_name.to_string(),
1919
- ),
1920
- );
2220
+ async move {
2221
+ // Fetch signals from all streams at the same time
2222
+ let mut signals = futures_util::stream::iter(owned_filter.clone())
2223
+ .map(|signal_name| {
2224
+ let pending_signal_subspace = self.subspace.subspace(
2225
+ &keys::workflow::PendingSignalKey::subspace(
2226
+ workflow_id,
2227
+ signal_name.to_string(),
2228
+ ),
2229
+ );
1921
2230
 
1922
- tx.get_ranges_keyvalues(
1923
- universaldb::RangeOption {
1924
- mode: StreamingMode::WantAll,
1925
- limit: Some(1),
1926
- ..(&pending_signal_subspace).into()
1927
- },
1928
- // NOTE: This is Serializable because any insert into this subspace
1929
- // should cause a conflict and retry of this txn
1930
- Serializable,
1931
- )
1932
- })
1933
- .collect::<Vec<_>>();
1934
-
1935
- // Fetch the next entry from all streams at the same time
1936
- let mut results = futures_util::future::try_join_all(
1937
- streams.into_iter().map(|mut stream| async move {
1938
- if let Some(entry) = stream.try_next().await? {
1939
- Result::<_>::Ok(Some((
1940
- entry.key().to_vec(),
1941
- self.subspace
1942
- .unpack::<keys::workflow::PendingSignalKey>(
1943
- &entry.key(),
1944
- )?,
1945
- )))
1946
- } else {
1947
- Ok(None)
1948
- }
1949
- }),
1950
- )
1951
- .instrument(tracing::trace_span!("map_signals"))
1952
- .await?;
1953
-
1954
- // Sort by ts
1955
- results.sort_by_key(|res| res.as_ref().map(|(_, key)| key.ts));
1956
-
1957
- results.into_iter().flatten().next().map(
1958
- |(raw_key, pending_signal_key)| {
1959
- (
1960
- raw_key,
1961
- pending_signal_key.signal_name,
1962
- pending_signal_key.ts,
1963
- pending_signal_key.signal_id,
1964
- )
2231
+ tx.get_ranges_keyvalues(
2232
+ universaldb::RangeOption {
2233
+ mode: StreamingMode::Exact,
2234
+ // Each individual stream is limited to our max limit, we apply this
2235
+ // limit again after they are all aggregated further down
2236
+ limit: Some(limit),
2237
+ ..(&pending_signal_subspace).into()
1965
2238
  },
2239
+ // NOTE: This is Serializable because any insert into this subspace
2240
+ // should cause a conflict and retry of this txn
2241
+ Serializable,
1966
2242
  )
1967
- };
1968
-
1969
- // Signal found
1970
- if let Some((raw_key, signal_name, ts, signal_id)) = signal {
1971
- let ack_ts_key = keys::signal::AckTsKey::new(signal_id);
2243
+ })
2244
+ .flatten()
2245
+ .map(|res| {
2246
+ let entry = res?;
1972
2247
 
1973
- // Ack signal
1974
- tx.add_conflict_range(
1975
- &raw_key,
1976
- &end_of_key_range(&raw_key),
1977
- ConflictRangeType::Read,
1978
- )?;
1979
- tx.set(
1980
- &self.subspace.pack(&ack_ts_key),
1981
- &ack_ts_key.serialize(rivet_util::timestamp::now())?,
1982
- );
2248
+ anyhow::Ok(
2249
+ self.subspace
2250
+ .unpack::<keys::workflow::PendingSignalKey>(&entry.key())?,
2251
+ )
2252
+ })
2253
+ .try_collect::<Vec<_>>()
2254
+ .instrument(tracing::trace_span!("map_signals"))
2255
+ .await?;
1983
2256
 
1984
- update_metric(
1985
- &tx.with_subspace(self.subspace.clone()),
1986
- Some(keys::metric::GaugeMetric::SignalPending(
1987
- signal_name.to_string(),
1988
- )),
1989
- None,
1990
- );
2257
+ if !signals.is_empty() {
2258
+ let now = rivet_util::timestamp::now();
1991
2259
 
1992
- // TODO: Split txn into two after acking here?
2260
+ // Insert history event
2261
+ keys::history::insert::signals_event(
2262
+ &self.subspace,
2263
+ &tx,
2264
+ workflow_id,
2265
+ &location,
2266
+ version,
2267
+ now,
2268
+ )?;
1993
2269
 
1994
- // Clear pending signal key
1995
- tx.clear(&raw_key);
2270
+ // Sort by ts after aggregating but before applying limit again. Signals are already
2271
+ // in order by ts in their individual streams so this should be cheap
2272
+ signals.sort_by_key(|key| key.ts);
2273
+
2274
+ // Read signal data in parallel
2275
+ let signals =
2276
+ futures_util::stream::iter(signals.into_iter().take(limit).enumerate())
2277
+ .map(|(index, key)| {
2278
+ let tx = tx.clone();
2279
+ async move {
2280
+ let ack_ts_key = keys::signal::AckTsKey::new(key.signal_id);
2281
+
2282
+ let packed_key = self.subspace.pack(&key);
2283
+
2284
+ // Ack signal
2285
+ tx.add_conflict_range(
2286
+ &packed_key,
2287
+ &end_of_key_range(&packed_key),
2288
+ ConflictRangeType::Read,
2289
+ )?;
2290
+ tx.set(
2291
+ &self.subspace.pack(&ack_ts_key),
2292
+ &ack_ts_key.serialize(now)?,
2293
+ );
1996
2294
 
1997
- // Read signal body
1998
- let body_key = keys::signal::BodyKey::new(signal_id);
1999
- let body_subspace = self.subspace.subspace(&body_key);
2295
+ update_metric(
2296
+ &tx.with_subspace(self.subspace.clone()),
2297
+ Some(keys::metric::Metric::SignalPending2(
2298
+ key.signal_name.clone(),
2299
+ )),
2300
+ None,
2301
+ );
2302
+ update_wf_metric(
2303
+ &tx.with_subspace(self.subspace.clone()),
2304
+ workflow_id,
2305
+ Some(keys::workflow::Metric::SignalPending(
2306
+ key.signal_name.clone(),
2307
+ )),
2308
+ None,
2309
+ );
2000
2310
 
2001
- let chunks = tx
2002
- .get_ranges_keyvalues(
2003
- universaldb::RangeOption {
2004
- mode: StreamingMode::WantAll,
2005
- ..(&body_subspace).into()
2006
- },
2007
- Serializable,
2008
- )
2311
+ // Clear pending signal key
2312
+ tx.clear(&packed_key);
2313
+
2314
+ // Read signal body
2315
+ let body_key = keys::signal::BodyKey::new(key.signal_id);
2316
+ let body_subspace = self.subspace.subspace(&body_key);
2317
+
2318
+ let chunks = tx
2319
+ .get_ranges_keyvalues(
2320
+ universaldb::RangeOption {
2321
+ mode: StreamingMode::WantAll,
2322
+ ..(&body_subspace).into()
2323
+ },
2324
+ Serializable,
2325
+ )
2326
+ .try_collect::<Vec<_>>()
2327
+ .await?;
2328
+
2329
+ let body = body_key.combine(chunks)?;
2330
+
2331
+ // Insert each signal body into the signals event
2332
+ keys::history::insert::signals_event_signal(
2333
+ &self.subspace,
2334
+ &tx,
2335
+ workflow_id,
2336
+ &location,
2337
+ index,
2338
+ key.signal_id,
2339
+ &key.signal_name,
2340
+ &body,
2341
+ )?;
2342
+
2343
+ anyhow::Ok(SignalData {
2344
+ signal_id: key.signal_id,
2345
+ signal_name: key.signal_name,
2346
+ create_ts: key.ts,
2347
+ body,
2348
+ })
2349
+ }
2350
+ })
2351
+ // IMPORTANT: The signals need to stay in order
2352
+ .buffered(1024)
2009
2353
  .try_collect::<Vec<_>>()
2010
2354
  .await?;
2011
2355
 
2012
- let body = body_key.combine(chunks)?;
2013
-
2014
- // Insert history event
2015
- keys::history::insert::signal_event(
2016
- &self.subspace,
2017
- &tx,
2356
+ Ok(signals)
2357
+ }
2358
+ // No signals found
2359
+ else {
2360
+ // Write signal wake index if no signal was received. Normally this is done in
2361
+ // `commit_workflow` but without this code there would be a race condition if the
2362
+ // signal is published between after this transaction and before `commit_workflow`.
2363
+ // There is a possibility of `commit_workflow` NOT writing a signal secondary index
2364
+ // after this in which case there might be an unnecessary wake condition inserted
2365
+ // causing the workflow to wake up again, but this is not as big of an issue because
2366
+ // workflow wakes should be idempotent if no events happen.
2367
+ // It is important that this is only written on the last try to pull workflows
2368
+ // (the workflow engine internally retries a few times) because it should only
2369
+ // write signal wake indexes before going to sleep (with err `NoSignalFound`) and
2370
+ // not during a retry.
2371
+ if last_attempt {
2372
+ self.write_signal_wake_idxs(
2018
2373
  workflow_id,
2019
- &location,
2020
- version,
2021
- rivet_util::timestamp::now(),
2022
- signal_id,
2023
- &signal_name,
2024
- &body,
2374
+ &owned_filter.iter().map(|x| x.as_str()).collect::<Vec<_>>(),
2375
+ &tx,
2025
2376
  )?;
2026
-
2027
- Ok(Some(SignalData {
2028
- signal_id,
2029
- signal_name,
2030
- create_ts: ts,
2031
- body,
2032
- }))
2033
2377
  }
2034
- // No signal found
2035
- else {
2036
- // Write signal wake index if no signal was received. Normally this is done in
2037
- // `commit_workflow` but without this code there would be a race condition if the
2038
- // signal is published between after this transaction and before `commit_workflow`.
2039
- // There is a possibility of `commit_workflow` NOT writing a signal secondary index
2040
- // after this in which case there might be an unnecessary wake condition inserted
2041
- // causing the workflow to wake up again, but this is not as big of an issue because
2042
- // workflow wakes should be idempotent if no events happen.
2043
- // It is important that this is only written on the last try to pull workflows
2044
- // (the workflow engine internally retries a few times) because it should only
2045
- // write signal wake indexes before going to sleep (with err `NoSignalFound`) and
2046
- // not during a retry.
2047
- if last_try {
2048
- self.write_signal_wake_idxs(
2049
- workflow_id,
2050
- &owned_filter.iter().map(|x| x.as_str()).collect::<Vec<_>>(),
2051
- &tx,
2052
- )?;
2053
- }
2054
2378
 
2055
- Ok(None)
2056
- }
2379
+ Ok(Vec::new())
2057
2380
  }
2058
- })
2059
- .custom_instrument(tracing::info_span!("pull_next_signal_tx"))
2060
- .await
2061
- .map_err(WorkflowError::Udb)?;
2381
+ }
2382
+ })
2383
+ .custom_instrument(tracing::info_span!("pull_next_signals_tx"))
2384
+ .await
2385
+ .context("failed to pull signals")
2386
+ .map_err(WorkflowError::Udb)?;
2062
2387
 
2063
- Ok(signal)
2388
+ Ok(signals)
2064
2389
  }
2065
2390
 
2066
2391
  #[tracing::instrument(skip_all)]
@@ -2075,6 +2400,7 @@ impl Database for DatabaseKv {
2075
2400
  .map_err(WorkflowError::PoolsGeneric)?
2076
2401
  .run(|tx| {
2077
2402
  async move {
2403
+ let name_key = keys::workflow::NameKey::new(sub_workflow_id);
2078
2404
  let input_key = keys::workflow::InputKey::new(sub_workflow_id);
2079
2405
  let input_subspace = self.subspace.subspace(&input_key);
2080
2406
  let state_key = keys::workflow::StateKey::new(sub_workflow_id);
@@ -2085,7 +2411,14 @@ impl Database for DatabaseKv {
2085
2411
  keys::workflow::HasWakeConditionKey::new(sub_workflow_id);
2086
2412
 
2087
2413
  // Read input and output
2088
- let (input_chunks, state_chunks, output_chunks, has_wake_condition_entry) = tokio::try_join!(
2414
+ let (
2415
+ name_entry,
2416
+ input_chunks,
2417
+ state_chunks,
2418
+ output_chunks,
2419
+ has_wake_condition_entry,
2420
+ ) = tokio::try_join!(
2421
+ tx.get(&self.subspace.pack(&name_key), Serializable),
2089
2422
  tx.get_ranges_keyvalues(
2090
2423
  universaldb::RangeOption {
2091
2424
  mode: StreamingMode::WantAll,
@@ -2147,6 +2480,8 @@ impl Database for DatabaseKv {
2147
2480
 
2148
2481
  Ok(Some(WorkflowData {
2149
2482
  workflow_id: sub_workflow_id,
2483
+ name: name_key
2484
+ .deserialize(&name_entry.context("name key should exist")?)?,
2150
2485
  input,
2151
2486
  state,
2152
2487
  output,
@@ -2157,6 +2492,7 @@ impl Database for DatabaseKv {
2157
2492
  })
2158
2493
  .custom_instrument(tracing::info_span!("get_sub_workflow_tx"))
2159
2494
  .await
2495
+ .context("failed to get sub workflow")
2160
2496
  .map_err(WorkflowError::Udb)
2161
2497
  }
2162
2498
 
@@ -2178,6 +2514,7 @@ impl Database for DatabaseKv {
2178
2514
  })
2179
2515
  .custom_instrument(tracing::info_span!("publish_signal_tx"))
2180
2516
  .await
2517
+ .context("failed to publish signal")
2181
2518
  .map_err(WorkflowError::Udb)?;
2182
2519
 
2183
2520
  self.bump(BumpSubSubject::SignalPublish {
@@ -2233,6 +2570,7 @@ impl Database for DatabaseKv {
2233
2570
  })
2234
2571
  .custom_instrument(tracing::info_span!("publish_signal_from_workflow_tx"))
2235
2572
  .await
2573
+ .context("failed to publish signal from workflow")
2236
2574
  .map_err(WorkflowError::Udb)?;
2237
2575
 
2238
2576
  self.bump(BumpSubSubject::SignalPublish { to_workflow_id });
@@ -2290,6 +2628,7 @@ impl Database for DatabaseKv {
2290
2628
  })
2291
2629
  .custom_instrument(tracing::info_span!("dispatch_sub_workflow_tx"))
2292
2630
  .await
2631
+ .context("failed to dispatch sub workflow")
2293
2632
  .map_err(WorkflowError::Udb)?;
2294
2633
 
2295
2634
  self.bump(BumpSubSubject::Worker);
@@ -2379,6 +2718,7 @@ impl Database for DatabaseKv {
2379
2718
  })
2380
2719
  .custom_instrument(tracing::info_span!("update_workflow_tags_tx"))
2381
2720
  .await
2721
+ .context("failed to update workflow tags")
2382
2722
  .map_err(WorkflowError::Udb)?;
2383
2723
 
2384
2724
  Ok(())
@@ -2413,6 +2753,7 @@ impl Database for DatabaseKv {
2413
2753
  })
2414
2754
  .custom_instrument(tracing::info_span!("update_workflow_state_tx"))
2415
2755
  .await
2756
+ .context("failed to update workflow state")
2416
2757
  .map_err(WorkflowError::Udb)?;
2417
2758
 
2418
2759
  Ok(())
@@ -2450,6 +2791,7 @@ impl Database for DatabaseKv {
2450
2791
  })
2451
2792
  .custom_instrument(tracing::info_span!("commit_workflow_activity_event_tx"))
2452
2793
  .await
2794
+ .context("failed to commit activity event")
2453
2795
  .map_err(WorkflowError::Udb)?;
2454
2796
 
2455
2797
  Ok(())
@@ -2486,6 +2828,7 @@ impl Database for DatabaseKv {
2486
2828
  })
2487
2829
  .custom_instrument(tracing::info_span!("commit_workflow_message_send_event_tx"))
2488
2830
  .await
2831
+ .context("failed to commit message send event")
2489
2832
  .map_err(WorkflowError::Udb)?;
2490
2833
 
2491
2834
  Ok(())
@@ -2544,23 +2887,41 @@ impl Database for DatabaseKv {
2544
2887
  keys::history::HistorySubspaceVariant::Forgotten,
2545
2888
  ));
2546
2889
 
2547
- let loop_events_subspace =
2548
- self.subspace
2549
- .subspace(&keys::history::EventHistorySubspaceKey::entire(
2550
- from_workflow_id,
2551
- location.clone(),
2552
- false,
2553
- ));
2890
+ // Start is {loop location, 0, ...}
2891
+ let loop_events_subspace_start = self
2892
+ .subspace
2893
+ .subspace(&keys::history::EventHistorySubspaceKey::entire(
2894
+ from_workflow_id,
2895
+ location.clone(),
2896
+ false,
2897
+ ))
2898
+ .range()
2899
+ .0;
2900
+ // End is {loop location, iteration - 1, ...}
2901
+ let loop_events_subspace_end = self
2902
+ .subspace
2903
+ .subspace(&keys::history::EventHistorySubspaceKey::new(
2904
+ from_workflow_id,
2905
+ location.clone(),
2906
+ iteration.saturating_sub(1),
2907
+ false,
2908
+ ))
2909
+ .range()
2910
+ .1;
2554
2911
 
2555
2912
  let mut stream = tx.get_ranges_keyvalues(
2556
2913
  universaldb::RangeOption {
2557
2914
  mode: StreamingMode::WantAll,
2558
- ..(&loop_events_subspace).into()
2915
+ ..(
2916
+ loop_events_subspace_start.as_slice(),
2917
+ loop_events_subspace_end.as_slice(),
2918
+ )
2919
+ .into()
2559
2920
  },
2560
2921
  Serializable,
2561
2922
  );
2562
2923
 
2563
- // Move all current events under this loop to the forgotten history
2924
+ // Move all events under this loop up to the current iteration to the forgotten history
2564
2925
  loop {
2565
2926
  let Some(entry) = stream.try_next().await? else {
2566
2927
  break;
@@ -2570,7 +2931,7 @@ impl Database for DatabaseKv {
2570
2931
  return Err(universaldb::tuple::PackError::BadPrefix.into());
2571
2932
  }
2572
2933
 
2573
- // Truncate tuple up to ACTIVE and replace it with FORGOTTEN
2934
+ // Truncate tuple up to ...ACTIVE and replace it with ...FORGOTTEN
2574
2935
  let truncated_key = &entry.key()[active_history_subspace.bytes().len()..];
2575
2936
  let forgotten_key =
2576
2937
  [forgotten_history_subspace.bytes(), truncated_key].concat();
@@ -2578,7 +2939,7 @@ impl Database for DatabaseKv {
2578
2939
  tx.set(&forgotten_key, entry.value());
2579
2940
  }
2580
2941
 
2581
- tx.clear_subspace_range(&loop_events_subspace);
2942
+ tx.clear_range(&loop_events_subspace_start, &loop_events_subspace_end);
2582
2943
 
2583
2944
  // Only retain last 100 events in forgotten history
2584
2945
  if iteration > 100 {
@@ -2607,6 +2968,7 @@ impl Database for DatabaseKv {
2607
2968
  })
2608
2969
  .custom_instrument(tracing::info_span!("upsert_loop_event_tx"))
2609
2970
  .await
2971
+ .context("failed to upsert loop event")
2610
2972
  .map_err(WorkflowError::Udb)?;
2611
2973
 
2612
2974
  Ok(())
@@ -2640,6 +3002,7 @@ impl Database for DatabaseKv {
2640
3002
  })
2641
3003
  .custom_instrument(tracing::info_span!("commit_workflow_sleep_event_tx"))
2642
3004
  .await
3005
+ .context("failed to commit sleep event")
2643
3006
  .map_err(WorkflowError::Udb)?;
2644
3007
 
2645
3008
  Ok(())
@@ -2668,6 +3031,7 @@ impl Database for DatabaseKv {
2668
3031
  })
2669
3032
  .custom_instrument(tracing::info_span!("update_workflow_sleep_state_tx"))
2670
3033
  .await
3034
+ .context("failed to update sleep state")
2671
3035
  .map_err(WorkflowError::Udb)?;
2672
3036
 
2673
3037
  Ok(())
@@ -2698,6 +3062,7 @@ impl Database for DatabaseKv {
2698
3062
  })
2699
3063
  .custom_instrument(tracing::info_span!("commit_workflow_branch_event_tx"))
2700
3064
  .await
3065
+ .context("failed to commit branch event")
2701
3066
  .map_err(WorkflowError::Udb)?;
2702
3067
 
2703
3068
  Ok(())
@@ -2731,6 +3096,7 @@ impl Database for DatabaseKv {
2731
3096
  })
2732
3097
  .custom_instrument(tracing::info_span!("commit_workflow_removed_event_tx"))
2733
3098
  .await
3099
+ .context("failed to commit removed event")
2734
3100
  .map_err(WorkflowError::Udb)?;
2735
3101
 
2736
3102
  Ok(())
@@ -2742,6 +3108,7 @@ impl Database for DatabaseKv {
2742
3108
  from_workflow_id: Id,
2743
3109
  location: &Location,
2744
3110
  version: usize,
3111
+ inner_version: usize,
2745
3112
  _loop_location: Option<&Location>,
2746
3113
  ) -> WorkflowResult<()> {
2747
3114
  self.pools
@@ -2754,6 +3121,7 @@ impl Database for DatabaseKv {
2754
3121
  from_workflow_id,
2755
3122
  location,
2756
3123
  version,
3124
+ inner_version,
2757
3125
  rivet_util::timestamp::now(),
2758
3126
  )?;
2759
3127
 
@@ -2763,12 +3131,19 @@ impl Database for DatabaseKv {
2763
3131
  "commit_workflow_version_check_event_tx"
2764
3132
  ))
2765
3133
  .await
3134
+ .context("failed to commit version check event")
2766
3135
  .map_err(WorkflowError::Udb)?;
2767
3136
 
2768
3137
  Ok(())
2769
3138
  }
2770
3139
  }
2771
3140
 
3141
+ impl Drop for DatabaseKv {
3142
+ fn drop(&mut self) {
3143
+ metrics::DB_INSTANCE.dec();
3144
+ }
3145
+ }
3146
+
2772
3147
  #[derive(Debug, Clone)]
2773
3148
  struct MinimalPulledWorkflow {
2774
3149
  workflow_id: Id,
@@ -2779,8 +3154,17 @@ struct MinimalPulledWorkflow {
2779
3154
 
2780
3155
  fn update_metric(
2781
3156
  tx: &universaldb::Transaction,
2782
- previous: Option<keys::metric::GaugeMetric>,
2783
- current: Option<keys::metric::GaugeMetric>,
3157
+ previous: Option<keys::metric::Metric>,
3158
+ current: Option<keys::metric::Metric>,
3159
+ ) {
3160
+ update_metric_by(tx, previous, current, 1)
3161
+ }
3162
+
3163
+ fn update_metric_by(
3164
+ tx: &universaldb::Transaction,
3165
+ previous: Option<keys::metric::Metric>,
3166
+ current: Option<keys::metric::Metric>,
3167
+ by: i64,
2784
3168
  ) {
2785
3169
  if &previous == &current {
2786
3170
  return;
@@ -2788,16 +3172,43 @@ fn update_metric(
2788
3172
 
2789
3173
  if let Some(previous) = previous {
2790
3174
  tx.atomic_op(
2791
- &keys::metric::GaugeMetricKey::new(previous),
2792
- &(-1isize).to_le_bytes(),
3175
+ &keys::metric::MetricKey::new(previous),
3176
+ &(by * -1).to_le_bytes(),
2793
3177
  MutationType::Add,
2794
3178
  );
2795
3179
  }
2796
3180
 
2797
3181
  if let Some(current) = current {
2798
3182
  tx.atomic_op(
2799
- &keys::metric::GaugeMetricKey::new(current),
2800
- &1usize.to_le_bytes(),
3183
+ &keys::metric::MetricKey::new(current),
3184
+ &by.to_le_bytes(),
3185
+ MutationType::Add,
3186
+ );
3187
+ }
3188
+ }
3189
+
3190
+ fn update_wf_metric(
3191
+ tx: &universaldb::Transaction,
3192
+ workflow_id: Id,
3193
+ previous: Option<keys::workflow::Metric>,
3194
+ current: Option<keys::workflow::Metric>,
3195
+ ) {
3196
+ if &previous == &current {
3197
+ return;
3198
+ }
3199
+
3200
+ if let Some(previous) = previous {
3201
+ tx.atomic_op(
3202
+ &keys::workflow::MetricKey::new(workflow_id, previous),
3203
+ &(-1i64).to_le_bytes(),
3204
+ MutationType::Add,
3205
+ );
3206
+ }
3207
+
3208
+ if let Some(current) = current {
3209
+ tx.atomic_op(
3210
+ &keys::workflow::MetricKey::new(workflow_id, current),
3211
+ &1i64.to_le_bytes(),
2801
3212
  MutationType::Add,
2802
3213
  );
2803
3214
  }
@@ -2818,6 +3229,10 @@ struct WorkflowHistoryEventBuilder {
2818
3229
  deadline_ts: Option<i64>,
2819
3230
  sleep_state: Option<SleepState>,
2820
3231
  inner_event_type: Option<EventType>,
3232
+ inner_version: Option<usize>,
3233
+
3234
+ indexed_names: Vec<String>,
3235
+ indexed_input_chunks: Vec<Vec<Value>>,
2821
3236
  }
2822
3237
 
2823
3238
  impl WorkflowHistoryEventBuilder {
@@ -2837,6 +3252,10 @@ impl WorkflowHistoryEventBuilder {
2837
3252
  deadline_ts: None,
2838
3253
  sleep_state: None,
2839
3254
  inner_event_type: None,
3255
+ inner_version: None,
3256
+
3257
+ indexed_names: Vec::new(),
3258
+ indexed_input_chunks: Vec::new(),
2840
3259
  }
2841
3260
  }
2842
3261
  }
@@ -2860,7 +3279,21 @@ impl TryFrom<WorkflowHistoryEventBuilder> for Event {
2860
3279
  .ok_or(WorkflowError::MissingEventData("version"))?,
2861
3280
  data: match event_type {
2862
3281
  EventType::Activity => EventData::Activity(value.try_into()?),
2863
- EventType::Signal => EventData::Signal(value.try_into()?),
3282
+ // Deprecated, manually convert to newer type
3283
+ EventType::Signal => {
3284
+ EventData::Signals(SignalsEvent {
3285
+ names: vec![value.name.ok_or(WorkflowError::MissingEventData("name"))?],
3286
+ bodies: vec![if value.input_chunks.is_empty() {
3287
+ return Err(WorkflowError::MissingEventData("input"));
3288
+ } else {
3289
+ // workflow_id not needed
3290
+ let input_key = keys::history::InputKey::new(Id::nil(), value.location);
3291
+ input_key
3292
+ .combine(value.input_chunks)
3293
+ .map_err(WorkflowError::DeserializeEventData)?
3294
+ }],
3295
+ })
3296
+ }
2864
3297
  EventType::SignalSend => EventData::SignalSend(value.try_into()?),
2865
3298
  EventType::MessageSend => EventData::MessageSend(value.try_into()?),
2866
3299
  EventType::SubWorkflow => EventData::SubWorkflow(value.try_into()?),
@@ -2868,7 +3301,8 @@ impl TryFrom<WorkflowHistoryEventBuilder> for Event {
2868
3301
  EventType::Sleep => EventData::Sleep(value.try_into()?),
2869
3302
  EventType::Branch => EventData::Branch,
2870
3303
  EventType::Removed => EventData::Removed(value.try_into()?),
2871
- EventType::VersionCheck => EventData::VersionCheck,
3304
+ EventType::VersionCheck => EventData::VersionCheck(value.try_into()?),
3305
+ EventType::Signals => EventData::Signals(value.try_into()?),
2872
3306
  },
2873
3307
  })
2874
3308
  }
@@ -2901,27 +3335,6 @@ impl TryFrom<WorkflowHistoryEventBuilder> for ActivityEvent {
2901
3335
  }
2902
3336
  }
2903
3337
 
2904
- impl TryFrom<WorkflowHistoryEventBuilder> for SignalEvent {
2905
- type Error = WorkflowError;
2906
-
2907
- fn try_from(value: WorkflowHistoryEventBuilder) -> WorkflowResult<Self> {
2908
- Ok(SignalEvent {
2909
- name: value.name.ok_or(WorkflowError::MissingEventData("name"))?,
2910
- body: {
2911
- if value.input_chunks.is_empty() {
2912
- return Err(WorkflowError::MissingEventData("input"));
2913
- } else {
2914
- // workflow_id not needed
2915
- let input_key = keys::history::InputKey::new(Id::nil(), value.location);
2916
- input_key
2917
- .combine(value.input_chunks)
2918
- .map_err(WorkflowError::DeserializeEventData)?
2919
- }
2920
- },
2921
- })
2922
- }
2923
- }
2924
-
2925
3338
  impl TryFrom<WorkflowHistoryEventBuilder> for SignalSendEvent {
2926
3339
  type Error = WorkflowError;
2927
3340
 
@@ -3024,6 +3437,51 @@ impl TryFrom<WorkflowHistoryEventBuilder> for RemovedEvent {
3024
3437
  }
3025
3438
  }
3026
3439
 
3440
+ impl TryFrom<WorkflowHistoryEventBuilder> for VersionCheckEvent {
3441
+ type Error = WorkflowError;
3442
+
3443
+ fn try_from(value: WorkflowHistoryEventBuilder) -> WorkflowResult<Self> {
3444
+ Ok(VersionCheckEvent {
3445
+ // Fallback to event version for old events that don't have inner version
3446
+ inner_version: value.inner_version.unwrap_or(
3447
+ value
3448
+ .version
3449
+ .ok_or(WorkflowError::MissingEventData("version"))?,
3450
+ ),
3451
+ })
3452
+ }
3453
+ }
3454
+
3455
+ impl TryFrom<WorkflowHistoryEventBuilder> for SignalsEvent {
3456
+ type Error = WorkflowError;
3457
+
3458
+ fn try_from(value: WorkflowHistoryEventBuilder) -> WorkflowResult<Self> {
3459
+ Ok(SignalsEvent {
3460
+ names: if value.indexed_names.is_empty() {
3461
+ return Err(WorkflowError::MissingEventData("name"));
3462
+ } else {
3463
+ value.indexed_names
3464
+ },
3465
+ bodies: if value.indexed_input_chunks.is_empty() {
3466
+ return Err(WorkflowError::MissingEventData("input"));
3467
+ } else {
3468
+ value
3469
+ .indexed_input_chunks
3470
+ .into_iter()
3471
+ .map(|input_chunks| {
3472
+ // workflow_id not needed
3473
+ let input_key =
3474
+ keys::history::InputKey::new(Id::nil(), value.location.clone());
3475
+ input_key
3476
+ .combine(input_chunks)
3477
+ .map_err(WorkflowError::DeserializeEventData)
3478
+ })
3479
+ .collect::<std::result::Result<_, _>>()?
3480
+ },
3481
+ })
3482
+ }
3483
+ }
3484
+
3027
3485
  fn value_to_str(v: &serde_json::Value) -> WorkflowResult<String> {
3028
3486
  match v {
3029
3487
  serde_json::Value::String(s) => Ok(s.clone()),
@@ -3032,13 +3490,16 @@ fn value_to_str(v: &serde_json::Value) -> WorkflowResult<String> {
3032
3490
  }
3033
3491
 
3034
3492
  fn calc_pull_ratio(x: u64, ax: u64, ay: u64, bx: u64, by: u64) -> u64 {
3035
- // must have neg slope, inversely proportional
3493
+ // Must have neg slope, inversely proportional
3036
3494
  assert!(ax < bx);
3037
3495
  assert!(ay > by);
3038
3496
 
3039
- let neg_dy = ay - by;
3497
+ // Bound domain
3498
+ let x = x.max(ax).min(bx);
3499
+
3040
3500
  let dx = bx - ax;
3041
- let neg_b = ay * neg_dy / dx;
3501
+ let neg_dy = ay - by;
3502
+ let b = ay + ax * neg_dy / dx;
3042
3503
 
3043
- return neg_b.saturating_sub(x * neg_dy / dx);
3504
+ return b.saturating_sub(x * neg_dy / dx);
3044
3505
  }