aethergraph 0.1.0a1__py3-none-any.whl → 0.1.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267)
  1. aethergraph/__init__.py +4 -10
  2. aethergraph/__main__.py +293 -0
  3. aethergraph/api/v1/__init__.py +0 -0
  4. aethergraph/api/v1/agents.py +46 -0
  5. aethergraph/api/v1/apps.py +70 -0
  6. aethergraph/api/v1/artifacts.py +415 -0
  7. aethergraph/api/v1/channels.py +89 -0
  8. aethergraph/api/v1/deps.py +168 -0
  9. aethergraph/api/v1/graphs.py +259 -0
  10. aethergraph/api/v1/identity.py +25 -0
  11. aethergraph/api/v1/memory.py +353 -0
  12. aethergraph/api/v1/misc.py +47 -0
  13. aethergraph/api/v1/pagination.py +29 -0
  14. aethergraph/api/v1/runs.py +568 -0
  15. aethergraph/api/v1/schemas.py +535 -0
  16. aethergraph/api/v1/session.py +323 -0
  17. aethergraph/api/v1/stats.py +201 -0
  18. aethergraph/api/v1/viz.py +152 -0
  19. aethergraph/config/config.py +22 -0
  20. aethergraph/config/loader.py +3 -2
  21. aethergraph/config/storage.py +209 -0
  22. aethergraph/contracts/__init__.py +0 -0
  23. aethergraph/contracts/services/__init__.py +0 -0
  24. aethergraph/contracts/services/artifacts.py +27 -14
  25. aethergraph/contracts/services/memory.py +45 -17
  26. aethergraph/contracts/services/metering.py +129 -0
  27. aethergraph/contracts/services/runs.py +50 -0
  28. aethergraph/contracts/services/sessions.py +87 -0
  29. aethergraph/contracts/services/state_stores.py +3 -0
  30. aethergraph/contracts/services/viz.py +44 -0
  31. aethergraph/contracts/storage/artifact_index.py +88 -0
  32. aethergraph/contracts/storage/artifact_store.py +99 -0
  33. aethergraph/contracts/storage/async_kv.py +34 -0
  34. aethergraph/contracts/storage/blob_store.py +50 -0
  35. aethergraph/contracts/storage/doc_store.py +35 -0
  36. aethergraph/contracts/storage/event_log.py +31 -0
  37. aethergraph/contracts/storage/vector_index.py +48 -0
  38. aethergraph/core/__init__.py +0 -0
  39. aethergraph/core/execution/forward_scheduler.py +13 -2
  40. aethergraph/core/execution/global_scheduler.py +21 -15
  41. aethergraph/core/execution/step_forward.py +10 -1
  42. aethergraph/core/graph/__init__.py +0 -0
  43. aethergraph/core/graph/graph_builder.py +8 -4
  44. aethergraph/core/graph/graph_fn.py +156 -15
  45. aethergraph/core/graph/graph_spec.py +8 -0
  46. aethergraph/core/graph/graphify.py +146 -27
  47. aethergraph/core/graph/node_spec.py +0 -2
  48. aethergraph/core/graph/node_state.py +3 -0
  49. aethergraph/core/graph/task_graph.py +39 -1
  50. aethergraph/core/runtime/__init__.py +0 -0
  51. aethergraph/core/runtime/ad_hoc_context.py +64 -4
  52. aethergraph/core/runtime/base_service.py +28 -4
  53. aethergraph/core/runtime/execution_context.py +13 -15
  54. aethergraph/core/runtime/graph_runner.py +222 -37
  55. aethergraph/core/runtime/node_context.py +510 -6
  56. aethergraph/core/runtime/node_services.py +12 -5
  57. aethergraph/core/runtime/recovery.py +15 -1
  58. aethergraph/core/runtime/run_manager.py +783 -0
  59. aethergraph/core/runtime/run_manager_local.py +204 -0
  60. aethergraph/core/runtime/run_registration.py +2 -2
  61. aethergraph/core/runtime/run_types.py +89 -0
  62. aethergraph/core/runtime/runtime_env.py +136 -7
  63. aethergraph/core/runtime/runtime_metering.py +71 -0
  64. aethergraph/core/runtime/runtime_registry.py +36 -13
  65. aethergraph/core/runtime/runtime_services.py +194 -6
  66. aethergraph/core/tools/builtins/toolset.py +1 -1
  67. aethergraph/core/tools/toolkit.py +5 -0
  68. aethergraph/plugins/agents/default_chat_agent copy.py +90 -0
  69. aethergraph/plugins/agents/default_chat_agent.py +171 -0
  70. aethergraph/plugins/agents/shared.py +81 -0
  71. aethergraph/plugins/channel/adapters/webui.py +112 -112
  72. aethergraph/plugins/channel/routes/webui_routes.py +367 -102
  73. aethergraph/plugins/channel/utils/slack_utils.py +115 -59
  74. aethergraph/plugins/channel/utils/telegram_utils.py +88 -47
  75. aethergraph/plugins/channel/websockets/weibui_ws.py +172 -0
  76. aethergraph/runtime/__init__.py +15 -0
  77. aethergraph/server/app_factory.py +190 -34
  78. aethergraph/server/clients/channel_client.py +202 -0
  79. aethergraph/server/http/channel_http_routes.py +116 -0
  80. aethergraph/server/http/channel_ws_routers.py +45 -0
  81. aethergraph/server/loading.py +117 -0
  82. aethergraph/server/server.py +131 -0
  83. aethergraph/server/server_state.py +240 -0
  84. aethergraph/server/start.py +227 -66
  85. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
  86. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
  87. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
  88. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
  89. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
  90. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
  91. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
  92. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
  93. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
  94. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
  95. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
  96. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
  97. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
  98. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
  99. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
  100. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
  101. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
  102. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
  103. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
  104. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
  105. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
  106. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
  107. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
  108. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
  109. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
  110. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
  111. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
  112. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
  113. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
  114. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
  115. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
  116. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
  117. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
  118. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
  119. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
  120. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
  121. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
  122. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
  123. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
  124. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
  125. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
  126. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
  127. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
  128. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
  129. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
  130. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
  131. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
  132. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
  133. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
  134. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
  135. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
  136. aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
  137. aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
  138. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
  139. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
  140. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
  141. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
  142. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
  143. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
  144. aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +1 -0
  145. aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +400 -0
  146. aethergraph/server/ui_static/index.html +15 -0
  147. aethergraph/server/ui_static/logo.png +0 -0
  148. aethergraph/services/artifacts/__init__.py +0 -0
  149. aethergraph/services/artifacts/facade.py +1239 -132
  150. aethergraph/services/auth/{dev.py → authn.py} +0 -8
  151. aethergraph/services/auth/authz.py +100 -0
  152. aethergraph/services/channel/__init__.py +0 -0
  153. aethergraph/services/channel/channel_bus.py +19 -1
  154. aethergraph/services/channel/factory.py +13 -1
  155. aethergraph/services/channel/ingress.py +311 -0
  156. aethergraph/services/channel/queue_adapter.py +75 -0
  157. aethergraph/services/channel/session.py +502 -19
  158. aethergraph/services/container/default_container.py +122 -43
  159. aethergraph/services/continuations/continuation.py +6 -0
  160. aethergraph/services/continuations/stores/fs_store.py +19 -0
  161. aethergraph/services/eventhub/event_hub.py +76 -0
  162. aethergraph/services/kv/__init__.py +0 -0
  163. aethergraph/services/kv/ephemeral.py +244 -0
  164. aethergraph/services/llm/__init__.py +0 -0
  165. aethergraph/services/llm/generic_client copy.py +691 -0
  166. aethergraph/services/llm/generic_client.py +1288 -187
  167. aethergraph/services/llm/providers.py +3 -1
  168. aethergraph/services/llm/types.py +47 -0
  169. aethergraph/services/llm/utils.py +284 -0
  170. aethergraph/services/logger/std.py +3 -0
  171. aethergraph/services/mcp/__init__.py +9 -0
  172. aethergraph/services/mcp/http_client.py +38 -0
  173. aethergraph/services/mcp/service.py +225 -1
  174. aethergraph/services/mcp/stdio_client.py +41 -6
  175. aethergraph/services/mcp/ws_client.py +44 -2
  176. aethergraph/services/memory/__init__.py +0 -0
  177. aethergraph/services/memory/distillers/llm_long_term.py +234 -0
  178. aethergraph/services/memory/distillers/llm_meta_summary.py +398 -0
  179. aethergraph/services/memory/distillers/long_term.py +225 -0
  180. aethergraph/services/memory/facade/__init__.py +3 -0
  181. aethergraph/services/memory/facade/chat.py +440 -0
  182. aethergraph/services/memory/facade/core.py +447 -0
  183. aethergraph/services/memory/facade/distillation.py +424 -0
  184. aethergraph/services/memory/facade/rag.py +410 -0
  185. aethergraph/services/memory/facade/results.py +315 -0
  186. aethergraph/services/memory/facade/retrieval.py +139 -0
  187. aethergraph/services/memory/facade/types.py +77 -0
  188. aethergraph/services/memory/facade/utils.py +43 -0
  189. aethergraph/services/memory/facade_dep.py +1539 -0
  190. aethergraph/services/memory/factory.py +9 -3
  191. aethergraph/services/memory/utils.py +10 -0
  192. aethergraph/services/metering/eventlog_metering.py +470 -0
  193. aethergraph/services/metering/noop.py +25 -4
  194. aethergraph/services/rag/__init__.py +0 -0
  195. aethergraph/services/rag/facade.py +279 -23
  196. aethergraph/services/rag/index_factory.py +2 -2
  197. aethergraph/services/rag/node_rag.py +317 -0
  198. aethergraph/services/rate_limit/inmem_rate_limit.py +24 -0
  199. aethergraph/services/registry/__init__.py +0 -0
  200. aethergraph/services/registry/agent_app_meta.py +419 -0
  201. aethergraph/services/registry/registry_key.py +1 -1
  202. aethergraph/services/registry/unified_registry.py +74 -6
  203. aethergraph/services/scope/scope.py +159 -0
  204. aethergraph/services/scope/scope_factory.py +164 -0
  205. aethergraph/services/state_stores/serialize.py +5 -0
  206. aethergraph/services/state_stores/utils.py +2 -1
  207. aethergraph/services/viz/__init__.py +0 -0
  208. aethergraph/services/viz/facade.py +413 -0
  209. aethergraph/services/viz/viz_service.py +69 -0
  210. aethergraph/storage/artifacts/artifact_index_jsonl.py +180 -0
  211. aethergraph/storage/artifacts/artifact_index_sqlite.py +426 -0
  212. aethergraph/storage/artifacts/cas_store.py +422 -0
  213. aethergraph/storage/artifacts/fs_cas.py +18 -0
  214. aethergraph/storage/artifacts/s3_cas.py +14 -0
  215. aethergraph/storage/artifacts/utils.py +124 -0
  216. aethergraph/storage/blob/fs_blob.py +86 -0
  217. aethergraph/storage/blob/s3_blob.py +115 -0
  218. aethergraph/storage/continuation_store/fs_cont.py +283 -0
  219. aethergraph/storage/continuation_store/inmem_cont.py +146 -0
  220. aethergraph/storage/continuation_store/kvdoc_cont.py +261 -0
  221. aethergraph/storage/docstore/fs_doc.py +63 -0
  222. aethergraph/storage/docstore/sqlite_doc.py +31 -0
  223. aethergraph/storage/docstore/sqlite_doc_sync.py +90 -0
  224. aethergraph/storage/eventlog/fs_event.py +136 -0
  225. aethergraph/storage/eventlog/sqlite_event.py +47 -0
  226. aethergraph/storage/eventlog/sqlite_event_sync.py +178 -0
  227. aethergraph/storage/factory.py +432 -0
  228. aethergraph/storage/fs_utils.py +28 -0
  229. aethergraph/storage/graph_state_store/state_store.py +64 -0
  230. aethergraph/storage/kv/inmem_kv.py +103 -0
  231. aethergraph/storage/kv/layered_kv.py +52 -0
  232. aethergraph/storage/kv/sqlite_kv.py +39 -0
  233. aethergraph/storage/kv/sqlite_kv_sync.py +98 -0
  234. aethergraph/storage/memory/event_persist.py +68 -0
  235. aethergraph/storage/memory/fs_persist.py +118 -0
  236. aethergraph/{services/memory/hotlog_kv.py → storage/memory/hotlog.py} +8 -2
  237. aethergraph/{services → storage}/memory/indices.py +31 -7
  238. aethergraph/storage/metering/meter_event.py +55 -0
  239. aethergraph/storage/runs/doc_store.py +280 -0
  240. aethergraph/storage/runs/inmen_store.py +82 -0
  241. aethergraph/storage/runs/sqlite_run_store.py +403 -0
  242. aethergraph/storage/sessions/doc_store.py +183 -0
  243. aethergraph/storage/sessions/inmem_store.py +110 -0
  244. aethergraph/storage/sessions/sqlite_session_store.py +399 -0
  245. aethergraph/storage/vector_index/chroma_index.py +138 -0
  246. aethergraph/storage/vector_index/faiss_index.py +179 -0
  247. aethergraph/storage/vector_index/sqlite_index.py +187 -0
  248. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/METADATA +138 -31
  249. aethergraph-0.1.0a2.dist-info/RECORD +356 -0
  250. aethergraph-0.1.0a2.dist-info/entry_points.txt +3 -0
  251. aethergraph/services/artifacts/factory.py +0 -35
  252. aethergraph/services/artifacts/fs_store.py +0 -656
  253. aethergraph/services/artifacts/jsonl_index.py +0 -123
  254. aethergraph/services/artifacts/sqlite_index.py +0 -209
  255. aethergraph/services/memory/distillers/episode.py +0 -116
  256. aethergraph/services/memory/distillers/rolling.py +0 -74
  257. aethergraph/services/memory/facade.py +0 -633
  258. aethergraph/services/memory/persist_fs.py +0 -40
  259. aethergraph/services/rag/index/base.py +0 -27
  260. aethergraph/services/rag/index/faiss_index.py +0 -121
  261. aethergraph/services/rag/index/sqlite_index.py +0 -134
  262. aethergraph-0.1.0a1.dist-info/RECORD +0 -182
  263. aethergraph-0.1.0a1.dist-info/entry_points.txt +0 -2
  264. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/WHEEL +0 -0
  265. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/LICENSE +0 -0
  266. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/NOTICE +0 -0
  267. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,783 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from datetime import datetime, timezone
5
+ from typing import Any
6
+ from uuid import uuid4
7
+
8
+ from aethergraph.api.v1.deps import RequestIdentity
9
+ from aethergraph.contracts.errors.errors import GraphHasPendingWaits
10
+ from aethergraph.contracts.services.runs import RunStore
11
+ from aethergraph.core.execution.forward_scheduler import ForwardScheduler
12
+ from aethergraph.core.execution.global_scheduler import GlobalForwardScheduler
13
+ from aethergraph.core.runtime.run_types import (
14
+ RunImportance,
15
+ RunOrigin,
16
+ RunRecord,
17
+ RunStatus,
18
+ RunVisibility,
19
+ )
20
+ from aethergraph.core.runtime.runtime_metering import current_metering
21
+ from aethergraph.core.runtime.runtime_registry import current_registry
22
+ from aethergraph.core.runtime.runtime_services import current_services
23
+ from aethergraph.services.registry.unified_registry import UnifiedRegistry
24
+
25
+
26
+ def _utcnow() -> datetime:
27
+ return datetime.now(tz=timezone.utc)
28
+
29
+
30
+ def _is_task_graph(obj: Any) -> bool:
31
+ # Duck-typed check: any object with `spec` and `io_signature` attributes counts as a TaskGraph. TODO: replace with a proper isinstance check.
32
+ return hasattr(obj, "spec") and hasattr(obj, "io_signature")
33
+
34
+
35
+ def _is_graphfn(obj: Any) -> bool:
36
+ from aethergraph.core.graph.graph_fn import GraphFunction # adjust path
37
+
38
+ return isinstance(obj, GraphFunction)
39
+
40
+
41
+ class RunManager:
42
+ """
43
+ High-level coordinator for running graphs.
44
+
45
+ Responsibilities
46
+ ----------------
47
+ - Resolve graph targets (TaskGraph / GraphFunction) from the UnifiedRegistry.
48
+ - Create and persist RunRecord metadata in the RunStore.
49
+ - Enforce a soft concurrency limit via an in-process run slot counter.
50
+ - Drive execution via run_or_resume_async and record status / errors.
51
+ - Emit metering events (duration, status, user/org, graph_id).
52
+ - Best-effort cancellation by talking to the scheduler registry.
53
+
54
+ Key entrypoints
55
+ ---------------
56
+ submit_run(...)
57
+ Non-blocking API entrypoint (used by HTTP routes).
58
+ - Acquires a run slot (respecting max_concurrent_runs).
59
+ - Creates a RunRecord (status=running) and saves it.
60
+ - Schedules a background coroutine (_bg) that:
61
+ * Calls _run_and_finalize(...)
62
+ * Always releases the run slot in a finally block.
63
+ - Returns immediately with the RunRecord so the caller can poll status.
64
+
65
+ start_run(...)
66
+ Blocking helper (tests / CLI).
67
+ - Same setup as submit_run, but runs _run_and_finalize(...) inline.
68
+ - Returns (RunRecord, outputs, has_waits, continuations).
69
+
70
+ _run_and_finalize(...)
71
+ Shared core logic used by both submit_run and start_run.
72
+ - Calls run_or_resume_async(target, inputs, run_id, session_id).
73
+ - Maps successful results into a dict of outputs.
74
+ - Handles:
75
+ * Normal completion -> status = succeeded.
76
+ * GraphHasPendingWaits -> status = failed (for now), has_waits=True.
77
+ * asyncio.CancelledError -> status = canceled.
78
+ * Other exceptions -> status = failed, error message recorded.
79
+ - Updates RunStore status fields (finished_at, error).
80
+ - Sends a metering event with status / duration.
81
+
82
+ Concurrency model
83
+ -----------------
84
+ - _acquire_run_slot / _release_run_slot protect a _running counter with an
85
+ asyncio.Lock to enforce max_concurrent_runs within this process.
86
+ - submit_run takes ownership of a slot until responsibility is handed to
87
+ the background runner (_bg). Once _bg is scheduled, it is responsible
88
+ for releasing the slot in its finally block.
89
+ - If submit_run fails before the handoff, it releases the slot itself to
90
+ avoid leaks.
91
+
92
+ Cancellation
93
+ ------------
94
+ cancel_run(run_id)
95
+ - Looks up the RunRecord (if available) and, if not terminal, marks it
96
+ as cancellation_requested in the RunStore.
97
+ - Uses the scheduler registry to find the scheduler for this run:
98
+ * GlobalForwardScheduler: terminate_run(run_id)
99
+ * ForwardScheduler: terminate()
100
+ - The actual transition to RunStatus.canceled happens when the
101
+ scheduler cancels the task and run_or_resume_async raises
102
+ asyncio.CancelledError, which _run_and_finalize() translates into
103
+ a canceled run.
104
+
105
+ TODO: for global schedulers, we may want a dedicated run manager -- the current
106
+ implementation uses async_run, which creates a local ForwardScheduler instance
107
+ for each graph run. This is fine for fewer than thousands of concurrent graphs but may
108
+ not scale well to a large number of concurrent graphs.
109
+ """
110
+
111
+ def __init__(
112
+ self,
113
+ *,
114
+ run_store: RunStore | None = None,
115
+ registry: UnifiedRegistry | None = None,
116
+ sched_registry: Any | None = None, # placeholder for future use
117
+ max_concurrent_runs: int | None = None,
118
+ ):
119
+ self._store = run_store
120
+ self._registry = registry
121
+ self._sched_registry = sched_registry
122
+ self._max_concurrent_runs = max_concurrent_runs
123
+ self._running = 0
124
+ self._lock = asyncio.Lock()
125
+ self._run_waiters: dict[str, asyncio.Future] = {}
126
+ self._run_waiters_lock = (
127
+ asyncio.Lock()
128
+ ) # no need for thread lock because run_manager is used within event loop
129
+
130
+ # -------- concurrency helpers --------
131
+ async def _acquire_run_slot(self) -> None:
132
+ if self._max_concurrent_runs is None:
133
+ return
134
+ async with self._lock:
135
+ if self._running >= self._max_concurrent_runs:
136
+ from fastapi import HTTPException, status
137
+
138
+ raise HTTPException(
139
+ status_code=status.HTTP_429_TOO_MANY_REQUESTS,
140
+ detail="Too many runs are currently executing. Please wait and try again.",
141
+ )
142
+ self._running += 1
143
+
144
+ async def _release_run_slot(self) -> None:
145
+ if self._max_concurrent_runs is None:
146
+ return
147
+ async with self._lock:
148
+ self._running = max(0, self._running - 1)
149
+
150
+ # -------- registry helpers --------
151
+
152
+ def registry(self) -> UnifiedRegistry:
153
+ return self._registry or current_registry()
154
+
155
+ async def _resolve_target(self, graph_id: str) -> Any:
156
+ reg = self.registry()
157
+ # Try static TaskGraph
158
+ try:
159
+ return reg.get_graph(name=graph_id, version=None)
160
+ except KeyError:
161
+ pass
162
+ # Try GraphFunction
163
+ try:
164
+ return reg.get_graphfn(name=graph_id, version=None)
165
+ except KeyError:
166
+ pass
167
+ raise KeyError(f"Graph '{graph_id}' not found")
168
+
169
+ # -------- core execution helper --------
170
+
171
+ async def _run_and_finalize(
172
+ self,
173
+ *,
174
+ record: RunRecord,
175
+ target: Any,
176
+ graph_id: str,
177
+ inputs: dict[str, Any],
178
+ identity: RequestIdentity,
179
+ # user_id: str | None,
180
+ # org_id: str | None,
181
+ ) -> tuple[RunRecord, dict[str, Any] | None, bool, list[dict[str, Any]]]:
182
+ """
183
+ Shared core logic that actually calls run_or_resume_async, updates
184
+ RunStore, and records metering.
185
+
186
+ Returns:
187
+ (record, outputs, has_waits, continuations)
188
+ """
189
+ from aethergraph.core.runtime.graph_runner import run_or_resume_async
190
+
191
+ user_id = identity.user_id
192
+ org_id = identity.org_id
193
+
194
+ # tags = record.tags or []
195
+ started_at = record.started_at or _utcnow()
196
+
197
+ outputs: dict[str, Any] | None = None
198
+ has_waits = False
199
+ continuations: list[dict[str, Any]] = []
200
+ error_msg: str | None = None
201
+
202
+ try:
203
+ result = await run_or_resume_async(
204
+ target,
205
+ inputs or {},
206
+ run_id=record.run_id,
207
+ session_id=record.meta.get("session_id"),
208
+ identity=identity,
209
+ agent_id=record.agent_id,
210
+ app_id=record.app_id,
211
+ )
212
+ # If we get here without GraphHasPendingWaits, run is completed
213
+ outputs = result if isinstance(result, dict) else {"result": result}
214
+ record.status = RunStatus.succeeded
215
+ record.finished_at = _utcnow()
216
+
217
+ except asyncio.CancelledError:
218
+ # Cancellation path: scheduler.terminate() or external cancel.
219
+ import logging
220
+
221
+ record.status = RunStatus.canceled
222
+ record.finished_at = _utcnow()
223
+ error_msg = "Run cancelled by user"
224
+ logging.getLogger("aethergraph.runtime.run_manager").info(
225
+ "Run %s was cancelled", record.run_id
226
+ )
227
+
228
+ except GraphHasPendingWaits as e:
229
+ # Graph quiesced with pending waits
230
+ record.status = RunStatus.failed # consider 'waiting' status later
231
+ has_waits = True
232
+ continuations = getattr(e, "continuations", [])
233
+ # outputs remain None
234
+
235
+ except Exception as exc: # noqa: BLE001
236
+ record.status = RunStatus.failed
237
+ record.finished_at = _utcnow()
238
+ error_msg = str(exc)
239
+ record.error = error_msg
240
+ import logging
241
+
242
+ logging.getLogger("aethergraph.runtime.run_manager").exception(
243
+ "Run %s failed with exception: %s", record.run_id, error_msg
244
+ )
245
+
246
+ # Persist status update
247
+ if self._store is not None:
248
+ await self._store.update_status(
249
+ record.run_id,
250
+ record.status,
251
+ finished_at=record.finished_at,
252
+ error=error_msg,
253
+ )
254
+
255
+ # Metering
256
+ meter = current_metering()
257
+ finished_at = record.finished_at or _utcnow()
258
+ duration_s = (finished_at - started_at).total_seconds()
259
+
260
+ if has_waits:
261
+ meter_status = "waiting"
262
+ else:
263
+ status_str = getattr(record.status, "value", str(record.status))
264
+ meter_status = status_str
265
+
266
+ try:
267
+ await meter.record_run(
268
+ user_id=user_id,
269
+ org_id=org_id,
270
+ run_id=record.run_id,
271
+ graph_id=graph_id,
272
+ status=meter_status,
273
+ duration_s=duration_s,
274
+ )
275
+ except Exception: # noqa: BLE001
276
+ import logging
277
+
278
+ logging.getLogger("aethergraph.runtime.run_manager").exception(
279
+ "Error recording run metering for run_id=%s", record.run_id
280
+ )
281
+
282
+ try:
283
+ if record.status in {RunStatus.succeeded, RunStatus.failed, RunStatus.canceled}:
284
+ await self._resolve_run_future(record.run_id, record)
285
+ except Exception: # noqa: BLE001
286
+ import logging
287
+
288
+ logging.getLogger("aethergraph.runtime.run_manager").exception(
289
+ "Error resolving run future for run_id=%s", record.run_id
290
+ )
291
+
292
+ return record, outputs, has_waits, continuations
293
+
294
+ # -------- new: non-blocking submit_run --------
295
+
296
+ async def submit_run(
297
+ self,
298
+ graph_id: str,
299
+ *,
300
+ inputs: dict[str, Any],
301
+ run_id: str | None = None,
302
+ session_id: str | None = None,
303
+ tags: list[str] | None = None,
304
+ identity: RequestIdentity | None = None,
305
+ origin: RunOrigin | None = None,
306
+ visibility: RunVisibility | None = None,
307
+ importance: RunImportance | None = None,
308
+ agent_id: str | None = None,
309
+ app_id: str | None = None,
310
+ ) -> RunRecord:
311
+ """
312
+ Non-blocking entrypoint for the HTTP API.
313
+
314
+ - Creates a RunRecord (status=running).
315
+ - Persists it to RunStore.
316
+ - Schedules background execution via asyncio.create_task.
317
+ - Returns immediately with the record (for run_id, status, etc).
318
+ """
319
+ if identity is None:
320
+ identity = RequestIdentity(user_id="local", org_id="local", mode="local")
321
+
322
+ user_id = identity.user_id
323
+ org_id = identity.org_id
324
+
325
+ # Acquire run slot (concurrency limiting via max_concurrent_runs)
326
+ await self._acquire_run_slot()
327
+ # Tracks whether responsibility for releasing the slot has been handed
328
+ # over to the background runner (_bg). If False, submit_run must
329
+ # release the slot on exception; if True, _bg will do it in its finally block.
330
+ slot_handed_to_bg = False
331
+
332
+ try:
333
+ tags = tags or []
334
+ target = await self._resolve_target(graph_id)
335
+ rid = run_id or f"run-{uuid4().hex[:8]}"
336
+ started_at = _utcnow()
337
+
338
+ if _is_task_graph(target):
339
+ kind = "taskgraph"
340
+ elif _is_graphfn(target):
341
+ kind = "graphfn"
342
+ else:
343
+ kind = "other"
344
+
345
+ # pull flow_id from registry metadata if possible (falls back to graph_id)
346
+ flow_id: str | None = None
347
+ reg = self.registry()
348
+ if reg is not None:
349
+ if kind == "taskgraph":
350
+ meta = reg.get_meta(nspace="graph", name=graph_id, version=None) or {}
351
+ elif kind == "graphfn":
352
+ meta = reg.get_meta(nspace="graphfn", name=graph_id, version=None) or {}
353
+ else:
354
+ meta = {}
355
+ flow_id = meta.get("flow_id") or graph_id
356
+
357
+ # use run_id as session_id if not provided
358
+ if session_id is None:
359
+ session_id = rid
360
+
361
+ record = RunRecord(
362
+ run_id=rid,
363
+ graph_id=graph_id,
364
+ kind=kind,
365
+ status=RunStatus.running, # we go straight to running as before
366
+ started_at=started_at,
367
+ tags=list(tags),
368
+ user_id=user_id,
369
+ org_id=org_id,
370
+ meta={},
371
+ session_id=session_id,
372
+ origin=origin or RunOrigin.app, # app is a typical default for graph runs
373
+ visibility=visibility or RunVisibility.normal,
374
+ importance=importance or RunImportance.normal,
375
+ agent_id=agent_id,
376
+ app_id=app_id,
377
+ )
378
+
379
+ if flow_id:
380
+ record.meta["flow_id"] = flow_id
381
+ if f"flow:{flow_id}" not in record.tags:
382
+ record.tags.append(f"flow:{flow_id}") # add flow tag if missing
383
+ if session_id:
384
+ record.meta["session_id"] = session_id
385
+ if f"session:{session_id}" not in record.tags:
386
+ record.tags.append(f"session:{session_id}") # add session tag if missing
387
+
388
+ if self._store is not None:
389
+ await self._store.create(record)
390
+
391
+ async def _bg():
392
+ try:
393
+ await self._run_and_finalize(
394
+ record=record,
395
+ target=target,
396
+ graph_id=graph_id,
397
+ inputs=inputs,
398
+ # user_id=user_id,
399
+ # org_id=org_id,
400
+ identity=identity,
401
+ )
402
+ finally:
403
+ await self._release_run_slot()
404
+
405
+ # If we're in an event loop (server), schedule in the background.
406
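+ # If not (CLI), just run inline so behaviour is still sane. NOTE(review): submit_run is itself a coroutine, so under normal asyncio use a loop is already running here and this fallback looks unreachable -- confirm.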
407
+ try:
408
+ loop = asyncio.get_running_loop()
409
+ except RuntimeError:
410
+ # Not inside a running loop – e.g., CLI usage.
411
+ slot_handed_to_bg = True
412
+ # _bg() is responsible for releasing the slot in its finally.
413
+ await _bg()
414
+ else:
415
+ slot_handed_to_bg = True
416
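+ # Background task; _bg() will release the slot in its finally. NOTE(review): the Task returned by create_task() is not stored; the event loop keeps only weak references to tasks, so hold a reference until it completes.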
417
+ loop.create_task(_bg())
418
+
419
+ return record
420
+ except Exception:
421
+ # If submit_run itself fails *before* handing off to _bg, we must release the slot here.
422
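+ # Once slot_handed_to_bg is True, _bg is responsible for releasing the slot. NOTE(review): `except Exception` does not catch asyncio.CancelledError (a BaseException since Python 3.8), so cancellation before the handoff would skip this release -- confirm.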
423
+ if not slot_handed_to_bg:
424
+ await self._release_run_slot()
425
+ raise
426
+
427
async def run_and_wait(
    self,
    graph_id: str,
    *,
    inputs: dict[str, Any],
    run_id: str | None = None,
    session_id: str | None = None,
    tags: list[str] | None = None,
    identity: RequestIdentity | None = None,
    origin: RunOrigin | None = None,
    visibility: RunVisibility | None = None,
    importance: RunImportance | None = None,
    agent_id: str | None = None,
    app_id: str | None = None,
    count_slot: bool = False,  # important for nested orchestration
) -> tuple[RunRecord, dict[str, Any] | None, bool, list[dict[str, Any]]]:
    """
    Run a graph inline (blocking) while still recording it in the run store,
    so the UI can visualize it.

    Steps:
    - Resolve the target and build a RunRecord with status=running.
    - Persist the record when a store is configured.
    - Await `_run_and_finalize`, which executes the run (status updates and
      metering are delegated to it).
    - Return its result: (record, outputs, has_waits, continuations).

    Defaults:
    - identity: the "local" identity when omitted.
    - run_id: a generated `run-<8 hex chars>` id when omitted.
    - session_id: the run id when None.
    - origin / visibility / importance: app / normal / normal when omitted.

    count_slot=False is recommended for "parent run awaiting child run"
    orchestration to avoid deadlocks when max_concurrent_runs is small. When
    True, a run slot is acquired before any work and released on every exit
    path.
    """
    if identity is None:
        identity = RequestIdentity(user_id="local", org_id="local", mode="local")

    if count_slot:
        await self._acquire_run_slot()

    try:
        # Same target resolver as submit_run.
        target = await self._resolve_target(graph_id)
        rid = run_id or f"run-{uuid4().hex[:8]}"
        started_at = _utcnow()

        if _is_task_graph(target):
            kind = "taskgraph"
        elif _is_graphfn(target):
            kind = "graphfn"
        else:
            kind = "other"

        # flow_id comes from registry metadata (falling back to graph_id);
        # it stays None when no registry is available.
        flow_id: str | None = None
        registry = self.registry()
        if registry is not None:
            namespace = {"taskgraph": "graph", "graphfn": "graphfn"}.get(kind)
            graph_meta = {}
            if namespace is not None:
                graph_meta = (
                    registry.get_meta(nspace=namespace, name=graph_id, version=None) or {}
                )
            flow_id = graph_meta.get("flow_id") or graph_id

        # The run id doubles as the session id when none was supplied.
        effective_session = rid if session_id is None else session_id

        record = RunRecord(
            run_id=rid,
            graph_id=graph_id,
            kind=kind,
            status=RunStatus.running,
            started_at=started_at,
            tags=list(tags or []),
            user_id=identity.user_id,
            org_id=identity.org_id,
            meta={},
            session_id=effective_session,
            origin=origin or RunOrigin.app,
            visibility=visibility or RunVisibility.normal,
            importance=importance or RunImportance.normal,
            agent_id=agent_id,
            app_id=app_id,
        )

        # Mirror flow/session into meta and add the matching tag if missing.
        for meta_key, tag_prefix, value in (
            ("flow_id", "flow", flow_id),
            ("session_id", "session", effective_session),
        ):
            if not value:
                continue
            record.meta[meta_key] = value
            tag = f"{tag_prefix}:{value}"
            if tag not in record.tags:
                record.tags.append(tag)

        if self._store is not None:
            await self._store.create(record)

        # Inline execution: awaited here rather than scheduled in the background.
        return await self._run_and_finalize(
            record=record,
            target=target,
            graph_id=graph_id,
            inputs=inputs,
            identity=identity,
        )
    finally:
        if count_slot:
            await self._release_run_slot()
531
+
532
+ # -------- old: blocking start_run (CLI/tests) --------
533
async def start_run(
    self,
    graph_id: str,
    *,
    inputs: dict[str, Any],
    run_id: str | None = None,
    session_id: str | None = None,
    tags: list[str] | None = None,
    identity: RequestIdentity | None = None,
    agent_id: str | None = None,
    app_id: str | None = None,
) -> tuple[RunRecord, dict[str, Any] | None, bool, list[dict[str, Any]]]:
    """
    Blocking helper (original behaviour).

    - Resolves target.
    - Creates RunRecord with status=running.
    - Runs once and waits for completion.
    - Returns (record, outputs, has_waits, continuations).

    Still useful for tests/CLI, but the HTTP route should prefer submit_run().

    This is a thin wrapper around run_and_wait(): it previously carried a
    line-for-line copy of that method's body. Delegating keeps the two entry
    points from drifting apart. The call relies on run_and_wait()'s defaults
    for origin/visibility/importance (app/normal/normal) and never takes a
    concurrency slot (count_slot=False), which matches the former behaviour.

    NOTE:
    agent_id and app_id will override any value pulled from original graphs. Use it
    only when you want to explicitly set these fields for tracking purpose.
    """
    return await self.run_and_wait(
        graph_id,
        inputs=inputs,
        run_id=run_id,
        session_id=session_id,
        tags=tags,
        identity=identity,
        agent_id=agent_id,
        app_id=app_id,
        count_slot=False,  # start_run never participated in slot accounting
    )
630
+
631
async def get_record(self, run_id: str) -> RunRecord | None:
    """
    Look up the persisted record for `run_id`.

    Returns None when no run store is configured; otherwise returns whatever
    the store's `get` yields for that id.
    """
    store = self._store
    return None if store is None else await store.get(run_id)
636
+
637
async def list_records(
    self,
    *,
    graph_id: str | None = None,
    status: RunStatus | None = None,
    flow_id: str | None = None,
    user_id: str | None = None,
    org_id: str | None = None,
    session_id: str | None = None,
    limit: int = 100,
    offset: int = 0,
) -> list[RunRecord]:
    """
    List run records from the store, optionally narrowed by flow.

    All filters except `flow_id` are forwarded to the store's `list` call
    together with `limit` and `offset`. `flow_id` is matched in Python
    against `record.meta["flow_id"]` after the store has returned its page,
    so a page filtered by flow may contain fewer than `limit` records.

    Returns an empty list when no run store is configured, matching the
    store-less behaviour of the other accessors instead of raising
    AttributeError.
    """
    if self._store is None:
        return []

    records = await self._store.list(
        graph_id=graph_id,
        status=status,
        user_id=user_id,
        org_id=org_id,
        session_id=session_id,
        limit=limit,
        offset=offset,
    )
    if flow_id is None:
        return records

    # flow_id lives in meta/tags rather than a store column, so filter here.
    return [rec for rec in records if (rec.meta or {}).get("flow_id") == flow_id]
663
+
664
def _get_sched_registry(self):
    """
    Return the scheduler registry to use for run-level operations.

    An explicitly configured `self._sched_registry` wins. Otherwise the
    registry is read from the current services container; None is returned
    when the container cannot be obtained or has no `sched_registry`
    attribute.
    """
    configured = self._sched_registry
    if configured is None:
        try:
            services = current_services()
        except Exception:
            # No usable services container: behave as "no registry".
            return None
        return getattr(services, "sched_registry", None)
    return configured
672
+
673
async def cancel_run(self, run_id: str) -> RunRecord | None:
    """
    Best-effort cancellation for a run.

    Outcomes:
    - No record found (or no store configured): scheduler-level termination
      is still attempted, in case the run hasn't been persisted yet, and
      None is returned.
    - Record already terminal (succeeded / failed / canceled): returned
      unchanged, and the scheduler is not touched.
    - Otherwise: the record is marked cancellation_requested, that status is
      persisted, and the run's scheduler is asked to terminate. The updated
      record is returned.

    The final transition to RunStatus.canceled is not made here; per the
    original notes it happens inside _run_and_finalize() when the scheduler
    raises asyncio.CancelledError.
    """

    async def _stop_scheduler() -> None:
        # Find the scheduler registered for this run, if any.
        registry = self._get_sched_registry()
        scheduler = registry.get(run_id) if registry is not None else None
        if scheduler is None:
            return

        try:
            # Global scheduler -> terminate just this run.
            # Local scheduler  -> terminate the scheduler itself.
            if isinstance(scheduler, GlobalForwardScheduler):
                await scheduler.terminate_run(run_id)
            elif isinstance(scheduler, ForwardScheduler):
                await scheduler.terminate()
        except Exception:  # noqa: BLE001
            # Best-effort: log the failure instead of propagating it.
            import logging

            logging.getLogger("aethergraph.runtime.run_manager").exception(
                "Error terminating scheduler for run_id=%s", run_id
            )

    found: RunRecord | None = None
    if self._store is not None:
        found = await self._store.get(run_id)

    if found is None:
        # Nothing persisted: still try to stop the scheduler, then bail.
        await _stop_scheduler()
        return None

    already_finished = found.status in {
        RunStatus.succeeded,
        RunStatus.failed,
        RunStatus.canceled,
    }
    if already_finished:
        return found

    # Flip the status first so the UI can react immediately.
    found.status = RunStatus.cancellation_requested
    if self._store is not None:
        await self._store.update_status(
            run_id,
            RunStatus.cancellation_requested,
            finished_at=None,
            error=None,
        )

    await _stop_scheduler()
    return found
744
+
745
+ # ------- run waiters for orchestration --------
746
async def wait_run(
    self,
    run_id: str,
    *,
    timeout_s: float | None = None,
) -> RunRecord:
    """
    Wait until the run identified by `run_id` reaches a terminal state.

    Args:
        run_id: Id of the run to wait for.
        timeout_s: Maximum time to wait in seconds; None waits indefinitely.

    Returns:
        The stored record if it is already terminal (succeeded / failed /
        canceled), otherwise the value the run's waiter future is resolved
        with.

    Raises:
        asyncio.TimeoutError: `timeout_s` elapsed before the run finished.
        Exception: whatever the waiter future was rejected with.

    Two hazards are handled here:
    - The waiter future is shared by every caller waiting on the same run,
      so it is wrapped in asyncio.shield(). Without it, a timeout (or
      cancellation) of one waiter would cancel the shared future and break
      all the others.
    - The store is checked again after the future is registered. A run that
      finishes between the first check and the registration would otherwise
      never signal this waiter. NOTE(review): this assumes the terminal
      status is persisted before the waiter future is resolved - confirm
      against _run_and_finalize.
    """

    def _finished(rec) -> bool:
        return bool(rec) and rec.status in {
            RunStatus.succeeded,
            RunStatus.failed,
            RunStatus.canceled,
        }

    # Fast path: already terminal in store
    rec = await self.get_record(run_id)
    if _finished(rec):
        return rec

    fut = await self._get_or_create_run_future(run_id)

    # Re-check now that the future is registered (closes the lost-wakeup gap).
    rec = await self.get_record(run_id)
    if _finished(rec):
        # Wake any other waiters parked on the same future and drop the entry.
        await self._resolve_run_future(run_id, rec)
        return rec

    # shield(): cancelling *this* wait must not cancel the shared future.
    guarded = asyncio.shield(fut)
    if timeout_s is not None:
        return await asyncio.wait_for(guarded, timeout=timeout_s)
    return await guarded
761
+
762
async def _get_or_create_run_future(self, run_id: str) -> asyncio.Future:
    """
    Return the waiter future for `run_id`, creating one if needed.

    A pending future already registered for the run is reused. If none is
    registered, or the registered one is already done, a new future is
    created on the running loop and stored in its place. The lookup and the
    replacement both happen while holding `_run_waiters_lock`.
    """
    async with self._run_waiters_lock:
        current = self._run_waiters.get(run_id)
        if current is not None and not current.done():
            return current
        replacement = asyncio.get_running_loop().create_future()
        self._run_waiters[run_id] = replacement
        return replacement
769
+
770
async def _resolve_run_future(self, run_id: str, value: Any) -> None:
    """
    Complete the waiter future for `run_id` with `value` and forget it.

    The entry is always removed from `_run_waiters`, including when the
    registered future is already done (e.g. cancelled), so stale futures
    never linger in the map. A future that is already done is left
    untouched; a missing entry is a no-op. Everything happens while holding
    `_run_waiters_lock`.
    """
    async with self._run_waiters_lock:
        # Pop first: cleanup is unconditional, whatever state the future is in.
        fut = self._run_waiters.pop(run_id, None)
        if fut is not None and not fut.done():
            fut.set_result(value)
777
+
778
async def _reject_run_future(self, run_id: str, err: Exception) -> None:
    """
    Fail the waiter future for `run_id` with `err` and forget it.

    The entry is always removed from `_run_waiters`, including when the
    registered future is already done (e.g. cancelled), so stale futures
    never linger in the map. A future that is already done is left
    untouched; a missing entry is a no-op. Everything happens while holding
    `_run_waiters_lock`.
    """
    async with self._run_waiters_lock:
        # Pop first: cleanup is unconditional, whatever state the future is in.
        fut = self._run_waiters.pop(run_id, None)
        if fut is not None and not fut.done():
            fut.set_exception(err)