aethergraph 0.1.0a1__py3-none-any.whl → 0.1.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aethergraph/__init__.py +4 -10
- aethergraph/__main__.py +293 -0
- aethergraph/api/v1/__init__.py +0 -0
- aethergraph/api/v1/agents.py +46 -0
- aethergraph/api/v1/apps.py +70 -0
- aethergraph/api/v1/artifacts.py +415 -0
- aethergraph/api/v1/channels.py +89 -0
- aethergraph/api/v1/deps.py +168 -0
- aethergraph/api/v1/graphs.py +259 -0
- aethergraph/api/v1/identity.py +25 -0
- aethergraph/api/v1/memory.py +353 -0
- aethergraph/api/v1/misc.py +47 -0
- aethergraph/api/v1/pagination.py +29 -0
- aethergraph/api/v1/runs.py +568 -0
- aethergraph/api/v1/schemas.py +535 -0
- aethergraph/api/v1/session.py +323 -0
- aethergraph/api/v1/stats.py +201 -0
- aethergraph/api/v1/viz.py +152 -0
- aethergraph/config/config.py +22 -0
- aethergraph/config/loader.py +3 -2
- aethergraph/config/storage.py +209 -0
- aethergraph/contracts/__init__.py +0 -0
- aethergraph/contracts/services/__init__.py +0 -0
- aethergraph/contracts/services/artifacts.py +27 -14
- aethergraph/contracts/services/memory.py +45 -17
- aethergraph/contracts/services/metering.py +129 -0
- aethergraph/contracts/services/runs.py +50 -0
- aethergraph/contracts/services/sessions.py +87 -0
- aethergraph/contracts/services/state_stores.py +3 -0
- aethergraph/contracts/services/viz.py +44 -0
- aethergraph/contracts/storage/artifact_index.py +88 -0
- aethergraph/contracts/storage/artifact_store.py +99 -0
- aethergraph/contracts/storage/async_kv.py +34 -0
- aethergraph/contracts/storage/blob_store.py +50 -0
- aethergraph/contracts/storage/doc_store.py +35 -0
- aethergraph/contracts/storage/event_log.py +31 -0
- aethergraph/contracts/storage/vector_index.py +48 -0
- aethergraph/core/__init__.py +0 -0
- aethergraph/core/execution/forward_scheduler.py +13 -2
- aethergraph/core/execution/global_scheduler.py +21 -15
- aethergraph/core/execution/step_forward.py +10 -1
- aethergraph/core/graph/__init__.py +0 -0
- aethergraph/core/graph/graph_builder.py +8 -4
- aethergraph/core/graph/graph_fn.py +156 -15
- aethergraph/core/graph/graph_spec.py +8 -0
- aethergraph/core/graph/graphify.py +146 -27
- aethergraph/core/graph/node_spec.py +0 -2
- aethergraph/core/graph/node_state.py +3 -0
- aethergraph/core/graph/task_graph.py +39 -1
- aethergraph/core/runtime/__init__.py +0 -0
- aethergraph/core/runtime/ad_hoc_context.py +64 -4
- aethergraph/core/runtime/base_service.py +28 -4
- aethergraph/core/runtime/execution_context.py +13 -15
- aethergraph/core/runtime/graph_runner.py +222 -37
- aethergraph/core/runtime/node_context.py +510 -6
- aethergraph/core/runtime/node_services.py +12 -5
- aethergraph/core/runtime/recovery.py +15 -1
- aethergraph/core/runtime/run_manager.py +783 -0
- aethergraph/core/runtime/run_manager_local.py +204 -0
- aethergraph/core/runtime/run_registration.py +2 -2
- aethergraph/core/runtime/run_types.py +89 -0
- aethergraph/core/runtime/runtime_env.py +136 -7
- aethergraph/core/runtime/runtime_metering.py +71 -0
- aethergraph/core/runtime/runtime_registry.py +36 -13
- aethergraph/core/runtime/runtime_services.py +194 -6
- aethergraph/core/tools/builtins/toolset.py +1 -1
- aethergraph/core/tools/toolkit.py +5 -0
- aethergraph/plugins/agents/default_chat_agent copy.py +90 -0
- aethergraph/plugins/agents/default_chat_agent.py +171 -0
- aethergraph/plugins/agents/shared.py +81 -0
- aethergraph/plugins/channel/adapters/webui.py +112 -112
- aethergraph/plugins/channel/routes/webui_routes.py +367 -102
- aethergraph/plugins/channel/utils/slack_utils.py +115 -59
- aethergraph/plugins/channel/utils/telegram_utils.py +88 -47
- aethergraph/plugins/channel/websockets/weibui_ws.py +172 -0
- aethergraph/runtime/__init__.py +15 -0
- aethergraph/server/app_factory.py +190 -34
- aethergraph/server/clients/channel_client.py +202 -0
- aethergraph/server/http/channel_http_routes.py +116 -0
- aethergraph/server/http/channel_ws_routers.py +45 -0
- aethergraph/server/loading.py +117 -0
- aethergraph/server/server.py +131 -0
- aethergraph/server/server_state.py +240 -0
- aethergraph/server/start.py +227 -66
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
- aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +1 -0
- aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +400 -0
- aethergraph/server/ui_static/index.html +15 -0
- aethergraph/server/ui_static/logo.png +0 -0
- aethergraph/services/artifacts/__init__.py +0 -0
- aethergraph/services/artifacts/facade.py +1239 -132
- aethergraph/services/auth/{dev.py → authn.py} +0 -8
- aethergraph/services/auth/authz.py +100 -0
- aethergraph/services/channel/__init__.py +0 -0
- aethergraph/services/channel/channel_bus.py +19 -1
- aethergraph/services/channel/factory.py +13 -1
- aethergraph/services/channel/ingress.py +311 -0
- aethergraph/services/channel/queue_adapter.py +75 -0
- aethergraph/services/channel/session.py +502 -19
- aethergraph/services/container/default_container.py +122 -43
- aethergraph/services/continuations/continuation.py +6 -0
- aethergraph/services/continuations/stores/fs_store.py +19 -0
- aethergraph/services/eventhub/event_hub.py +76 -0
- aethergraph/services/kv/__init__.py +0 -0
- aethergraph/services/kv/ephemeral.py +244 -0
- aethergraph/services/llm/__init__.py +0 -0
- aethergraph/services/llm/generic_client copy.py +691 -0
- aethergraph/services/llm/generic_client.py +1288 -187
- aethergraph/services/llm/providers.py +3 -1
- aethergraph/services/llm/types.py +47 -0
- aethergraph/services/llm/utils.py +284 -0
- aethergraph/services/logger/std.py +3 -0
- aethergraph/services/mcp/__init__.py +9 -0
- aethergraph/services/mcp/http_client.py +38 -0
- aethergraph/services/mcp/service.py +225 -1
- aethergraph/services/mcp/stdio_client.py +41 -6
- aethergraph/services/mcp/ws_client.py +44 -2
- aethergraph/services/memory/__init__.py +0 -0
- aethergraph/services/memory/distillers/llm_long_term.py +234 -0
- aethergraph/services/memory/distillers/llm_meta_summary.py +398 -0
- aethergraph/services/memory/distillers/long_term.py +225 -0
- aethergraph/services/memory/facade/__init__.py +3 -0
- aethergraph/services/memory/facade/chat.py +440 -0
- aethergraph/services/memory/facade/core.py +447 -0
- aethergraph/services/memory/facade/distillation.py +424 -0
- aethergraph/services/memory/facade/rag.py +410 -0
- aethergraph/services/memory/facade/results.py +315 -0
- aethergraph/services/memory/facade/retrieval.py +139 -0
- aethergraph/services/memory/facade/types.py +77 -0
- aethergraph/services/memory/facade/utils.py +43 -0
- aethergraph/services/memory/facade_dep.py +1539 -0
- aethergraph/services/memory/factory.py +9 -3
- aethergraph/services/memory/utils.py +10 -0
- aethergraph/services/metering/eventlog_metering.py +470 -0
- aethergraph/services/metering/noop.py +25 -4
- aethergraph/services/rag/__init__.py +0 -0
- aethergraph/services/rag/facade.py +279 -23
- aethergraph/services/rag/index_factory.py +2 -2
- aethergraph/services/rag/node_rag.py +317 -0
- aethergraph/services/rate_limit/inmem_rate_limit.py +24 -0
- aethergraph/services/registry/__init__.py +0 -0
- aethergraph/services/registry/agent_app_meta.py +419 -0
- aethergraph/services/registry/registry_key.py +1 -1
- aethergraph/services/registry/unified_registry.py +74 -6
- aethergraph/services/scope/scope.py +159 -0
- aethergraph/services/scope/scope_factory.py +164 -0
- aethergraph/services/state_stores/serialize.py +5 -0
- aethergraph/services/state_stores/utils.py +2 -1
- aethergraph/services/viz/__init__.py +0 -0
- aethergraph/services/viz/facade.py +413 -0
- aethergraph/services/viz/viz_service.py +69 -0
- aethergraph/storage/artifacts/artifact_index_jsonl.py +180 -0
- aethergraph/storage/artifacts/artifact_index_sqlite.py +426 -0
- aethergraph/storage/artifacts/cas_store.py +422 -0
- aethergraph/storage/artifacts/fs_cas.py +18 -0
- aethergraph/storage/artifacts/s3_cas.py +14 -0
- aethergraph/storage/artifacts/utils.py +124 -0
- aethergraph/storage/blob/fs_blob.py +86 -0
- aethergraph/storage/blob/s3_blob.py +115 -0
- aethergraph/storage/continuation_store/fs_cont.py +283 -0
- aethergraph/storage/continuation_store/inmem_cont.py +146 -0
- aethergraph/storage/continuation_store/kvdoc_cont.py +261 -0
- aethergraph/storage/docstore/fs_doc.py +63 -0
- aethergraph/storage/docstore/sqlite_doc.py +31 -0
- aethergraph/storage/docstore/sqlite_doc_sync.py +90 -0
- aethergraph/storage/eventlog/fs_event.py +136 -0
- aethergraph/storage/eventlog/sqlite_event.py +47 -0
- aethergraph/storage/eventlog/sqlite_event_sync.py +178 -0
- aethergraph/storage/factory.py +432 -0
- aethergraph/storage/fs_utils.py +28 -0
- aethergraph/storage/graph_state_store/state_store.py +64 -0
- aethergraph/storage/kv/inmem_kv.py +103 -0
- aethergraph/storage/kv/layered_kv.py +52 -0
- aethergraph/storage/kv/sqlite_kv.py +39 -0
- aethergraph/storage/kv/sqlite_kv_sync.py +98 -0
- aethergraph/storage/memory/event_persist.py +68 -0
- aethergraph/storage/memory/fs_persist.py +118 -0
- aethergraph/{services/memory/hotlog_kv.py → storage/memory/hotlog.py} +8 -2
- aethergraph/{services → storage}/memory/indices.py +31 -7
- aethergraph/storage/metering/meter_event.py +55 -0
- aethergraph/storage/runs/doc_store.py +280 -0
- aethergraph/storage/runs/inmen_store.py +82 -0
- aethergraph/storage/runs/sqlite_run_store.py +403 -0
- aethergraph/storage/sessions/doc_store.py +183 -0
- aethergraph/storage/sessions/inmem_store.py +110 -0
- aethergraph/storage/sessions/sqlite_session_store.py +399 -0
- aethergraph/storage/vector_index/chroma_index.py +138 -0
- aethergraph/storage/vector_index/faiss_index.py +179 -0
- aethergraph/storage/vector_index/sqlite_index.py +187 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/METADATA +138 -31
- aethergraph-0.1.0a2.dist-info/RECORD +356 -0
- aethergraph-0.1.0a2.dist-info/entry_points.txt +3 -0
- aethergraph/services/artifacts/factory.py +0 -35
- aethergraph/services/artifacts/fs_store.py +0 -656
- aethergraph/services/artifacts/jsonl_index.py +0 -123
- aethergraph/services/artifacts/sqlite_index.py +0 -209
- aethergraph/services/memory/distillers/episode.py +0 -116
- aethergraph/services/memory/distillers/rolling.py +0 -74
- aethergraph/services/memory/facade.py +0 -633
- aethergraph/services/memory/persist_fs.py +0 -40
- aethergraph/services/rag/index/base.py +0 -27
- aethergraph/services/rag/index/faiss_index.py +0 -121
- aethergraph/services/rag/index/sqlite_index.py +0 -134
- aethergraph-0.1.0a1.dist-info/RECORD +0 -182
- aethergraph-0.1.0a1.dist-info/entry_points.txt +0 -2
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/WHEEL +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/LICENSE +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/NOTICE +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,783 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Any
|
|
6
|
+
from uuid import uuid4
|
|
7
|
+
|
|
8
|
+
from aethergraph.api.v1.deps import RequestIdentity
|
|
9
|
+
from aethergraph.contracts.errors.errors import GraphHasPendingWaits
|
|
10
|
+
from aethergraph.contracts.services.runs import RunStore
|
|
11
|
+
from aethergraph.core.execution.forward_scheduler import ForwardScheduler
|
|
12
|
+
from aethergraph.core.execution.global_scheduler import GlobalForwardScheduler
|
|
13
|
+
from aethergraph.core.runtime.run_types import (
|
|
14
|
+
RunImportance,
|
|
15
|
+
RunOrigin,
|
|
16
|
+
RunRecord,
|
|
17
|
+
RunStatus,
|
|
18
|
+
RunVisibility,
|
|
19
|
+
)
|
|
20
|
+
from aethergraph.core.runtime.runtime_metering import current_metering
|
|
21
|
+
from aethergraph.core.runtime.runtime_registry import current_registry
|
|
22
|
+
from aethergraph.core.runtime.runtime_services import current_services
|
|
23
|
+
from aethergraph.services.registry.unified_registry import UnifiedRegistry
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _utcnow() -> datetime:
    """Return the current time as a timezone-aware ``datetime`` in UTC."""
    utc = timezone.utc
    return datetime.now(utc)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _is_task_graph(obj: Any) -> bool:
    """Duck-type check for a TaskGraph-like object.

    Returns True only when *obj* exposes both a ``spec`` and an
    ``io_signature`` attribute. This is a structural stand-in; replace it
    with a proper ``isinstance`` check in your codebase.
    """
    required_attrs = ("spec", "io_signature")
    return all(hasattr(obj, attr) for attr in required_attrs)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _is_graphfn(obj: Any) -> bool:
    """Report whether *obj* is an instance of ``GraphFunction``.

    ``GraphFunction`` is imported at call time, inside the function, rather
    than at module import time.
    """
    # Adjust this path if GraphFunction lives elsewhere in your codebase.
    from aethergraph.core.graph.graph_fn import GraphFunction

    is_graph_function = isinstance(obj, GraphFunction)
    return is_graph_function
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class RunManager:
|
|
42
|
+
"""
|
|
43
|
+
High-level coordinator for running graphs.
|
|
44
|
+
|
|
45
|
+
Responsibilities
|
|
46
|
+
----------------
|
|
47
|
+
- Resolve graph targets (TaskGraph / GraphFunction) from the UnifiedRegistry.
|
|
48
|
+
- Create and persist RunRecord metadata in the RunStore.
|
|
49
|
+
- Enforce a soft concurrency limit via an in-process run slot counter.
|
|
50
|
+
- Drive execution via run_or_resume_async and record status / errors.
|
|
51
|
+
- Emit metering events (duration, status, user/org, graph_id).
|
|
52
|
+
- Best-effort cancellation by talking to the scheduler registry.
|
|
53
|
+
|
|
54
|
+
Key entrypoints
|
|
55
|
+
---------------
|
|
56
|
+
submit_run(...)
|
|
57
|
+
Non-blocking API entrypoint (used by HTTP routes).
|
|
58
|
+
- Acquires a run slot (respecting max_concurrent_runs).
|
|
59
|
+
- Creates a RunRecord (status=running) and saves it.
|
|
60
|
+
- Schedules a background coroutine (_bg) that:
|
|
61
|
+
* Calls _run_and_finalize(...)
|
|
62
|
+
* Always releases the run slot in a finally block.
|
|
63
|
+
- Returns immediately with the RunRecord so the caller can poll status.
|
|
64
|
+
|
|
65
|
+
start_run(...)
|
|
66
|
+
Blocking helper (tests / CLI).
|
|
67
|
+
- Same setup as submit_run, but runs _run_and_finalize(...) inline.
|
|
68
|
+
- Returns (RunRecord, outputs, has_waits, continuations).
|
|
69
|
+
|
|
70
|
+
_run_and_finalize(...)
|
|
71
|
+
Shared core logic used by both submit_run and start_run.
|
|
72
|
+
- Calls run_or_resume_async(target, inputs, run_id, session_id).
|
|
73
|
+
- Maps successful results into a dict of outputs.
|
|
74
|
+
- Handles:
|
|
75
|
+
* Normal completion -> status = succeeded.
|
|
76
|
+
* GraphHasPendingWaits -> status = failed (for now), has_waits=True.
|
|
77
|
+
* asyncio.CancelledError -> status = canceled.
|
|
78
|
+
* Other exceptions -> status = failed, error message recorded.
|
|
79
|
+
- Updates RunStore status fields (finished_at, error).
|
|
80
|
+
- Sends a metering event with status / duration.
|
|
81
|
+
|
|
82
|
+
Concurrency model
|
|
83
|
+
-----------------
|
|
84
|
+
- _acquire_run_slot / _release_run_slot protect a _running counter with an
|
|
85
|
+
asyncio.Lock to enforce max_concurrent_runs within this process.
|
|
86
|
+
- submit_run takes ownership of a slot until responsibility is handed to
|
|
87
|
+
the background runner (_bg). Once _bg is scheduled, it is responsible
|
|
88
|
+
for releasing the slot in its finally block.
|
|
89
|
+
- If submit_run fails before the handoff, it releases the slot itself to
|
|
90
|
+
avoid leaks.
|
|
91
|
+
|
|
92
|
+
Cancellation
|
|
93
|
+
------------
|
|
94
|
+
cancel_run(run_id)
|
|
95
|
+
- Looks up the RunRecord (if available) and, if not terminal, marks it
|
|
96
|
+
as cancellation_requested in the RunStore.
|
|
97
|
+
- Uses the scheduler registry to find the scheduler for this run:
|
|
98
|
+
* GlobalForwardScheduler: terminate_run(run_id)
|
|
99
|
+
* ForwardScheduler: terminate()
|
|
100
|
+
- The actual transition to RunStatus.canceled happens when the
|
|
101
|
+
scheduler cancels the task and run_or_resume_async raises
|
|
102
|
+
asyncio.CancelledError, which _run_and_finalize() translates into
|
|
103
|
+
a canceled run.
|
|
104
|
+
|
|
105
|
+
TODO: for global schedulers, we may want to have a dedicated run manager -- current
|
|
106
|
+
implementation utilizes the async_run, which creates a local ForwardScheduler instance for
|
|
107
|
+
each graph run. This is fine for up to a few thousand concurrent graphs but may
|
|
108
|
+
not scale well for a large number of concurrent graphs.
|
|
109
|
+
"""
|
|
110
|
+
|
|
111
|
+
def __init__(
    self,
    *,
    run_store: RunStore | None = None,
    registry: UnifiedRegistry | None = None,
    sched_registry: Any | None = None,  # placeholder for future use
    max_concurrent_runs: int | None = None,
) -> None:
    """Set up the manager's collaborators and in-process bookkeeping.

    Args:
        run_store: Store used to persist run records; stored as-is (may be None).
        registry: Registry used to resolve graph targets; stored as-is (may be None).
        sched_registry: Placeholder for future use; stored as-is.
        max_concurrent_runs: Soft cap on simultaneously running graphs in
            this process. ``None`` disables the cap.
    """
    # Soft concurrency limit: a counter guarded by its own asyncio lock.
    self._max_concurrent_runs, self._running = max_concurrent_runs, 0
    self._lock = asyncio.Lock()

    # Injected collaborators.
    self._store, self._registry, self._sched_registry = run_store, registry, sched_registry

    # Futures keyed by run_id, guarded by an asyncio lock. No thread lock is
    # needed because run_manager is used within the event loop.
    self._run_waiters_lock = asyncio.Lock()
    self._run_waiters: dict[str, asyncio.Future] = {}
|
|
129
|
+
|
|
130
|
+
# -------- concurrency helpers --------
|
|
131
|
+
async def _acquire_run_slot(self) -> None:
    """Claim one run slot, rejecting the request when the process is at capacity.

    Does nothing when ``max_concurrent_runs`` is ``None`` (no limit).

    Raises:
        fastapi.HTTPException: HTTP 429 when the running counter has already
            reached ``max_concurrent_runs``.
    """
    if self._max_concurrent_runs is None:
        return

    async with self._lock:
        if self._running < self._max_concurrent_runs:
            self._running += 1
            return

        # At capacity: fastapi is imported only on this rejection path.
        from fastapi import HTTPException, status

        raise HTTPException(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            detail="Too many runs are currently executing. Please wait and try again.",
        )
|
|
143
|
+
|
|
144
|
+
async def _release_run_slot(self) -> None:
    """Give back one run slot; the counter never drops below zero.

    Does nothing when ``max_concurrent_runs`` is ``None`` (no limit).
    """
    if self._max_concurrent_runs is not None:
        async with self._lock:
            remaining = self._running - 1
            # Clamp at zero so an unmatched release cannot go negative.
            self._running = remaining if remaining > 0 else 0
|
|
149
|
+
|
|
150
|
+
# -------- registry helpers --------
|
|
151
|
+
|
|
152
|
+
def registry(self) -> UnifiedRegistry:
    """Return the injected registry, falling back to ``current_registry()``.

    The fallback is used whenever the injected registry is falsy (for
    example ``None``).
    """
    injected = self._registry
    if injected:
        return injected
    return current_registry()
|
|
154
|
+
|
|
155
|
+
async def _resolve_target(self, graph_id: str) -> Any:
    """Look up *graph_id* in the registry and return the runnable target.

    The static TaskGraph lookup (``get_graph``) is tried first, then the
    GraphFunction lookup (``get_graphfn``). A ``KeyError`` from either
    lookup means "not registered under that kind".

    Raises:
        KeyError: when neither lookup knows *graph_id*.
    """
    reg = self.registry()
    # Resolve each accessor lazily so the second one is only touched when
    # the first lookup has already failed.
    for accessor_name in ("get_graph", "get_graphfn"):
        try:
            return getattr(reg, accessor_name)(name=graph_id, version=None)
        except KeyError:
            continue
    raise KeyError(f"Graph '{graph_id}' not found")
|
|
168
|
+
|
|
169
|
+
# -------- core execution helper --------
|
|
170
|
+
|
|
171
|
+
async def _run_and_finalize(
    self,
    *,
    record: RunRecord,
    target: Any,
    graph_id: str,
    inputs: dict[str, Any],
    identity: RequestIdentity,
) -> tuple[RunRecord, dict[str, Any] | None, bool, list[dict[str, Any]]]:
    """
    Execute *target* through run_or_resume_async, then finalize the run.

    Finalization, in order:
      1. Map the outcome onto ``record`` (status / finished_at / error).
      2. Persist the status to the RunStore, when one is configured.
      3. Record a metering event (failures here are logged, not raised).
      4. Resolve the run future for terminal statuses (failures here are
         logged, not raised).

    Outcome mapping:
      * normal return        -> succeeded; a non-dict result is wrapped as
                                ``{"result": result}``.
      * asyncio.CancelledError -> canceled; the error is swallowed here.
      * GraphHasPendingWaits -> failed (for now) with has_waits=True;
                                finished_at is left untouched.
      * any other Exception  -> failed; ``str(exc)`` is stored on the record.

    Returns:
        (record, outputs, has_waits, continuations)
    """
    import logging

    from aethergraph.core.runtime.graph_runner import run_or_resume_async

    log = logging.getLogger("aethergraph.runtime.run_manager")

    user_id = identity.user_id
    org_id = identity.org_id

    started_at = record.started_at or _utcnow()

    outputs: dict[str, Any] | None = None
    has_waits = False
    continuations: list[dict[str, Any]] = []
    error_msg: str | None = None

    try:
        raw_result = await run_or_resume_async(
            target,
            inputs or {},
            run_id=record.run_id,
            session_id=record.meta.get("session_id"),
            identity=identity,
            agent_id=record.agent_id,
            app_id=record.app_id,
        )
        # Reaching this point without GraphHasPendingWaits means completion.
        if isinstance(raw_result, dict):
            outputs = raw_result
        else:
            outputs = {"result": raw_result}
        record.status = RunStatus.succeeded
        record.finished_at = _utcnow()

    except asyncio.CancelledError:
        # Cancellation path: scheduler.terminate() or an external cancel.
        record.status = RunStatus.canceled
        record.finished_at = _utcnow()
        error_msg = "Run cancelled by user"
        log.info("Run %s was cancelled", record.run_id)

    except GraphHasPendingWaits as pending:
        # The graph quiesced with pending waits; outputs stay None.
        record.status = RunStatus.failed  # consider a 'waiting' status later
        has_waits = True
        continuations = getattr(pending, "continuations", [])

    except Exception as exc:  # noqa: BLE001
        record.status = RunStatus.failed
        record.finished_at = _utcnow()
        error_msg = str(exc)
        record.error = error_msg
        log.exception("Run %s failed with exception: %s", record.run_id, error_msg)

    # Persist the status update.
    store = self._store
    if store is not None:
        await store.update_status(
            record.run_id,
            record.status,
            finished_at=record.finished_at,
            error=error_msg,
        )

    # Metering: duration is measured up to finished_at, or "now" when the
    # run has no finished_at (the pending-waits path).
    meter = current_metering()
    finished_at = record.finished_at or _utcnow()
    duration_s = (finished_at - started_at).total_seconds()
    meter_status = (
        "waiting" if has_waits else getattr(record.status, "value", str(record.status))
    )

    try:
        await meter.record_run(
            user_id=user_id,
            org_id=org_id,
            run_id=record.run_id,
            graph_id=graph_id,
            status=meter_status,
            duration_s=duration_s,
        )
    except Exception:  # noqa: BLE001
        log.exception("Error recording run metering for run_id=%s", record.run_id)

    # Resolve the run future once the run has reached a terminal status.
    terminal_statuses = {RunStatus.succeeded, RunStatus.failed, RunStatus.canceled}
    try:
        if record.status in terminal_statuses:
            await self._resolve_run_future(record.run_id, record)
    except Exception:  # noqa: BLE001
        log.exception("Error resolving run future for run_id=%s", record.run_id)

    return record, outputs, has_waits, continuations
|
|
293
|
+
|
|
294
|
+
# -------- new: non-blocking submit_run --------
|
|
295
|
+
|
|
296
|
+
async def submit_run(
    self,
    graph_id: str,
    *,
    inputs: dict[str, Any],
    run_id: str | None = None,
    session_id: str | None = None,
    tags: list[str] | None = None,
    identity: RequestIdentity | None = None,
    origin: RunOrigin | None = None,
    visibility: RunVisibility | None = None,
    importance: RunImportance | None = None,
    agent_id: str | None = None,
    app_id: str | None = None,
) -> RunRecord:
    """
    Non-blocking entrypoint for the HTTP API.

    - Acquires a run slot (see _acquire_run_slot).
    - Creates a RunRecord (status=running) and persists it to the RunStore,
      when one is configured.
    - Schedules background execution on the running event loop.
    - Returns immediately with the record (for run_id, status, etc).

    Args:
        graph_id: Registry name of the TaskGraph / GraphFunction to run.
        inputs: Inputs forwarded to the graph.
        run_id: Explicit run id; a short random "run-xxxxxxxx" id is
            generated when omitted.
        session_id: Session to attach the run to; defaults to the run id.
        tags: Extra tags; "flow:<id>" and "session:<id>" tags are appended
            when missing.
        identity: Caller identity; defaults to a local identity.
        origin / visibility / importance: Run classification; default to
            RunOrigin.app, RunVisibility.normal and RunImportance.normal.
        agent_id / app_id: Stored on the record as-is.

    Raises:
        fastapi.HTTPException: HTTP 429 from _acquire_run_slot when the
            concurrency limit is reached.
        KeyError: from _resolve_target when graph_id is not registered.
    """
    if identity is None:
        identity = RequestIdentity(user_id="local", org_id="local", mode="local")

    user_id = identity.user_id
    org_id = identity.org_id

    # Acquire run slot (rate limiting)
    await self._acquire_run_slot()
    # Tracks whether responsibility for releasing the slot has been handed
    # over to the background runner (_bg). If False, submit_run must
    # release the slot on failure; if True, _bg will do it in its finally.
    slot_handed_to_bg = False

    try:
        tags = tags or []
        target = await self._resolve_target(graph_id)
        rid = run_id or f"run-{uuid4().hex[:8]}"
        started_at = _utcnow()

        if _is_task_graph(target):
            kind = "taskgraph"
        elif _is_graphfn(target):
            kind = "graphfn"
        else:
            kind = "other"

        # pull flow_id and entrypoint from registry if possible
        flow_id: str | None = None
        reg = self.registry()
        if reg is not None:
            if kind == "taskgraph":
                meta = reg.get_meta(nspace="graph", name=graph_id, version=None) or {}
            elif kind == "graphfn":
                meta = reg.get_meta(nspace="graphfn", name=graph_id, version=None) or {}
            else:
                meta = {}
            flow_id = meta.get("flow_id") or graph_id

        # use run_id as session_id if not provided
        if session_id is None:
            session_id = rid

        record = RunRecord(
            run_id=rid,
            graph_id=graph_id,
            kind=kind,
            status=RunStatus.running,  # we go straight to running as before
            started_at=started_at,
            tags=list(tags),
            user_id=user_id,
            org_id=org_id,
            meta={},
            session_id=session_id,
            origin=origin or RunOrigin.app,  # app is a typical default for graph runs
            visibility=visibility or RunVisibility.normal,
            importance=importance or RunImportance.normal,
            agent_id=agent_id,
            app_id=app_id,
        )

        if flow_id:
            record.meta["flow_id"] = flow_id
            if f"flow:{flow_id}" not in record.tags:
                record.tags.append(f"flow:{flow_id}")  # add flow tag if missing
        if session_id:
            record.meta["session_id"] = session_id
            if f"session:{session_id}" not in record.tags:
                record.tags.append(f"session:{session_id}")  # add session tag if missing

        if self._store is not None:
            await self._store.create(record)

        async def _bg() -> None:
            try:
                await self._run_and_finalize(
                    record=record,
                    target=target,
                    graph_id=graph_id,
                    inputs=inputs,
                    identity=identity,
                )
            finally:
                await self._release_run_slot()

        # If we're in an event loop (server), schedule in the background.
        # If not, just run inline so behaviour is still sane.
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No running asyncio loop was reported; run inline.
            # _bg() is responsible for releasing the slot in its finally.
            slot_handed_to_bg = True
            await _bg()
        else:
            # The event loop only keeps weak references to tasks, so hold a
            # strong reference until the task finishes; otherwise the run
            # could be garbage-collected mid-execution. The set is created
            # lazily so this method does not depend on __init__ changes.
            bg_tasks = getattr(self, "_bg_tasks", None)
            if bg_tasks is None:
                bg_tasks = self._bg_tasks = set()
            task = loop.create_task(_bg())
            # Only now is _bg guaranteed to run its finally and release the slot.
            slot_handed_to_bg = True
            bg_tasks.add(task)
            task.add_done_callback(bg_tasks.discard)

        return record
    except BaseException:
        # If submit_run itself fails *before* handing off to _bg, we must
        # release the slot here. BaseException (not Exception) is caught so
        # that asyncio.CancelledError during setup does not leak the slot.
        # Once slot_handed_to_bg is True, _bg is responsible for the release.
        if not slot_handed_to_bg:
            await self._release_run_slot()
        raise
|
|
426
|
+
|
|
427
|
+
async def run_and_wait(
    self,
    graph_id: str,
    *,
    inputs: dict[str, Any],
    run_id: str | None = None,
    session_id: str | None = None,
    tags: list[str] | None = None,
    identity: RequestIdentity | None = None,
    origin: RunOrigin | None = None,
    visibility: RunVisibility | None = None,
    importance: RunImportance | None = None,
    agent_id: str | None = None,
    app_id: str | None = None,
    count_slot: bool = False,  # important for nested orchestration
) -> tuple[RunRecord, dict[str, Any] | None, bool, list[dict[str, Any]]]:
    """
    Run a graph inline (awaiting completion) while still recording it as a run.

    Steps performed here:
    - Resolve ``graph_id`` to a target and build a RunRecord with status=running.
    - Persist the record when a run store is configured.
    - Await ``_run_and_finalize`` and return its result, annotated as
      (record, outputs, has_waits, continuations).

    ``identity`` defaults to a local identity (user/org "local"). ``origin``,
    ``visibility`` and ``importance`` fall back to app / normal / normal when
    not given. ``session_id`` falls back to the run id.

    When ``count_slot`` is true a run slot is acquired before the run and always
    released afterwards. count_slot=False is recommended for "parent run awaiting
    child run" orchestration to avoid deadlocks when max_concurrent_runs is small.
    """
    if identity is None:
        identity = RequestIdentity(user_id="local", org_id="local", mode="local")

    if count_slot:
        await self._acquire_run_slot()

    try:
        tag_source = tags or []
        # Same resolver that submit_run uses.
        target = await self._resolve_target(graph_id)
        rid = run_id or f"run-{uuid4().hex[:8]}"
        started_at = _utcnow()

        # Classify the target; the kind also selects the registry namespace below.
        if _is_task_graph(target):
            kind = "taskgraph"
        else:
            kind = "graphfn" if _is_graphfn(target) else "other"

        # flow_id comes from registry metadata when available, else the graph id.
        flow_id: str | None = None
        registry = self.registry()
        if registry is not None:
            namespace = {"taskgraph": "graph", "graphfn": "graphfn"}.get(kind)
            if namespace is None:
                registry_meta = {}
            else:
                registry_meta = (
                    registry.get_meta(nspace=namespace, name=graph_id, version=None) or {}
                )
            flow_id = registry_meta.get("flow_id") or graph_id

        # A run without an explicit session forms its own session.
        effective_session = rid if session_id is None else session_id

        record = RunRecord(
            run_id=rid,
            graph_id=graph_id,
            kind=kind,
            status=RunStatus.running,
            started_at=started_at,
            tags=list(tag_source),
            user_id=identity.user_id,
            org_id=identity.org_id,
            meta={},
            session_id=effective_session,
            origin=origin or RunOrigin.app,
            visibility=visibility or RunVisibility.normal,
            importance=importance or RunImportance.normal,
            agent_id=agent_id,
            app_id=app_id,
        )

        # Mirror flow/session into both meta and tags (tags only if missing).
        if flow_id:
            record.meta["flow_id"] = flow_id
            flow_tag = f"flow:{flow_id}"
            if flow_tag not in record.tags:
                record.tags.append(flow_tag)
        if effective_session:
            record.meta["session_id"] = effective_session
            session_tag = f"session:{effective_session}"
            if session_tag not in record.tags:
                record.tags.append(session_tag)

        if self._store is not None:
            await self._store.create(record)

        # Inline execution: the caller blocks until finalization returns.
        return await self._run_and_finalize(
            record=record,
            target=target,
            graph_id=graph_id,
            inputs=inputs,
            identity=identity,
        )
    finally:
        if count_slot:
            await self._release_run_slot()
|
|
531
|
+
|
|
532
|
+
# -------- old: blocking start_run (CLI/tests) --------
|
|
533
|
+
async def start_run(
    self,
    graph_id: str,
    *,
    inputs: dict[str, Any],
    run_id: str | None = None,
    session_id: str | None = None,
    tags: list[str] | None = None,
    identity: RequestIdentity | None = None,
    agent_id: str | None = None,
    app_id: str | None = None,
) -> tuple[RunRecord, dict[str, Any] | None, bool, list[dict[str, Any]]]:
    """
    Blocking helper (original behaviour).

    Thin wrapper over ``run_and_wait`` with ``count_slot=False`` and no explicit
    origin / visibility / importance, so ``run_and_wait`` applies its defaults
    (app / normal / normal). ``run_and_wait`` therefore performs the work:

    - Resolves target.
    - Creates RunRecord with status=running (persisted when a store is configured).
    - Runs inline and finalizes via ``_run_and_finalize``.
    - Returns (record, outputs, has_waits, continuations).

    A missing ``identity`` is replaced by the local identity inside
    ``run_and_wait``; a missing ``session_id`` falls back to the run id.

    Still useful for tests/CLI, but the HTTP route should prefer submit_run().

    NOTE:
    agent_id and app_id will override any value pulled from original graphs. Use it
    only when you want to explicitly set these fields for tracking purpose.
    """
    # Delegating keeps record construction in one place instead of a
    # line-for-line copy that can drift from run_and_wait.
    return await self.run_and_wait(
        graph_id,
        inputs=inputs,
        run_id=run_id,
        session_id=session_id,
        tags=tags,
        identity=identity,
        agent_id=agent_id,
        app_id=app_id,
        count_slot=False,  # start_run never counted against the run-slot limit
    )
|
|
630
|
+
|
|
631
|
+
async def get_record(self, run_id: str) -> RunRecord | None:
    """Fetch the stored record for ``run_id``; ``None`` when no store is configured."""
    store = self._store
    # Without a store there is nothing to look up.
    return None if store is None else await store.get(run_id)
|
|
636
|
+
|
|
637
|
+
async def list_records(
    self,
    *,
    graph_id: str | None = None,
    status: RunStatus | None = None,
    flow_id: str | None = None,
    user_id: str | None = None,
    org_id: str | None = None,
    session_id: str | None = None,
    limit: int = 100,
    offset: int = 0,
) -> list[RunRecord]:
    """
    List run records from the store, optionally narrowed by ``flow_id``.

    All filters except ``flow_id`` are forwarded to the store's ``list`` call,
    together with ``limit`` and ``offset``. ``flow_id`` is matched afterwards in
    Python against ``record.meta["flow_id"]``, so a page filtered by ``flow_id``
    can contain fewer than ``limit`` records.

    Returns an empty list when no store is configured, in line with how
    ``get_record`` treats a missing store.
    """
    if self._store is None:
        return []

    records = await self._store.list(
        graph_id=graph_id,
        status=status,
        user_id=user_id,
        org_id=org_id,
        session_id=session_id,
        limit=limit,
        offset=offset,
    )
    # flow_id lives in meta/tags rather than a store column, so filter here.
    if flow_id is not None:
        records = [rec for rec in records if (rec.meta or {}).get("flow_id") == flow_id]

    return records
|
|
663
|
+
|
|
664
|
+
def _get_sched_registry(self):
|
|
665
|
+
if self._sched_registry is not None:
|
|
666
|
+
return self._sched_registry
|
|
667
|
+
try:
|
|
668
|
+
container = current_services()
|
|
669
|
+
except Exception:
|
|
670
|
+
return None
|
|
671
|
+
return getattr(container, "sched_registry", None)
|
|
672
|
+
|
|
673
|
+
async def cancel_run(self, run_id: str) -> RunRecord | None:
    """
    Request best-effort cancellation of a run.

    Outcomes, as implemented here:
    - No record found (or no store configured): the scheduler registered for
      ``run_id`` is still asked to stop, in case the run has not been persisted
      yet, and ``None`` is returned.
    - Record already succeeded / failed / canceled: returned unchanged.
    - Otherwise: the record is marked ``cancellation_requested`` (persisted when
      a store is configured), the scheduler is asked to stop, and the record is
      returned.

    Errors raised while terminating the scheduler are logged, not propagated.
    This method never sets ``RunStatus.canceled`` itself.
    NOTE(review): the final transition is expected to happen in
    ``_run_and_finalize`` when the scheduler raises asyncio.CancelledError;
    confirm there.
    """
    record: RunRecord | None = None
    if self._store is not None:
        record = await self._store.get(run_id)

    async def _terminate_scheduler() -> None:
        """Ask the scheduler registered for this run, if any, to stop."""
        registry = self._get_sched_registry()
        scheduler = registry.get(run_id) if registry is not None else None
        if scheduler is None:
            return

        try:
            # A global scheduler is told which run to stop;
            # a local scheduler is terminated as a whole.
            if isinstance(scheduler, GlobalForwardScheduler):
                await scheduler.terminate_run(run_id)
            elif isinstance(scheduler, ForwardScheduler):
                await scheduler.terminate()
        except Exception:  # noqa: BLE001
            import logging

            logging.getLogger("aethergraph.runtime.run_manager").exception(
                "Error terminating scheduler for run_id=%s", run_id
            )

    # Nothing persisted: still try the scheduler, then report "not found".
    if record is None:
        await _terminate_scheduler()
        return None

    finished_states = {
        RunStatus.succeeded,
        RunStatus.failed,
        RunStatus.canceled,
    }
    if record.status not in finished_states:
        # Flag the request first so the UI can react immediately.
        record.status = RunStatus.cancellation_requested
        if self._store is not None:
            await self._store.update_status(
                run_id,
                RunStatus.cancellation_requested,
                finished_at=None,
                error=None,
            )

        # Then ask the scheduler to stop.
        await _terminate_scheduler()

    return record
|
|
744
|
+
|
|
745
|
+
# ------- run waiters for orchestration --------
|
|
746
|
+
async def wait_run(
    self,
    run_id: str,
    *,
    timeout_s: float | None = None,
) -> RunRecord:
    """
    Wait until the run identified by ``run_id`` completes.

    If the stored record is already succeeded / failed / canceled it is returned
    immediately. Otherwise this awaits the per-run future obtained from
    ``_get_or_create_run_future`` and returns whatever that future is resolved
    with (or raises the exception it is rejected with).

    Args:
        run_id: Identifier of the run to wait for.
        timeout_s: Maximum seconds to wait; ``None`` waits indefinitely.

    Raises:
        asyncio.TimeoutError: if ``timeout_s`` elapses first.

    The per-run future is shared by every waiter on the same run, so it is
    awaited through ``asyncio.shield``: a timeout or cancellation of this
    waiter must not cancel the future the other waiters depend on.
    """
    # Fast path: already terminal in store
    rec = await self.get_record(run_id)
    if rec and rec.status in {RunStatus.succeeded, RunStatus.failed, RunStatus.canceled}:
        return rec

    # NOTE(review): a run that finishes between the store check above and the
    # future registration below may never resolve this future; confirm the
    # ordering used by the finalization path.
    fut = await self._get_or_create_run_future(run_id)

    # wait_for() cancels what it awaits on timeout, and cancelling a task
    # cancels the future it is blocked on. The shield absorbs both, leaving
    # the shared future pending.
    guarded = asyncio.shield(fut)
    if timeout_s is not None:
        return await asyncio.wait_for(guarded, timeout=timeout_s)
    return await guarded
|
|
761
|
+
|
|
762
|
+
async def _get_or_create_run_future(self, run_id: str) -> asyncio.Future:
|
|
763
|
+
async with self._run_waiters_lock:
|
|
764
|
+
fut = self._run_waiters.get(run_id)
|
|
765
|
+
if fut is None or fut.done():
|
|
766
|
+
fut = asyncio.get_running_loop().create_future()
|
|
767
|
+
self._run_waiters[run_id] = fut
|
|
768
|
+
return fut
|
|
769
|
+
|
|
770
|
+
async def _resolve_run_future(self, run_id: str, value: Any) -> None:
|
|
771
|
+
async with self._run_waiters_lock:
|
|
772
|
+
fut = self._run_waiters.get(run_id)
|
|
773
|
+
if fut and not fut.done():
|
|
774
|
+
fut.set_result(value)
|
|
775
|
+
# optional cleanup
|
|
776
|
+
self._run_waiters.pop(run_id, None)
|
|
777
|
+
|
|
778
|
+
async def _reject_run_future(self, run_id: str, err: Exception) -> None:
|
|
779
|
+
async with self._run_waiters_lock:
|
|
780
|
+
fut = self._run_waiters.get(run_id)
|
|
781
|
+
if fut and not fut.done():
|
|
782
|
+
fut.set_exception(err)
|
|
783
|
+
self._run_waiters.pop(run_id, None)
|