codex-autorunner 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codex_autorunner/agents/opencode/client.py +113 -4
- codex_autorunner/agents/opencode/supervisor.py +4 -0
- codex_autorunner/agents/registry.py +17 -7
- codex_autorunner/bootstrap.py +219 -1
- codex_autorunner/core/__init__.py +17 -1
- codex_autorunner/core/about_car.py +124 -11
- codex_autorunner/core/app_server_threads.py +6 -0
- codex_autorunner/core/config.py +238 -3
- codex_autorunner/core/context_awareness.py +39 -0
- codex_autorunner/core/docs.py +0 -122
- codex_autorunner/core/filebox.py +265 -0
- codex_autorunner/core/flows/controller.py +71 -1
- codex_autorunner/core/flows/reconciler.py +4 -1
- codex_autorunner/core/flows/runtime.py +22 -0
- codex_autorunner/core/flows/store.py +61 -9
- codex_autorunner/core/flows/transition.py +23 -16
- codex_autorunner/core/flows/ux_helpers.py +18 -3
- codex_autorunner/core/flows/worker_process.py +32 -6
- codex_autorunner/core/hub.py +198 -41
- codex_autorunner/core/lifecycle_events.py +253 -0
- codex_autorunner/core/path_utils.py +2 -1
- codex_autorunner/core/pma_audit.py +224 -0
- codex_autorunner/core/pma_context.py +683 -0
- codex_autorunner/core/pma_dispatch_interceptor.py +284 -0
- codex_autorunner/core/pma_lifecycle.py +527 -0
- codex_autorunner/core/pma_queue.py +367 -0
- codex_autorunner/core/pma_safety.py +221 -0
- codex_autorunner/core/pma_state.py +115 -0
- codex_autorunner/core/ports/agent_backend.py +2 -5
- codex_autorunner/core/ports/run_event.py +1 -4
- codex_autorunner/core/prompt.py +0 -80
- codex_autorunner/core/prompts.py +56 -172
- codex_autorunner/core/redaction.py +0 -4
- codex_autorunner/core/review_context.py +11 -9
- codex_autorunner/core/runner_controller.py +35 -33
- codex_autorunner/core/runner_state.py +147 -0
- codex_autorunner/core/runtime.py +829 -0
- codex_autorunner/core/sqlite_utils.py +13 -4
- codex_autorunner/core/state.py +7 -10
- codex_autorunner/core/state_roots.py +5 -0
- codex_autorunner/core/templates/__init__.py +39 -0
- codex_autorunner/core/templates/git_mirror.py +234 -0
- codex_autorunner/core/templates/provenance.py +56 -0
- codex_autorunner/core/templates/scan_cache.py +120 -0
- codex_autorunner/core/ticket_linter_cli.py +17 -0
- codex_autorunner/core/ticket_manager_cli.py +154 -92
- codex_autorunner/core/time_utils.py +11 -0
- codex_autorunner/core/types.py +18 -0
- codex_autorunner/core/utils.py +34 -6
- codex_autorunner/flows/review/service.py +23 -25
- codex_autorunner/flows/ticket_flow/definition.py +43 -1
- codex_autorunner/integrations/agents/__init__.py +2 -0
- codex_autorunner/integrations/agents/backend_orchestrator.py +18 -0
- codex_autorunner/integrations/agents/codex_backend.py +19 -8
- codex_autorunner/integrations/agents/runner.py +3 -8
- codex_autorunner/integrations/agents/wiring.py +8 -0
- codex_autorunner/integrations/telegram/adapter.py +1 -1
- codex_autorunner/integrations/telegram/config.py +1 -1
- codex_autorunner/integrations/telegram/doctor.py +228 -6
- codex_autorunner/integrations/telegram/handlers/commands/execution.py +236 -74
- codex_autorunner/integrations/telegram/handlers/commands/files.py +314 -75
- codex_autorunner/integrations/telegram/handlers/commands/flows.py +346 -58
- codex_autorunner/integrations/telegram/handlers/commands/workspace.py +498 -37
- codex_autorunner/integrations/telegram/handlers/commands_runtime.py +202 -45
- codex_autorunner/integrations/telegram/handlers/commands_spec.py +18 -7
- codex_autorunner/integrations/telegram/handlers/messages.py +34 -3
- codex_autorunner/integrations/telegram/helpers.py +1 -3
- codex_autorunner/integrations/telegram/runtime.py +9 -4
- codex_autorunner/integrations/telegram/service.py +30 -0
- codex_autorunner/integrations/telegram/state.py +38 -0
- codex_autorunner/integrations/telegram/ticket_flow_bridge.py +10 -4
- codex_autorunner/integrations/telegram/transport.py +10 -3
- codex_autorunner/integrations/templates/__init__.py +27 -0
- codex_autorunner/integrations/templates/scan_agent.py +312 -0
- codex_autorunner/server.py +2 -2
- codex_autorunner/static/agentControls.js +21 -5
- codex_autorunner/static/app.js +115 -11
- codex_autorunner/static/archive.js +274 -81
- codex_autorunner/static/archiveApi.js +21 -0
- codex_autorunner/static/chatUploads.js +137 -0
- codex_autorunner/static/constants.js +1 -1
- codex_autorunner/static/docChatCore.js +185 -13
- codex_autorunner/static/fileChat.js +68 -40
- codex_autorunner/static/fileboxUi.js +159 -0
- codex_autorunner/static/hub.js +46 -81
- codex_autorunner/static/index.html +303 -24
- codex_autorunner/static/messages.js +82 -4
- codex_autorunner/static/notifications.js +288 -0
- codex_autorunner/static/pma.js +1167 -0
- codex_autorunner/static/settings.js +3 -0
- codex_autorunner/static/streamUtils.js +57 -0
- codex_autorunner/static/styles.css +9141 -6742
- codex_autorunner/static/templateReposSettings.js +225 -0
- codex_autorunner/static/terminalManager.js +22 -3
- codex_autorunner/static/ticketChatActions.js +165 -3
- codex_autorunner/static/ticketChatStream.js +17 -119
- codex_autorunner/static/ticketEditor.js +41 -13
- codex_autorunner/static/ticketTemplates.js +798 -0
- codex_autorunner/static/tickets.js +69 -19
- codex_autorunner/static/turnEvents.js +27 -0
- codex_autorunner/static/turnResume.js +33 -0
- codex_autorunner/static/utils.js +28 -0
- codex_autorunner/static/workspace.js +258 -44
- codex_autorunner/static/workspaceFileBrowser.js +6 -4
- codex_autorunner/surfaces/cli/cli.py +1465 -155
- codex_autorunner/surfaces/cli/pma_cli.py +817 -0
- codex_autorunner/surfaces/web/app.py +253 -49
- codex_autorunner/surfaces/web/routes/__init__.py +4 -0
- codex_autorunner/surfaces/web/routes/analytics.py +29 -22
- codex_autorunner/surfaces/web/routes/archive.py +197 -0
- codex_autorunner/surfaces/web/routes/file_chat.py +297 -36
- codex_autorunner/surfaces/web/routes/filebox.py +227 -0
- codex_autorunner/surfaces/web/routes/flows.py +219 -29
- codex_autorunner/surfaces/web/routes/messages.py +70 -39
- codex_autorunner/surfaces/web/routes/pma.py +1652 -0
- codex_autorunner/surfaces/web/routes/repos.py +1 -1
- codex_autorunner/surfaces/web/routes/shared.py +0 -3
- codex_autorunner/surfaces/web/routes/templates.py +634 -0
- codex_autorunner/surfaces/web/runner_manager.py +2 -2
- codex_autorunner/surfaces/web/schemas.py +81 -18
- codex_autorunner/tickets/agent_pool.py +27 -0
- codex_autorunner/tickets/files.py +33 -16
- codex_autorunner/tickets/lint.py +50 -0
- codex_autorunner/tickets/models.py +3 -0
- codex_autorunner/tickets/outbox.py +41 -5
- codex_autorunner/tickets/runner.py +350 -69
- {codex_autorunner-1.1.0.dist-info → codex_autorunner-1.2.1.dist-info}/METADATA +15 -19
- {codex_autorunner-1.1.0.dist-info → codex_autorunner-1.2.1.dist-info}/RECORD +132 -101
- codex_autorunner/core/adapter_utils.py +0 -21
- codex_autorunner/core/engine.py +0 -3302
- {codex_autorunner-1.1.0.dist-info → codex_autorunner-1.2.1.dist-info}/WHEEL +0 -0
- {codex_autorunner-1.1.0.dist-info → codex_autorunner-1.2.1.dist-info}/entry_points.txt +0 -0
- {codex_autorunner-1.1.0.dist-info → codex_autorunner-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {codex_autorunner-1.1.0.dist-info → codex_autorunner-1.2.1.dist-info}/top_level.txt +0 -0
|
@@ -45,22 +45,7 @@ def resolve_flow_transition(
|
|
|
45
45
|
inner_status = engine.get("status")
|
|
46
46
|
reason_code = engine.get("reason_code")
|
|
47
47
|
|
|
48
|
-
# 1)
|
|
49
|
-
if (
|
|
50
|
-
record.status in (FlowRunStatus.RUNNING, FlowRunStatus.STOPPING)
|
|
51
|
-
and not health.is_alive
|
|
52
|
-
):
|
|
53
|
-
new_status = (
|
|
54
|
-
FlowRunStatus.STOPPED
|
|
55
|
-
if record.status == FlowRunStatus.STOPPING
|
|
56
|
-
else FlowRunStatus.FAILED
|
|
57
|
-
)
|
|
58
|
-
state = ensure_reason_summary(state, status=new_status, default="Worker died")
|
|
59
|
-
return TransitionDecision(
|
|
60
|
-
status=new_status, finished_at=now, state=state, note="worker-dead"
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
# 2) Inner engine reconciliation (worker is alive or not required).
|
|
48
|
+
# 1) Inner engine completion takes priority over worker liveness for active flows.
|
|
64
49
|
if record.status == FlowRunStatus.RUNNING:
|
|
65
50
|
if inner_status == "paused":
|
|
66
51
|
state = ensure_reason_summary(state, status=FlowRunStatus.PAUSED)
|
|
@@ -79,10 +64,32 @@ def resolve_flow_transition(
|
|
|
79
64
|
note="engine-completed",
|
|
80
65
|
)
|
|
81
66
|
|
|
67
|
+
# 2) Worker liveness overrides for active flows (only if engine not completed).
|
|
68
|
+
if not health.is_alive:
|
|
69
|
+
new_status = FlowRunStatus.FAILED
|
|
70
|
+
state = ensure_reason_summary(
|
|
71
|
+
state, status=new_status, default="Worker died"
|
|
72
|
+
)
|
|
73
|
+
return TransitionDecision(
|
|
74
|
+
status=new_status, finished_at=now, state=state, note="worker-dead"
|
|
75
|
+
)
|
|
76
|
+
|
|
82
77
|
return TransitionDecision(
|
|
83
78
|
status=FlowRunStatus.RUNNING, finished_at=None, state=state, note="running"
|
|
84
79
|
)
|
|
85
80
|
|
|
81
|
+
# Handle STOPPING case separately - worker liveness check still applies.
|
|
82
|
+
if record.status == FlowRunStatus.STOPPING and not health.is_alive:
|
|
83
|
+
state = ensure_reason_summary(
|
|
84
|
+
state, status=FlowRunStatus.STOPPED, default="Worker stopped"
|
|
85
|
+
)
|
|
86
|
+
return TransitionDecision(
|
|
87
|
+
status=FlowRunStatus.STOPPED,
|
|
88
|
+
finished_at=now,
|
|
89
|
+
state=state,
|
|
90
|
+
note="worker-dead",
|
|
91
|
+
)
|
|
92
|
+
|
|
86
93
|
if record.status == FlowRunStatus.PAUSED:
|
|
87
94
|
if inner_status == "completed":
|
|
88
95
|
return TransitionDecision(
|
|
@@ -4,7 +4,7 @@ from dataclasses import dataclass
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Any, Callable, Optional, Protocol
|
|
6
6
|
|
|
7
|
-
from ...tickets.files import list_ticket_paths
|
|
7
|
+
from ...tickets.files import list_ticket_paths, ticket_is_done
|
|
8
8
|
from .models import FlowEventType, FlowRunRecord
|
|
9
9
|
from .store import FlowStore
|
|
10
10
|
from .worker_process import (
|
|
@@ -58,6 +58,18 @@ def _ticket_dir(repo_root: Path) -> Path:
|
|
|
58
58
|
return repo_root.resolve() / ".codex-autorunner" / "tickets"
|
|
59
59
|
|
|
60
60
|
|
|
61
|
+
def ticket_progress(repo_root: Path) -> dict[str, int]:
|
|
62
|
+
ticket_dir = _ticket_dir(repo_root)
|
|
63
|
+
ticket_paths = list_ticket_paths(ticket_dir)
|
|
64
|
+
total = len(ticket_paths)
|
|
65
|
+
done = 0
|
|
66
|
+
if total:
|
|
67
|
+
for path in ticket_paths:
|
|
68
|
+
if ticket_is_done(path):
|
|
69
|
+
done += 1
|
|
70
|
+
return {"done": done, "total": total}
|
|
71
|
+
|
|
72
|
+
|
|
61
73
|
def bootstrap_check(
|
|
62
74
|
repo_root: Path,
|
|
63
75
|
github_service_factory: Optional[Callable[[Path], GitHubServiceProtocol]] = None,
|
|
@@ -219,13 +231,15 @@ def build_flow_status_snapshot(
|
|
|
219
231
|
"last_event_at": last_event_at,
|
|
220
232
|
"worker_health": health,
|
|
221
233
|
"effective_current_ticket": effective_ticket,
|
|
234
|
+
"ticket_progress": ticket_progress(repo_root),
|
|
222
235
|
"state": updated_state,
|
|
223
236
|
}
|
|
224
237
|
|
|
225
238
|
|
|
226
|
-
def ensure_worker(repo_root: Path, run_id: str) -> dict:
|
|
239
|
+
def ensure_worker(repo_root: Path, run_id: str, is_terminal: bool = False) -> dict:
|
|
227
240
|
health = check_worker_health(repo_root, run_id)
|
|
228
|
-
|
|
241
|
+
# Only clear metadata for dead/mismatch/invalid workers if not terminal
|
|
242
|
+
if not is_terminal and health.status in {"dead", "mismatch", "invalid"}:
|
|
229
243
|
try:
|
|
230
244
|
clear_worker_metadata(health.artifact_path.parent)
|
|
231
245
|
except Exception:
|
|
@@ -252,6 +266,7 @@ __all__ = [
|
|
|
252
266
|
"format_issue_as_markdown",
|
|
253
267
|
"issue_md_has_content",
|
|
254
268
|
"issue_md_path",
|
|
269
|
+
"ticket_progress",
|
|
255
270
|
"seed_issue_from_github",
|
|
256
271
|
"seed_issue_from_text",
|
|
257
272
|
]
|
|
@@ -152,7 +152,8 @@ def check_worker_health(
|
|
|
152
152
|
try:
|
|
153
153
|
data = json.loads(metadata_path.read_text(encoding="utf-8"))
|
|
154
154
|
pid = int(data.get("pid")) if data.get("pid") is not None else None
|
|
155
|
-
|
|
155
|
+
raw_cmd = data.get("cmd") or []
|
|
156
|
+
cmd = [str(part) for part in raw_cmd] if isinstance(raw_cmd, list) else []
|
|
156
157
|
except Exception:
|
|
157
158
|
return FlowWorkerHealth(
|
|
158
159
|
status="invalid",
|
|
@@ -166,7 +167,7 @@ def check_worker_health(
|
|
|
166
167
|
return FlowWorkerHealth(
|
|
167
168
|
status="invalid",
|
|
168
169
|
pid=pid,
|
|
169
|
-
cmdline=cmd
|
|
170
|
+
cmdline=cmd,
|
|
170
171
|
artifact_path=metadata_path,
|
|
171
172
|
message="missing or invalid PID",
|
|
172
173
|
)
|
|
@@ -175,19 +176,19 @@ def check_worker_health(
|
|
|
175
176
|
return FlowWorkerHealth(
|
|
176
177
|
status="dead",
|
|
177
178
|
pid=pid,
|
|
178
|
-
cmdline=cmd
|
|
179
|
+
cmdline=cmd,
|
|
179
180
|
artifact_path=metadata_path,
|
|
180
181
|
message="worker PID not running",
|
|
181
182
|
)
|
|
182
183
|
|
|
183
|
-
expected_cmd = _build_worker_cmd(entrypoint, run_id)
|
|
184
|
+
expected_cmd = cmd or _build_worker_cmd(entrypoint, run_id)
|
|
184
185
|
actual_cmd = _read_process_cmdline(pid)
|
|
185
186
|
if actual_cmd is None:
|
|
186
187
|
# Can't inspect cmdline; trust the PID check.
|
|
187
188
|
return FlowWorkerHealth(
|
|
188
189
|
status="alive",
|
|
189
190
|
pid=pid,
|
|
190
|
-
cmdline=cmd
|
|
191
|
+
cmdline=cmd,
|
|
191
192
|
artifact_path=metadata_path,
|
|
192
193
|
message="worker running (cmdline unknown)",
|
|
193
194
|
)
|
|
@@ -198,7 +199,7 @@ def check_worker_health(
|
|
|
198
199
|
pid=pid,
|
|
199
200
|
cmdline=actual_cmd,
|
|
200
201
|
artifact_path=metadata_path,
|
|
201
|
-
message="worker PID command does not match
|
|
202
|
+
message="worker PID command does not match stored metadata",
|
|
202
203
|
)
|
|
203
204
|
|
|
204
205
|
return FlowWorkerHealth(
|
|
@@ -210,6 +211,31 @@ def check_worker_health(
|
|
|
210
211
|
)
|
|
211
212
|
|
|
212
213
|
|
|
214
|
+
def register_worker_metadata(
|
|
215
|
+
repo_root: Path,
|
|
216
|
+
run_id: str,
|
|
217
|
+
*,
|
|
218
|
+
artifacts_root: Optional[Path] = None,
|
|
219
|
+
pid: Optional[int] = None,
|
|
220
|
+
cmd: Optional[list[str]] = None,
|
|
221
|
+
entrypoint: str = "codex_autorunner",
|
|
222
|
+
) -> Path:
|
|
223
|
+
normalized_run_id = _normalized_run_id(run_id)
|
|
224
|
+
artifacts_dir = _worker_artifacts_dir(repo_root, normalized_run_id, artifacts_root)
|
|
225
|
+
|
|
226
|
+
resolved_pid = pid or os.getpid()
|
|
227
|
+
resolved_cmd = cmd or _read_process_cmdline(resolved_pid)
|
|
228
|
+
if not resolved_cmd:
|
|
229
|
+
resolved_cmd = _build_worker_cmd(entrypoint, normalized_run_id)
|
|
230
|
+
|
|
231
|
+
_write_worker_metadata(
|
|
232
|
+
_worker_metadata_path(artifacts_dir),
|
|
233
|
+
resolved_pid,
|
|
234
|
+
resolved_cmd,
|
|
235
|
+
)
|
|
236
|
+
return artifacts_dir
|
|
237
|
+
|
|
238
|
+
|
|
213
239
|
def spawn_flow_worker(
|
|
214
240
|
repo_root: Path,
|
|
215
241
|
run_id: str,
|
codex_autorunner/core/hub.py
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import dataclasses
|
|
2
3
|
import enum
|
|
3
4
|
import logging
|
|
4
5
|
import re
|
|
5
6
|
import shutil
|
|
7
|
+
import threading
|
|
6
8
|
import time
|
|
7
9
|
from pathlib import Path
|
|
8
|
-
from typing import Callable, Dict, List, Optional, Tuple
|
|
10
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
9
11
|
|
|
10
12
|
from ..bootstrap import seed_repo_files
|
|
11
13
|
from ..discovery import DiscoveryRecord, discover_and_init
|
|
@@ -18,7 +20,6 @@ from ..manifest import (
|
|
|
18
20
|
)
|
|
19
21
|
from .archive import archive_worktree_snapshot, build_snapshot_id
|
|
20
22
|
from .config import HubConfig, RepoConfig, derive_repo_config, load_hub_config
|
|
21
|
-
from .engine import AppServerSupervisorFactory, BackendFactory, Engine
|
|
22
23
|
from .git_utils import (
|
|
23
24
|
GitError,
|
|
24
25
|
git_available,
|
|
@@ -29,15 +30,22 @@ from .git_utils import (
|
|
|
29
30
|
git_upstream_status,
|
|
30
31
|
run_git,
|
|
31
32
|
)
|
|
33
|
+
from .lifecycle_events import LifecycleEvent, LifecycleEventEmitter, LifecycleEventStore
|
|
32
34
|
from .locks import DEFAULT_RUNNER_CMD_HINTS, assess_lock, process_alive
|
|
35
|
+
from .ports.backend_orchestrator import (
|
|
36
|
+
BackendOrchestrator as BackendOrchestratorProtocol,
|
|
37
|
+
)
|
|
33
38
|
from .runner_controller import ProcessRunnerController, SpawnRunnerFn
|
|
39
|
+
from .runtime import RuntimeContext
|
|
34
40
|
from .state import RunnerState, load_state, now_iso
|
|
41
|
+
from .types import AppServerSupervisorFactory, BackendFactory
|
|
35
42
|
from .utils import atomic_write
|
|
36
43
|
|
|
37
44
|
logger = logging.getLogger("codex_autorunner.hub")
|
|
38
45
|
|
|
39
46
|
BackendFactoryBuilder = Callable[[Path, RepoConfig], BackendFactory]
|
|
40
47
|
AppServerSupervisorFactoryBuilder = Callable[[RepoConfig], AppServerSupervisorFactory]
|
|
48
|
+
BackendOrchestratorBuilder = Callable[[Path, RepoConfig], BackendOrchestratorProtocol]
|
|
41
49
|
|
|
42
50
|
|
|
43
51
|
def _git_failure_detail(proc) -> str:
|
|
@@ -205,27 +213,25 @@ class RepoRunner:
|
|
|
205
213
|
app_server_supervisor_factory_builder: Optional[
|
|
206
214
|
AppServerSupervisorFactoryBuilder
|
|
207
215
|
] = None,
|
|
216
|
+
backend_orchestrator_builder: Optional[BackendOrchestratorBuilder] = None,
|
|
208
217
|
agent_id_validator: Optional[Callable[[str], str]] = None,
|
|
209
218
|
):
|
|
210
219
|
self.repo_id = repo_id
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
if
|
|
214
|
-
else None
|
|
215
|
-
)
|
|
216
|
-
app_server_supervisor_factory = (
|
|
217
|
-
app_server_supervisor_factory_builder(repo_config)
|
|
218
|
-
if app_server_supervisor_factory_builder is not None
|
|
220
|
+
backend_orchestrator = (
|
|
221
|
+
backend_orchestrator_builder(repo_root, repo_config)
|
|
222
|
+
if backend_orchestrator_builder is not None
|
|
219
223
|
else None
|
|
220
224
|
)
|
|
221
|
-
|
|
222
|
-
|
|
225
|
+
if backend_orchestrator is None:
|
|
226
|
+
raise ValueError(
|
|
227
|
+
"backend_orchestrator_builder is required for HubSupervisor"
|
|
228
|
+
)
|
|
229
|
+
self._ctx = RuntimeContext(
|
|
230
|
+
repo_root=repo_root,
|
|
223
231
|
config=repo_config,
|
|
224
|
-
|
|
225
|
-
app_server_supervisor_factory=app_server_supervisor_factory,
|
|
226
|
-
agent_id_validator=agent_id_validator,
|
|
232
|
+
backend_orchestrator=backend_orchestrator,
|
|
227
233
|
)
|
|
228
|
-
self._controller = ProcessRunnerController(self.
|
|
234
|
+
self._controller = ProcessRunnerController(self._ctx, spawn_fn=spawn_fn)
|
|
229
235
|
|
|
230
236
|
@property
|
|
231
237
|
def running(self) -> bool:
|
|
@@ -254,6 +260,7 @@ class HubSupervisor:
|
|
|
254
260
|
app_server_supervisor_factory_builder: Optional[
|
|
255
261
|
AppServerSupervisorFactoryBuilder
|
|
256
262
|
] = None,
|
|
263
|
+
backend_orchestrator_builder: Optional[BackendOrchestratorBuilder] = None,
|
|
257
264
|
agent_id_validator: Optional[Callable[[str], str]] = None,
|
|
258
265
|
):
|
|
259
266
|
self.hub_config = hub_config
|
|
@@ -264,11 +271,22 @@ class HubSupervisor:
|
|
|
264
271
|
self._app_server_supervisor_factory_builder = (
|
|
265
272
|
app_server_supervisor_factory_builder
|
|
266
273
|
)
|
|
274
|
+
self._backend_orchestrator_builder = backend_orchestrator_builder
|
|
267
275
|
self._agent_id_validator = agent_id_validator
|
|
268
276
|
self.state = load_hub_state(self.state_path, self.hub_config.root)
|
|
269
277
|
self._list_cache_at: Optional[float] = None
|
|
270
278
|
self._list_cache: Optional[List[RepoSnapshot]] = None
|
|
279
|
+
self._list_lock = threading.Lock()
|
|
280
|
+
self._lifecycle_emitter = LifecycleEventEmitter(hub_config.root)
|
|
281
|
+
self._lifecycle_task_lock = threading.Lock()
|
|
282
|
+
self._lifecycle_stop_event = threading.Event()
|
|
283
|
+
self._lifecycle_thread: Optional[threading.Thread] = None
|
|
284
|
+
self._dispatch_interceptor_task: Optional[asyncio.Task] = None
|
|
285
|
+
self._dispatch_interceptor_stop_event: Optional[threading.Event] = None
|
|
286
|
+
self._dispatch_interceptor_thread: Optional[threading.Thread] = None
|
|
271
287
|
self._reconcile_startup()
|
|
288
|
+
self._start_lifecycle_event_processor()
|
|
289
|
+
self._start_dispatch_interceptor()
|
|
272
290
|
|
|
273
291
|
@classmethod
|
|
274
292
|
def from_path(
|
|
@@ -279,12 +297,14 @@ class HubSupervisor:
|
|
|
279
297
|
app_server_supervisor_factory_builder: Optional[
|
|
280
298
|
AppServerSupervisorFactoryBuilder
|
|
281
299
|
] = None,
|
|
300
|
+
backend_orchestrator_builder: Optional[BackendOrchestratorBuilder] = None,
|
|
282
301
|
) -> "HubSupervisor":
|
|
283
302
|
config = load_hub_config(path)
|
|
284
303
|
return cls(
|
|
285
304
|
config,
|
|
286
305
|
backend_factory_builder=backend_factory_builder,
|
|
287
306
|
app_server_supervisor_factory_builder=app_server_supervisor_factory_builder,
|
|
307
|
+
backend_orchestrator_builder=backend_orchestrator_builder,
|
|
288
308
|
)
|
|
289
309
|
|
|
290
310
|
def scan(self) -> List[RepoSnapshot]:
|
|
@@ -296,16 +316,17 @@ class HubSupervisor:
|
|
|
296
316
|
return snapshots
|
|
297
317
|
|
|
298
318
|
def list_repos(self, *, use_cache: bool = True) -> List[RepoSnapshot]:
|
|
299
|
-
|
|
300
|
-
if
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
319
|
+
with self._list_lock:
|
|
320
|
+
if use_cache and self._list_cache and self._list_cache_at is not None:
|
|
321
|
+
if time.monotonic() - self._list_cache_at < 2.0:
|
|
322
|
+
return self._list_cache
|
|
323
|
+
manifest, records = self._manifest_records(manifest_only=True)
|
|
324
|
+
snapshots = self._build_snapshots(records)
|
|
325
|
+
self.state = HubState(last_scan_at=self.state.last_scan_at, repos=snapshots)
|
|
326
|
+
save_hub_state(self.state_path, self.state, self.hub_config.root)
|
|
327
|
+
self._list_cache = snapshots
|
|
328
|
+
self._list_cache_at = time.monotonic()
|
|
329
|
+
return snapshots
|
|
309
330
|
|
|
310
331
|
def _reconcile_startup(self) -> None:
|
|
311
332
|
try:
|
|
@@ -320,23 +341,18 @@ class HubSupervisor:
|
|
|
320
341
|
repo_config = derive_repo_config(
|
|
321
342
|
self.hub_config, record.absolute_path, load_env=False
|
|
322
343
|
)
|
|
323
|
-
|
|
324
|
-
self.
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
app_server_supervisor_factory = (
|
|
329
|
-
self._app_server_supervisor_factory_builder(repo_config)
|
|
330
|
-
if self._app_server_supervisor_factory_builder is not None
|
|
344
|
+
backend_orchestrator = (
|
|
345
|
+
self._backend_orchestrator_builder(
|
|
346
|
+
record.absolute_path, repo_config
|
|
347
|
+
)
|
|
348
|
+
if self._backend_orchestrator_builder is not None
|
|
331
349
|
else None
|
|
332
350
|
)
|
|
333
351
|
controller = ProcessRunnerController(
|
|
334
|
-
|
|
335
|
-
record.absolute_path,
|
|
352
|
+
RuntimeContext(
|
|
353
|
+
repo_root=record.absolute_path,
|
|
336
354
|
config=repo_config,
|
|
337
|
-
|
|
338
|
-
app_server_supervisor_factory=app_server_supervisor_factory,
|
|
339
|
-
agent_id_validator=self._agent_id_validator,
|
|
355
|
+
backend_orchestrator=backend_orchestrator,
|
|
340
356
|
)
|
|
341
357
|
)
|
|
342
358
|
controller.reconcile()
|
|
@@ -890,6 +906,7 @@ class HubSupervisor:
|
|
|
890
906
|
app_server_supervisor_factory_builder=(
|
|
891
907
|
self._app_server_supervisor_factory_builder
|
|
892
908
|
),
|
|
909
|
+
backend_orchestrator_builder=self._backend_orchestrator_builder,
|
|
893
910
|
agent_id_validator=self._agent_id_validator,
|
|
894
911
|
)
|
|
895
912
|
self._runners[repo_id] = runner
|
|
@@ -933,8 +950,148 @@ class HubSupervisor:
|
|
|
933
950
|
return snapshot
|
|
934
951
|
|
|
935
952
|
def _invalidate_list_cache(self) -> None:
|
|
936
|
-
self.
|
|
937
|
-
|
|
953
|
+
with self._list_lock:
|
|
954
|
+
self._list_cache = None
|
|
955
|
+
self._list_cache_at = None
|
|
956
|
+
|
|
957
|
+
@property
|
|
958
|
+
def lifecycle_emitter(self) -> LifecycleEventEmitter:
|
|
959
|
+
return self._lifecycle_emitter
|
|
960
|
+
|
|
961
|
+
@property
|
|
962
|
+
def lifecycle_store(self) -> LifecycleEventStore:
|
|
963
|
+
return self._lifecycle_emitter._store
|
|
964
|
+
|
|
965
|
+
def trigger_pma_from_lifecycle_event(self, event: LifecycleEvent) -> None:
|
|
966
|
+
if event.processed:
|
|
967
|
+
return
|
|
968
|
+
event_id = event.event_id
|
|
969
|
+
if event_id is None:
|
|
970
|
+
return
|
|
971
|
+
self.lifecycle_store.mark_processed(event_id)
|
|
972
|
+
self.lifecycle_store.prune_processed(keep_last=50)
|
|
973
|
+
logger.info(
|
|
974
|
+
"PMA wakeup triggered by lifecycle event: type=%s repo_id=%s run_id=%s",
|
|
975
|
+
event.event_type.value,
|
|
976
|
+
event.repo_id,
|
|
977
|
+
event.run_id,
|
|
978
|
+
)
|
|
979
|
+
|
|
980
|
+
def process_lifecycle_events(self) -> None:
|
|
981
|
+
events = self.lifecycle_store.get_unprocessed(limit=100)
|
|
982
|
+
if not events:
|
|
983
|
+
return
|
|
984
|
+
for event in events:
|
|
985
|
+
try:
|
|
986
|
+
self.trigger_pma_from_lifecycle_event(event)
|
|
987
|
+
except Exception as exc:
|
|
988
|
+
logger.exception(
|
|
989
|
+
"Failed to process lifecycle event %s: %s", event.event_id, exc
|
|
990
|
+
)
|
|
991
|
+
|
|
992
|
+
def _start_lifecycle_event_processor(self) -> None:
|
|
993
|
+
if self._lifecycle_thread is not None:
|
|
994
|
+
return
|
|
995
|
+
|
|
996
|
+
def _process_loop():
|
|
997
|
+
while not self._lifecycle_stop_event.wait(5.0):
|
|
998
|
+
try:
|
|
999
|
+
self.process_lifecycle_events()
|
|
1000
|
+
except Exception:
|
|
1001
|
+
logger.exception("Error in lifecycle event processor")
|
|
1002
|
+
|
|
1003
|
+
self._lifecycle_thread = threading.Thread(
|
|
1004
|
+
target=_process_loop, daemon=True, name="lifecycle-event-processor"
|
|
1005
|
+
)
|
|
1006
|
+
self._lifecycle_thread.start()
|
|
1007
|
+
|
|
1008
|
+
def _stop_lifecycle_event_processor(self) -> None:
|
|
1009
|
+
if self._lifecycle_thread is None:
|
|
1010
|
+
return
|
|
1011
|
+
self._lifecycle_stop_event.set()
|
|
1012
|
+
self._lifecycle_thread.join(timeout=2.0)
|
|
1013
|
+
self._lifecycle_thread = None
|
|
1014
|
+
|
|
1015
|
+
def shutdown(self) -> None:
|
|
1016
|
+
self._stop_lifecycle_event_processor()
|
|
1017
|
+
self._stop_dispatch_interceptor()
|
|
1018
|
+
|
|
1019
|
+
def _start_dispatch_interceptor(self) -> None:
|
|
1020
|
+
if not self.hub_config.pma.enabled:
|
|
1021
|
+
return
|
|
1022
|
+
if not self.hub_config.pma.dispatch_interception_enabled:
|
|
1023
|
+
return
|
|
1024
|
+
if self._dispatch_interceptor_thread is not None:
|
|
1025
|
+
return
|
|
1026
|
+
|
|
1027
|
+
import asyncio
|
|
1028
|
+
from typing import TYPE_CHECKING
|
|
1029
|
+
|
|
1030
|
+
if TYPE_CHECKING:
|
|
1031
|
+
pass
|
|
1032
|
+
|
|
1033
|
+
def _run_interceptor():
|
|
1034
|
+
loop = asyncio.new_event_loop()
|
|
1035
|
+
asyncio.set_event_loop(loop)
|
|
1036
|
+
|
|
1037
|
+
from .pma_dispatch_interceptor import run_dispatch_interceptor
|
|
1038
|
+
|
|
1039
|
+
stop_event = threading.Event()
|
|
1040
|
+
self._dispatch_interceptor_stop_event = stop_event
|
|
1041
|
+
|
|
1042
|
+
async def run_until_stop():
|
|
1043
|
+
task = None
|
|
1044
|
+
try:
|
|
1045
|
+
task = await run_dispatch_interceptor(
|
|
1046
|
+
hub_root=self.hub_config.root,
|
|
1047
|
+
supervisor=self,
|
|
1048
|
+
interval_seconds=5.0,
|
|
1049
|
+
on_intercept=self._on_dispatch_intercept,
|
|
1050
|
+
)
|
|
1051
|
+
while not stop_event.is_set():
|
|
1052
|
+
await asyncio.sleep(0.1)
|
|
1053
|
+
except asyncio.CancelledError:
|
|
1054
|
+
pass
|
|
1055
|
+
finally:
|
|
1056
|
+
if task is not None and not task.done():
|
|
1057
|
+
task.cancel()
|
|
1058
|
+
if task is not None:
|
|
1059
|
+
try:
|
|
1060
|
+
await task
|
|
1061
|
+
except (asyncio.CancelledError, Exception):
|
|
1062
|
+
pass
|
|
1063
|
+
|
|
1064
|
+
loop.run_until_complete(run_until_stop())
|
|
1065
|
+
loop.close()
|
|
1066
|
+
|
|
1067
|
+
self._dispatch_interceptor_thread = threading.Thread(
|
|
1068
|
+
target=_run_interceptor, daemon=True, name="pma-dispatch-interceptor"
|
|
1069
|
+
)
|
|
1070
|
+
self._dispatch_interceptor_thread.start()
|
|
1071
|
+
|
|
1072
|
+
def _stop_dispatch_interceptor(self) -> None:
|
|
1073
|
+
if self._dispatch_interceptor_stop_event is not None:
|
|
1074
|
+
self._dispatch_interceptor_stop_event.set()
|
|
1075
|
+
if self._dispatch_interceptor_thread is not None:
|
|
1076
|
+
self._dispatch_interceptor_thread.join(timeout=2.0)
|
|
1077
|
+
self._dispatch_interceptor_thread = None
|
|
1078
|
+
self._dispatch_interceptor_stop_event = None
|
|
1079
|
+
|
|
1080
|
+
def _on_dispatch_intercept(self, event_id: str, result: Any) -> None:
|
|
1081
|
+
logger.info(
|
|
1082
|
+
"Dispatch intercepted: event_id=%s action=%s reason=%s",
|
|
1083
|
+
event_id,
|
|
1084
|
+
(
|
|
1085
|
+
result.get("action")
|
|
1086
|
+
if isinstance(result, dict)
|
|
1087
|
+
else getattr(result, "action", None)
|
|
1088
|
+
),
|
|
1089
|
+
(
|
|
1090
|
+
result.get("reason")
|
|
1091
|
+
if isinstance(result, dict)
|
|
1092
|
+
else getattr(result, "reason", None)
|
|
1093
|
+
),
|
|
1094
|
+
)
|
|
938
1095
|
|
|
939
1096
|
def _snapshot_from_record(self, record: DiscoveryRecord) -> RepoSnapshot:
|
|
940
1097
|
repo_path = record.absolute_path
|