codex-autorunner 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (134)
  1. codex_autorunner/agents/opencode/client.py +113 -4
  2. codex_autorunner/agents/opencode/supervisor.py +4 -0
  3. codex_autorunner/agents/registry.py +17 -7
  4. codex_autorunner/bootstrap.py +219 -1
  5. codex_autorunner/core/__init__.py +17 -1
  6. codex_autorunner/core/about_car.py +124 -11
  7. codex_autorunner/core/app_server_threads.py +6 -0
  8. codex_autorunner/core/config.py +238 -3
  9. codex_autorunner/core/context_awareness.py +39 -0
  10. codex_autorunner/core/docs.py +0 -122
  11. codex_autorunner/core/filebox.py +265 -0
  12. codex_autorunner/core/flows/controller.py +71 -1
  13. codex_autorunner/core/flows/reconciler.py +4 -1
  14. codex_autorunner/core/flows/runtime.py +22 -0
  15. codex_autorunner/core/flows/store.py +61 -9
  16. codex_autorunner/core/flows/transition.py +23 -16
  17. codex_autorunner/core/flows/ux_helpers.py +18 -3
  18. codex_autorunner/core/flows/worker_process.py +32 -6
  19. codex_autorunner/core/hub.py +198 -41
  20. codex_autorunner/core/lifecycle_events.py +253 -0
  21. codex_autorunner/core/path_utils.py +2 -1
  22. codex_autorunner/core/pma_audit.py +224 -0
  23. codex_autorunner/core/pma_context.py +683 -0
  24. codex_autorunner/core/pma_dispatch_interceptor.py +284 -0
  25. codex_autorunner/core/pma_lifecycle.py +527 -0
  26. codex_autorunner/core/pma_queue.py +367 -0
  27. codex_autorunner/core/pma_safety.py +221 -0
  28. codex_autorunner/core/pma_state.py +115 -0
  29. codex_autorunner/core/ports/agent_backend.py +2 -5
  30. codex_autorunner/core/ports/run_event.py +1 -4
  31. codex_autorunner/core/prompt.py +0 -80
  32. codex_autorunner/core/prompts.py +56 -172
  33. codex_autorunner/core/redaction.py +0 -4
  34. codex_autorunner/core/review_context.py +11 -9
  35. codex_autorunner/core/runner_controller.py +35 -33
  36. codex_autorunner/core/runner_state.py +147 -0
  37. codex_autorunner/core/runtime.py +829 -0
  38. codex_autorunner/core/sqlite_utils.py +13 -4
  39. codex_autorunner/core/state.py +7 -10
  40. codex_autorunner/core/state_roots.py +5 -0
  41. codex_autorunner/core/templates/__init__.py +39 -0
  42. codex_autorunner/core/templates/git_mirror.py +234 -0
  43. codex_autorunner/core/templates/provenance.py +56 -0
  44. codex_autorunner/core/templates/scan_cache.py +120 -0
  45. codex_autorunner/core/ticket_linter_cli.py +17 -0
  46. codex_autorunner/core/ticket_manager_cli.py +154 -92
  47. codex_autorunner/core/time_utils.py +11 -0
  48. codex_autorunner/core/types.py +18 -0
  49. codex_autorunner/core/utils.py +34 -6
  50. codex_autorunner/flows/review/service.py +23 -25
  51. codex_autorunner/flows/ticket_flow/definition.py +43 -1
  52. codex_autorunner/integrations/agents/__init__.py +2 -0
  53. codex_autorunner/integrations/agents/backend_orchestrator.py +18 -0
  54. codex_autorunner/integrations/agents/codex_backend.py +19 -8
  55. codex_autorunner/integrations/agents/runner.py +3 -8
  56. codex_autorunner/integrations/agents/wiring.py +8 -0
  57. codex_autorunner/integrations/telegram/adapter.py +1 -1
  58. codex_autorunner/integrations/telegram/config.py +1 -1
  59. codex_autorunner/integrations/telegram/doctor.py +228 -6
  60. codex_autorunner/integrations/telegram/handlers/commands/execution.py +236 -74
  61. codex_autorunner/integrations/telegram/handlers/commands/files.py +314 -75
  62. codex_autorunner/integrations/telegram/handlers/commands/flows.py +346 -58
  63. codex_autorunner/integrations/telegram/handlers/commands/workspace.py +498 -37
  64. codex_autorunner/integrations/telegram/handlers/commands_runtime.py +202 -45
  65. codex_autorunner/integrations/telegram/handlers/commands_spec.py +18 -7
  66. codex_autorunner/integrations/telegram/handlers/messages.py +34 -3
  67. codex_autorunner/integrations/telegram/helpers.py +1 -3
  68. codex_autorunner/integrations/telegram/runtime.py +9 -4
  69. codex_autorunner/integrations/telegram/service.py +30 -0
  70. codex_autorunner/integrations/telegram/state.py +38 -0
  71. codex_autorunner/integrations/telegram/ticket_flow_bridge.py +10 -4
  72. codex_autorunner/integrations/telegram/transport.py +10 -3
  73. codex_autorunner/integrations/templates/__init__.py +27 -0
  74. codex_autorunner/integrations/templates/scan_agent.py +312 -0
  75. codex_autorunner/server.py +2 -2
  76. codex_autorunner/static/agentControls.js +21 -5
  77. codex_autorunner/static/app.js +115 -11
  78. codex_autorunner/static/archive.js +274 -81
  79. codex_autorunner/static/archiveApi.js +21 -0
  80. codex_autorunner/static/chatUploads.js +137 -0
  81. codex_autorunner/static/constants.js +1 -1
  82. codex_autorunner/static/docChatCore.js +185 -13
  83. codex_autorunner/static/fileChat.js +68 -40
  84. codex_autorunner/static/fileboxUi.js +159 -0
  85. codex_autorunner/static/hub.js +46 -81
  86. codex_autorunner/static/index.html +303 -24
  87. codex_autorunner/static/messages.js +82 -4
  88. codex_autorunner/static/notifications.js +288 -0
  89. codex_autorunner/static/pma.js +1167 -0
  90. codex_autorunner/static/settings.js +3 -0
  91. codex_autorunner/static/streamUtils.js +57 -0
  92. codex_autorunner/static/styles.css +9141 -6742
  93. codex_autorunner/static/templateReposSettings.js +225 -0
  94. codex_autorunner/static/terminalManager.js +22 -3
  95. codex_autorunner/static/ticketChatActions.js +165 -3
  96. codex_autorunner/static/ticketChatStream.js +17 -119
  97. codex_autorunner/static/ticketEditor.js +41 -13
  98. codex_autorunner/static/ticketTemplates.js +798 -0
  99. codex_autorunner/static/tickets.js +69 -19
  100. codex_autorunner/static/turnEvents.js +27 -0
  101. codex_autorunner/static/turnResume.js +33 -0
  102. codex_autorunner/static/utils.js +28 -0
  103. codex_autorunner/static/workspace.js +258 -44
  104. codex_autorunner/static/workspaceFileBrowser.js +6 -4
  105. codex_autorunner/surfaces/cli/cli.py +1465 -155
  106. codex_autorunner/surfaces/cli/pma_cli.py +817 -0
  107. codex_autorunner/surfaces/web/app.py +253 -49
  108. codex_autorunner/surfaces/web/routes/__init__.py +4 -0
  109. codex_autorunner/surfaces/web/routes/analytics.py +29 -22
  110. codex_autorunner/surfaces/web/routes/archive.py +197 -0
  111. codex_autorunner/surfaces/web/routes/file_chat.py +297 -36
  112. codex_autorunner/surfaces/web/routes/filebox.py +227 -0
  113. codex_autorunner/surfaces/web/routes/flows.py +219 -29
  114. codex_autorunner/surfaces/web/routes/messages.py +70 -39
  115. codex_autorunner/surfaces/web/routes/pma.py +1652 -0
  116. codex_autorunner/surfaces/web/routes/repos.py +1 -1
  117. codex_autorunner/surfaces/web/routes/shared.py +0 -3
  118. codex_autorunner/surfaces/web/routes/templates.py +634 -0
  119. codex_autorunner/surfaces/web/runner_manager.py +2 -2
  120. codex_autorunner/surfaces/web/schemas.py +81 -18
  121. codex_autorunner/tickets/agent_pool.py +27 -0
  122. codex_autorunner/tickets/files.py +33 -16
  123. codex_autorunner/tickets/lint.py +50 -0
  124. codex_autorunner/tickets/models.py +3 -0
  125. codex_autorunner/tickets/outbox.py +41 -5
  126. codex_autorunner/tickets/runner.py +350 -69
  127. {codex_autorunner-1.1.0.dist-info → codex_autorunner-1.2.1.dist-info}/METADATA +15 -19
  128. {codex_autorunner-1.1.0.dist-info → codex_autorunner-1.2.1.dist-info}/RECORD +132 -101
  129. codex_autorunner/core/adapter_utils.py +0 -21
  130. codex_autorunner/core/engine.py +0 -3302
  131. {codex_autorunner-1.1.0.dist-info → codex_autorunner-1.2.1.dist-info}/WHEEL +0 -0
  132. {codex_autorunner-1.1.0.dist-info → codex_autorunner-1.2.1.dist-info}/entry_points.txt +0 -0
  133. {codex_autorunner-1.1.0.dist-info → codex_autorunner-1.2.1.dist-info}/licenses/LICENSE +0 -0
  134. {codex_autorunner-1.1.0.dist-info → codex_autorunner-1.2.1.dist-info}/top_level.txt +0 -0
@@ -45,22 +45,7 @@ def resolve_flow_transition(
45
45
  inner_status = engine.get("status")
46
46
  reason_code = engine.get("reason_code")
47
47
 
48
- # 1) Worker liveness overrides for active flows.
49
- if (
50
- record.status in (FlowRunStatus.RUNNING, FlowRunStatus.STOPPING)
51
- and not health.is_alive
52
- ):
53
- new_status = (
54
- FlowRunStatus.STOPPED
55
- if record.status == FlowRunStatus.STOPPING
56
- else FlowRunStatus.FAILED
57
- )
58
- state = ensure_reason_summary(state, status=new_status, default="Worker died")
59
- return TransitionDecision(
60
- status=new_status, finished_at=now, state=state, note="worker-dead"
61
- )
62
-
63
- # 2) Inner engine reconciliation (worker is alive or not required).
48
+ # 1) Inner engine completion takes priority over worker liveness for active flows.
64
49
  if record.status == FlowRunStatus.RUNNING:
65
50
  if inner_status == "paused":
66
51
  state = ensure_reason_summary(state, status=FlowRunStatus.PAUSED)
@@ -79,10 +64,32 @@ def resolve_flow_transition(
79
64
  note="engine-completed",
80
65
  )
81
66
 
67
+ # 2) Worker liveness overrides for active flows (only if engine not completed).
68
+ if not health.is_alive:
69
+ new_status = FlowRunStatus.FAILED
70
+ state = ensure_reason_summary(
71
+ state, status=new_status, default="Worker died"
72
+ )
73
+ return TransitionDecision(
74
+ status=new_status, finished_at=now, state=state, note="worker-dead"
75
+ )
76
+
82
77
  return TransitionDecision(
83
78
  status=FlowRunStatus.RUNNING, finished_at=None, state=state, note="running"
84
79
  )
85
80
 
81
+ # Handle STOPPING case separately - worker liveness check still applies.
82
+ if record.status == FlowRunStatus.STOPPING and not health.is_alive:
83
+ state = ensure_reason_summary(
84
+ state, status=FlowRunStatus.STOPPED, default="Worker stopped"
85
+ )
86
+ return TransitionDecision(
87
+ status=FlowRunStatus.STOPPED,
88
+ finished_at=now,
89
+ state=state,
90
+ note="worker-dead",
91
+ )
92
+
86
93
  if record.status == FlowRunStatus.PAUSED:
87
94
  if inner_status == "completed":
88
95
  return TransitionDecision(
@@ -4,7 +4,7 @@ from dataclasses import dataclass
4
4
  from pathlib import Path
5
5
  from typing import Any, Callable, Optional, Protocol
6
6
 
7
- from ...tickets.files import list_ticket_paths
7
+ from ...tickets.files import list_ticket_paths, ticket_is_done
8
8
  from .models import FlowEventType, FlowRunRecord
9
9
  from .store import FlowStore
10
10
  from .worker_process import (
@@ -58,6 +58,18 @@ def _ticket_dir(repo_root: Path) -> Path:
58
58
  return repo_root.resolve() / ".codex-autorunner" / "tickets"
59
59
 
60
60
 
61
+ def ticket_progress(repo_root: Path) -> dict[str, int]:
62
+ ticket_dir = _ticket_dir(repo_root)
63
+ ticket_paths = list_ticket_paths(ticket_dir)
64
+ total = len(ticket_paths)
65
+ done = 0
66
+ if total:
67
+ for path in ticket_paths:
68
+ if ticket_is_done(path):
69
+ done += 1
70
+ return {"done": done, "total": total}
71
+
72
+
61
73
  def bootstrap_check(
62
74
  repo_root: Path,
63
75
  github_service_factory: Optional[Callable[[Path], GitHubServiceProtocol]] = None,
@@ -219,13 +231,15 @@ def build_flow_status_snapshot(
219
231
  "last_event_at": last_event_at,
220
232
  "worker_health": health,
221
233
  "effective_current_ticket": effective_ticket,
234
+ "ticket_progress": ticket_progress(repo_root),
222
235
  "state": updated_state,
223
236
  }
224
237
 
225
238
 
226
- def ensure_worker(repo_root: Path, run_id: str) -> dict:
239
+ def ensure_worker(repo_root: Path, run_id: str, is_terminal: bool = False) -> dict:
227
240
  health = check_worker_health(repo_root, run_id)
228
- if health.status in {"dead", "mismatch", "invalid"}:
241
+ # Only clear metadata for dead/mismatch/invalid workers if not terminal
242
+ if not is_terminal and health.status in {"dead", "mismatch", "invalid"}:
229
243
  try:
230
244
  clear_worker_metadata(health.artifact_path.parent)
231
245
  except Exception:
@@ -252,6 +266,7 @@ __all__ = [
252
266
  "format_issue_as_markdown",
253
267
  "issue_md_has_content",
254
268
  "issue_md_path",
269
+ "ticket_progress",
255
270
  "seed_issue_from_github",
256
271
  "seed_issue_from_text",
257
272
  ]
@@ -152,7 +152,8 @@ def check_worker_health(
152
152
  try:
153
153
  data = json.loads(metadata_path.read_text(encoding="utf-8"))
154
154
  pid = int(data.get("pid")) if data.get("pid") is not None else None
155
- cmd = data.get("cmd") or []
155
+ raw_cmd = data.get("cmd") or []
156
+ cmd = [str(part) for part in raw_cmd] if isinstance(raw_cmd, list) else []
156
157
  except Exception:
157
158
  return FlowWorkerHealth(
158
159
  status="invalid",
@@ -166,7 +167,7 @@ def check_worker_health(
166
167
  return FlowWorkerHealth(
167
168
  status="invalid",
168
169
  pid=pid,
169
- cmdline=cmd if isinstance(cmd, list) else [],
170
+ cmdline=cmd,
170
171
  artifact_path=metadata_path,
171
172
  message="missing or invalid PID",
172
173
  )
@@ -175,19 +176,19 @@ def check_worker_health(
175
176
  return FlowWorkerHealth(
176
177
  status="dead",
177
178
  pid=pid,
178
- cmdline=cmd if isinstance(cmd, list) else [],
179
+ cmdline=cmd,
179
180
  artifact_path=metadata_path,
180
181
  message="worker PID not running",
181
182
  )
182
183
 
183
- expected_cmd = _build_worker_cmd(entrypoint, run_id)
184
+ expected_cmd = cmd or _build_worker_cmd(entrypoint, run_id)
184
185
  actual_cmd = _read_process_cmdline(pid)
185
186
  if actual_cmd is None:
186
187
  # Can't inspect cmdline; trust the PID check.
187
188
  return FlowWorkerHealth(
188
189
  status="alive",
189
190
  pid=pid,
190
- cmdline=cmd if isinstance(cmd, list) else [],
191
+ cmdline=cmd,
191
192
  artifact_path=metadata_path,
192
193
  message="worker running (cmdline unknown)",
193
194
  )
@@ -198,7 +199,7 @@ def check_worker_health(
198
199
  pid=pid,
199
200
  cmdline=actual_cmd,
200
201
  artifact_path=metadata_path,
201
- message="worker PID command does not match expected",
202
+ message="worker PID command does not match stored metadata",
202
203
  )
203
204
 
204
205
  return FlowWorkerHealth(
@@ -210,6 +211,31 @@ def check_worker_health(
210
211
  )
211
212
 
212
213
 
214
+ def register_worker_metadata(
215
+ repo_root: Path,
216
+ run_id: str,
217
+ *,
218
+ artifacts_root: Optional[Path] = None,
219
+ pid: Optional[int] = None,
220
+ cmd: Optional[list[str]] = None,
221
+ entrypoint: str = "codex_autorunner",
222
+ ) -> Path:
223
+ normalized_run_id = _normalized_run_id(run_id)
224
+ artifacts_dir = _worker_artifacts_dir(repo_root, normalized_run_id, artifacts_root)
225
+
226
+ resolved_pid = pid or os.getpid()
227
+ resolved_cmd = cmd or _read_process_cmdline(resolved_pid)
228
+ if not resolved_cmd:
229
+ resolved_cmd = _build_worker_cmd(entrypoint, normalized_run_id)
230
+
231
+ _write_worker_metadata(
232
+ _worker_metadata_path(artifacts_dir),
233
+ resolved_pid,
234
+ resolved_cmd,
235
+ )
236
+ return artifacts_dir
237
+
238
+
213
239
  def spawn_flow_worker(
214
240
  repo_root: Path,
215
241
  run_id: str,
@@ -1,11 +1,13 @@
1
+ import asyncio
1
2
  import dataclasses
2
3
  import enum
3
4
  import logging
4
5
  import re
5
6
  import shutil
7
+ import threading
6
8
  import time
7
9
  from pathlib import Path
8
- from typing import Callable, Dict, List, Optional, Tuple
10
+ from typing import Any, Callable, Dict, List, Optional, Tuple
9
11
 
10
12
  from ..bootstrap import seed_repo_files
11
13
  from ..discovery import DiscoveryRecord, discover_and_init
@@ -18,7 +20,6 @@ from ..manifest import (
18
20
  )
19
21
  from .archive import archive_worktree_snapshot, build_snapshot_id
20
22
  from .config import HubConfig, RepoConfig, derive_repo_config, load_hub_config
21
- from .engine import AppServerSupervisorFactory, BackendFactory, Engine
22
23
  from .git_utils import (
23
24
  GitError,
24
25
  git_available,
@@ -29,15 +30,22 @@ from .git_utils import (
29
30
  git_upstream_status,
30
31
  run_git,
31
32
  )
33
+ from .lifecycle_events import LifecycleEvent, LifecycleEventEmitter, LifecycleEventStore
32
34
  from .locks import DEFAULT_RUNNER_CMD_HINTS, assess_lock, process_alive
35
+ from .ports.backend_orchestrator import (
36
+ BackendOrchestrator as BackendOrchestratorProtocol,
37
+ )
33
38
  from .runner_controller import ProcessRunnerController, SpawnRunnerFn
39
+ from .runtime import RuntimeContext
34
40
  from .state import RunnerState, load_state, now_iso
41
+ from .types import AppServerSupervisorFactory, BackendFactory
35
42
  from .utils import atomic_write
36
43
 
37
44
  logger = logging.getLogger("codex_autorunner.hub")
38
45
 
39
46
  BackendFactoryBuilder = Callable[[Path, RepoConfig], BackendFactory]
40
47
  AppServerSupervisorFactoryBuilder = Callable[[RepoConfig], AppServerSupervisorFactory]
48
+ BackendOrchestratorBuilder = Callable[[Path, RepoConfig], BackendOrchestratorProtocol]
41
49
 
42
50
 
43
51
  def _git_failure_detail(proc) -> str:
@@ -205,27 +213,25 @@ class RepoRunner:
205
213
  app_server_supervisor_factory_builder: Optional[
206
214
  AppServerSupervisorFactoryBuilder
207
215
  ] = None,
216
+ backend_orchestrator_builder: Optional[BackendOrchestratorBuilder] = None,
208
217
  agent_id_validator: Optional[Callable[[str], str]] = None,
209
218
  ):
210
219
  self.repo_id = repo_id
211
- backend_factory = (
212
- backend_factory_builder(repo_root, repo_config)
213
- if backend_factory_builder is not None
214
- else None
215
- )
216
- app_server_supervisor_factory = (
217
- app_server_supervisor_factory_builder(repo_config)
218
- if app_server_supervisor_factory_builder is not None
220
+ backend_orchestrator = (
221
+ backend_orchestrator_builder(repo_root, repo_config)
222
+ if backend_orchestrator_builder is not None
219
223
  else None
220
224
  )
221
- self._engine = Engine(
222
- repo_root,
225
+ if backend_orchestrator is None:
226
+ raise ValueError(
227
+ "backend_orchestrator_builder is required for HubSupervisor"
228
+ )
229
+ self._ctx = RuntimeContext(
230
+ repo_root=repo_root,
223
231
  config=repo_config,
224
- backend_factory=backend_factory,
225
- app_server_supervisor_factory=app_server_supervisor_factory,
226
- agent_id_validator=agent_id_validator,
232
+ backend_orchestrator=backend_orchestrator,
227
233
  )
228
- self._controller = ProcessRunnerController(self._engine, spawn_fn=spawn_fn)
234
+ self._controller = ProcessRunnerController(self._ctx, spawn_fn=spawn_fn)
229
235
 
230
236
  @property
231
237
  def running(self) -> bool:
@@ -254,6 +260,7 @@ class HubSupervisor:
254
260
  app_server_supervisor_factory_builder: Optional[
255
261
  AppServerSupervisorFactoryBuilder
256
262
  ] = None,
263
+ backend_orchestrator_builder: Optional[BackendOrchestratorBuilder] = None,
257
264
  agent_id_validator: Optional[Callable[[str], str]] = None,
258
265
  ):
259
266
  self.hub_config = hub_config
@@ -264,11 +271,22 @@ class HubSupervisor:
264
271
  self._app_server_supervisor_factory_builder = (
265
272
  app_server_supervisor_factory_builder
266
273
  )
274
+ self._backend_orchestrator_builder = backend_orchestrator_builder
267
275
  self._agent_id_validator = agent_id_validator
268
276
  self.state = load_hub_state(self.state_path, self.hub_config.root)
269
277
  self._list_cache_at: Optional[float] = None
270
278
  self._list_cache: Optional[List[RepoSnapshot]] = None
279
+ self._list_lock = threading.Lock()
280
+ self._lifecycle_emitter = LifecycleEventEmitter(hub_config.root)
281
+ self._lifecycle_task_lock = threading.Lock()
282
+ self._lifecycle_stop_event = threading.Event()
283
+ self._lifecycle_thread: Optional[threading.Thread] = None
284
+ self._dispatch_interceptor_task: Optional[asyncio.Task] = None
285
+ self._dispatch_interceptor_stop_event: Optional[threading.Event] = None
286
+ self._dispatch_interceptor_thread: Optional[threading.Thread] = None
271
287
  self._reconcile_startup()
288
+ self._start_lifecycle_event_processor()
289
+ self._start_dispatch_interceptor()
272
290
 
273
291
  @classmethod
274
292
  def from_path(
@@ -279,12 +297,14 @@ class HubSupervisor:
279
297
  app_server_supervisor_factory_builder: Optional[
280
298
  AppServerSupervisorFactoryBuilder
281
299
  ] = None,
300
+ backend_orchestrator_builder: Optional[BackendOrchestratorBuilder] = None,
282
301
  ) -> "HubSupervisor":
283
302
  config = load_hub_config(path)
284
303
  return cls(
285
304
  config,
286
305
  backend_factory_builder=backend_factory_builder,
287
306
  app_server_supervisor_factory_builder=app_server_supervisor_factory_builder,
307
+ backend_orchestrator_builder=backend_orchestrator_builder,
288
308
  )
289
309
 
290
310
  def scan(self) -> List[RepoSnapshot]:
@@ -296,16 +316,17 @@ class HubSupervisor:
296
316
  return snapshots
297
317
 
298
318
  def list_repos(self, *, use_cache: bool = True) -> List[RepoSnapshot]:
299
- if use_cache and self._list_cache and self._list_cache_at is not None:
300
- if time.monotonic() - self._list_cache_at < 2.0:
301
- return self._list_cache
302
- manifest, records = self._manifest_records(manifest_only=True)
303
- snapshots = self._build_snapshots(records)
304
- self.state = HubState(last_scan_at=self.state.last_scan_at, repos=snapshots)
305
- save_hub_state(self.state_path, self.state, self.hub_config.root)
306
- self._list_cache = snapshots
307
- self._list_cache_at = time.monotonic()
308
- return snapshots
319
+ with self._list_lock:
320
+ if use_cache and self._list_cache and self._list_cache_at is not None:
321
+ if time.monotonic() - self._list_cache_at < 2.0:
322
+ return self._list_cache
323
+ manifest, records = self._manifest_records(manifest_only=True)
324
+ snapshots = self._build_snapshots(records)
325
+ self.state = HubState(last_scan_at=self.state.last_scan_at, repos=snapshots)
326
+ save_hub_state(self.state_path, self.state, self.hub_config.root)
327
+ self._list_cache = snapshots
328
+ self._list_cache_at = time.monotonic()
329
+ return snapshots
309
330
 
310
331
  def _reconcile_startup(self) -> None:
311
332
  try:
@@ -320,23 +341,18 @@ class HubSupervisor:
320
341
  repo_config = derive_repo_config(
321
342
  self.hub_config, record.absolute_path, load_env=False
322
343
  )
323
- backend_factory = (
324
- self._backend_factory_builder(record.absolute_path, repo_config)
325
- if self._backend_factory_builder is not None
326
- else None
327
- )
328
- app_server_supervisor_factory = (
329
- self._app_server_supervisor_factory_builder(repo_config)
330
- if self._app_server_supervisor_factory_builder is not None
344
+ backend_orchestrator = (
345
+ self._backend_orchestrator_builder(
346
+ record.absolute_path, repo_config
347
+ )
348
+ if self._backend_orchestrator_builder is not None
331
349
  else None
332
350
  )
333
351
  controller = ProcessRunnerController(
334
- Engine(
335
- record.absolute_path,
352
+ RuntimeContext(
353
+ repo_root=record.absolute_path,
336
354
  config=repo_config,
337
- backend_factory=backend_factory,
338
- app_server_supervisor_factory=app_server_supervisor_factory,
339
- agent_id_validator=self._agent_id_validator,
355
+ backend_orchestrator=backend_orchestrator,
340
356
  )
341
357
  )
342
358
  controller.reconcile()
@@ -890,6 +906,7 @@ class HubSupervisor:
890
906
  app_server_supervisor_factory_builder=(
891
907
  self._app_server_supervisor_factory_builder
892
908
  ),
909
+ backend_orchestrator_builder=self._backend_orchestrator_builder,
893
910
  agent_id_validator=self._agent_id_validator,
894
911
  )
895
912
  self._runners[repo_id] = runner
@@ -933,8 +950,148 @@ class HubSupervisor:
933
950
  return snapshot
934
951
 
935
952
  def _invalidate_list_cache(self) -> None:
936
- self._list_cache = None
937
- self._list_cache_at = None
953
+ with self._list_lock:
954
+ self._list_cache = None
955
+ self._list_cache_at = None
956
+
957
+ @property
958
+ def lifecycle_emitter(self) -> LifecycleEventEmitter:
959
+ return self._lifecycle_emitter
960
+
961
+ @property
962
+ def lifecycle_store(self) -> LifecycleEventStore:
963
+ return self._lifecycle_emitter._store
964
+
965
+ def trigger_pma_from_lifecycle_event(self, event: LifecycleEvent) -> None:
966
+ if event.processed:
967
+ return
968
+ event_id = event.event_id
969
+ if event_id is None:
970
+ return
971
+ self.lifecycle_store.mark_processed(event_id)
972
+ self.lifecycle_store.prune_processed(keep_last=50)
973
+ logger.info(
974
+ "PMA wakeup triggered by lifecycle event: type=%s repo_id=%s run_id=%s",
975
+ event.event_type.value,
976
+ event.repo_id,
977
+ event.run_id,
978
+ )
979
+
980
+ def process_lifecycle_events(self) -> None:
981
+ events = self.lifecycle_store.get_unprocessed(limit=100)
982
+ if not events:
983
+ return
984
+ for event in events:
985
+ try:
986
+ self.trigger_pma_from_lifecycle_event(event)
987
+ except Exception as exc:
988
+ logger.exception(
989
+ "Failed to process lifecycle event %s: %s", event.event_id, exc
990
+ )
991
+
992
+ def _start_lifecycle_event_processor(self) -> None:
993
+ if self._lifecycle_thread is not None:
994
+ return
995
+
996
+ def _process_loop():
997
+ while not self._lifecycle_stop_event.wait(5.0):
998
+ try:
999
+ self.process_lifecycle_events()
1000
+ except Exception:
1001
+ logger.exception("Error in lifecycle event processor")
1002
+
1003
+ self._lifecycle_thread = threading.Thread(
1004
+ target=_process_loop, daemon=True, name="lifecycle-event-processor"
1005
+ )
1006
+ self._lifecycle_thread.start()
1007
+
1008
+ def _stop_lifecycle_event_processor(self) -> None:
1009
+ if self._lifecycle_thread is None:
1010
+ return
1011
+ self._lifecycle_stop_event.set()
1012
+ self._lifecycle_thread.join(timeout=2.0)
1013
+ self._lifecycle_thread = None
1014
+
1015
+ def shutdown(self) -> None:
1016
+ self._stop_lifecycle_event_processor()
1017
+ self._stop_dispatch_interceptor()
1018
+
1019
+ def _start_dispatch_interceptor(self) -> None:
1020
+ if not self.hub_config.pma.enabled:
1021
+ return
1022
+ if not self.hub_config.pma.dispatch_interception_enabled:
1023
+ return
1024
+ if self._dispatch_interceptor_thread is not None:
1025
+ return
1026
+
1027
+ import asyncio
1028
+ from typing import TYPE_CHECKING
1029
+
1030
+ if TYPE_CHECKING:
1031
+ pass
1032
+
1033
+ def _run_interceptor():
1034
+ loop = asyncio.new_event_loop()
1035
+ asyncio.set_event_loop(loop)
1036
+
1037
+ from .pma_dispatch_interceptor import run_dispatch_interceptor
1038
+
1039
+ stop_event = threading.Event()
1040
+ self._dispatch_interceptor_stop_event = stop_event
1041
+
1042
+ async def run_until_stop():
1043
+ task = None
1044
+ try:
1045
+ task = await run_dispatch_interceptor(
1046
+ hub_root=self.hub_config.root,
1047
+ supervisor=self,
1048
+ interval_seconds=5.0,
1049
+ on_intercept=self._on_dispatch_intercept,
1050
+ )
1051
+ while not stop_event.is_set():
1052
+ await asyncio.sleep(0.1)
1053
+ except asyncio.CancelledError:
1054
+ pass
1055
+ finally:
1056
+ if task is not None and not task.done():
1057
+ task.cancel()
1058
+ if task is not None:
1059
+ try:
1060
+ await task
1061
+ except (asyncio.CancelledError, Exception):
1062
+ pass
1063
+
1064
+ loop.run_until_complete(run_until_stop())
1065
+ loop.close()
1066
+
1067
+ self._dispatch_interceptor_thread = threading.Thread(
1068
+ target=_run_interceptor, daemon=True, name="pma-dispatch-interceptor"
1069
+ )
1070
+ self._dispatch_interceptor_thread.start()
1071
+
1072
+ def _stop_dispatch_interceptor(self) -> None:
1073
+ if self._dispatch_interceptor_stop_event is not None:
1074
+ self._dispatch_interceptor_stop_event.set()
1075
+ if self._dispatch_interceptor_thread is not None:
1076
+ self._dispatch_interceptor_thread.join(timeout=2.0)
1077
+ self._dispatch_interceptor_thread = None
1078
+ self._dispatch_interceptor_stop_event = None
1079
+
1080
+ def _on_dispatch_intercept(self, event_id: str, result: Any) -> None:
1081
+ logger.info(
1082
+ "Dispatch intercepted: event_id=%s action=%s reason=%s",
1083
+ event_id,
1084
+ (
1085
+ result.get("action")
1086
+ if isinstance(result, dict)
1087
+ else getattr(result, "action", None)
1088
+ ),
1089
+ (
1090
+ result.get("reason")
1091
+ if isinstance(result, dict)
1092
+ else getattr(result, "reason", None)
1093
+ ),
1094
+ )
938
1095
 
939
1096
  def _snapshot_from_record(self, record: DiscoveryRecord) -> RepoSnapshot:
940
1097
  repo_path = record.absolute_path