agent-control-plane 0.2.0 → 0.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -19
- package/assets/workflow-catalog.json +1 -1
- package/bin/pr-risk.sh +22 -7
- package/bin/sync-pr-labels.sh +1 -1
- package/hooks/heartbeat-hooks.sh +125 -12
- package/hooks/issue-reconcile-hooks.sh +1 -1
- package/hooks/pr-reconcile-hooks.sh +1 -1
- package/npm/bin/agent-control-plane.js +296 -61
- package/package.json +11 -7
- package/tools/bin/agent-github-update-labels +36 -2
- package/tools/bin/agent-project-catch-up-merged-prs +4 -2
- package/tools/bin/agent-project-cleanup-session +49 -5
- package/tools/bin/agent-project-heartbeat-loop +119 -1471
- package/tools/bin/agent-project-publish-issue-pr +6 -3
- package/tools/bin/agent-project-reconcile-issue-session +78 -106
- package/tools/bin/agent-project-reconcile-pr-session +166 -143
- package/tools/bin/agent-project-retry-state +18 -7
- package/tools/bin/agent-project-run-claude-session +10 -0
- package/tools/bin/agent-project-run-codex-resilient +99 -14
- package/tools/bin/agent-project-run-codex-session +16 -5
- package/tools/bin/agent-project-run-kilo-session +10 -0
- package/tools/bin/agent-project-run-openclaw-session +10 -0
- package/tools/bin/agent-project-run-opencode-session +10 -0
- package/tools/bin/agent-project-sync-source-repo-main +163 -0
- package/tools/bin/agent-project-worker-status +10 -7
- package/tools/bin/cleanup-worktree.sh +6 -1
- package/tools/bin/flow-config-lib.sh +1257 -34
- package/tools/bin/flow-resident-worker-lib.sh +119 -1
- package/tools/bin/flow-shell-lib.sh +56 -0
- package/tools/bin/github-core-rate-limit-state.sh +77 -0
- package/tools/bin/github-write-outbox.sh +470 -0
- package/tools/bin/heartbeat-loop-cache-lib.sh +164 -0
- package/tools/bin/heartbeat-loop-counting-lib.sh +306 -0
- package/tools/bin/heartbeat-loop-pr-strategy-lib.sh +199 -0
- package/tools/bin/heartbeat-loop-scheduling-lib.sh +506 -0
- package/tools/bin/heartbeat-loop-worker-lib.sh +319 -0
- package/tools/bin/heartbeat-recovery-preflight.sh +12 -1
- package/tools/bin/heartbeat-safe-auto.sh +56 -3
- package/tools/bin/install-project-launchd.sh +17 -2
- package/tools/bin/project-init.sh +21 -1
- package/tools/bin/project-launchd-bootstrap.sh +16 -9
- package/tools/bin/project-runtimectl.sh +46 -2
- package/tools/bin/reconcile-bootstrap-lib.sh +113 -0
- package/tools/bin/resident-issue-controller-lib.sh +448 -0
- package/tools/bin/scaffold-profile.sh +61 -3
- package/tools/bin/start-pr-fix-worker.sh +47 -10
- package/tools/bin/start-resident-issue-loop.sh +28 -439
- package/tools/dashboard/app.js +37 -1
- package/tools/dashboard/dashboard_snapshot.py +65 -26
- package/tools/templates/pr-fix-template.md +3 -1
- package/tools/templates/pr-merge-repair-template.md +2 -1
- package/SKILL.md +0 -149
- package/references/architecture.md +0 -217
- package/references/commands.md +0 -128
- package/references/control-plane-map.md +0 -124
- package/references/docs-map.md +0 -73
- package/references/release-checklist.md +0 -65
- package/references/repo-map.md +0 -36
- package/tools/bin/split-retained-slice.sh +0 -124
|
@@ -13,9 +13,15 @@ from typing import Any
|
|
|
13
13
|
|
|
14
14
|
ROOT_DIR = Path(__file__).resolve().parents[2]
|
|
15
15
|
TOOLS_BIN_DIR = ROOT_DIR / "tools" / "bin"
|
|
16
|
+
DASHBOARD_DIR = ROOT_DIR / "tools" / "dashboard"
|
|
16
17
|
RENDER_FLOW_CONFIG = TOOLS_BIN_DIR / "render-flow-config.sh"
|
|
17
18
|
WORKER_STATUS_TOOL = TOOLS_BIN_DIR / "agent-project-worker-status"
|
|
18
19
|
|
|
20
|
+
if str(DASHBOARD_DIR) not in sys.path:
|
|
21
|
+
sys.path.insert(0, str(DASHBOARD_DIR))
|
|
22
|
+
|
|
23
|
+
from issue_queue_state import collect_issue_queue
|
|
24
|
+
|
|
19
25
|
|
|
20
26
|
def utc_now_iso() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    current = datetime.now(timezone.utc)
    # The trailing "Z" is a literal in the format string; the value is
    # already UTC because ``now`` was called with ``timezone.utc``.
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
@@ -493,6 +499,13 @@ def collect_resident_workers(state_root: Path) -> list[dict[str, Any]]:
|
|
|
493
499
|
"last_action": env.get("LAST_ACTION", ""),
|
|
494
500
|
"last_failure_reason": env.get("LAST_FAILURE_REASON", ""),
|
|
495
501
|
"worktree": env.get("WORKTREE", ""),
|
|
502
|
+
"resident_lane_kind": env.get("RESIDENT_LANE_KIND", ""),
|
|
503
|
+
"resident_lane_value": env.get("RESIDENT_LANE_VALUE", ""),
|
|
504
|
+
"resident_lane": (
|
|
505
|
+
f"{env.get('RESIDENT_LANE_KIND', '')}/{env.get('RESIDENT_LANE_VALUE', '')}"
|
|
506
|
+
if env.get("RESIDENT_LANE_KIND", "") and env.get("RESIDENT_LANE_VALUE", "")
|
|
507
|
+
else ""
|
|
508
|
+
),
|
|
496
509
|
"metadata_file": str(path),
|
|
497
510
|
}
|
|
498
511
|
)
|
|
@@ -684,46 +697,68 @@ def collect_pr_retries(state_root: Path) -> list[dict[str, Any]]:
|
|
|
684
697
|
return items
|
|
685
698
|
|
|
686
699
|
|
|
687
|
-
def
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
if configured and configured != ".":
|
|
693
|
-
return Path(configured)
|
|
694
|
-
if runs_root.name == "runs":
|
|
695
|
-
return runs_root.parent / "history"
|
|
696
|
-
return Path(".")
|
|
700
|
+
def collect_github_outbox(state_root: Path) -> dict[str, Any]:
|
|
701
|
+
outbox_root = state_root / "github-outbox"
|
|
702
|
+
pending_root = outbox_root / "pending"
|
|
703
|
+
sent_root = outbox_root / "sent"
|
|
704
|
+
failed_root = outbox_root / "failed"
|
|
697
705
|
|
|
698
|
-
|
|
699
|
-
def collect_issue_queue(state_root: Path) -> dict[str, list[dict[str, Any]]]:
|
|
700
|
-
queue_root = state_root / "resident-workers" / "issue-queue"
|
|
701
|
-
pending_root = queue_root / "pending"
|
|
702
|
-
claims_root = queue_root / "claims"
|
|
703
|
-
|
|
704
|
-
def collect_files(root: Path) -> list[dict[str, Any]]:
|
|
706
|
+
def list_items(root: Path, limit: int | None = None) -> list[dict[str, Any]]:
|
|
705
707
|
if not root.is_dir():
|
|
706
708
|
return []
|
|
709
|
+
|
|
707
710
|
items: list[dict[str, Any]] = []
|
|
708
|
-
for path in sorted(root.glob("*.
|
|
709
|
-
|
|
711
|
+
for path in sorted(root.glob("*.json"), key=lambda item: item.stat().st_mtime, reverse=True):
|
|
712
|
+
payload = read_json_file(path)
|
|
710
713
|
items.append(
|
|
711
714
|
{
|
|
712
|
-
"
|
|
713
|
-
"
|
|
714
|
-
"
|
|
715
|
-
"
|
|
716
|
-
"
|
|
715
|
+
"type": str(payload.get("type", "")),
|
|
716
|
+
"repo_slug": str(payload.get("repo_slug", "")),
|
|
717
|
+
"number": str(payload.get("number", "")),
|
|
718
|
+
"kind": str(payload.get("kind", "")),
|
|
719
|
+
"created_at": str(payload.get("created_at", "")),
|
|
720
|
+
"updated_at": file_mtime_iso(path),
|
|
721
|
+
"file": str(path),
|
|
722
|
+
"add_count": len(payload.get("add", []) or []),
|
|
723
|
+
"remove_count": len(payload.get("remove", []) or []),
|
|
724
|
+
"body_preview": summarize_whitespace(str(payload.get("body", "")))[:120],
|
|
717
725
|
}
|
|
718
726
|
)
|
|
727
|
+
if limit is not None and len(items) >= limit:
|
|
728
|
+
break
|
|
719
729
|
return items
|
|
720
730
|
|
|
731
|
+
all_pending_items = list_items(pending_root)
|
|
732
|
+
pending_items = all_pending_items[:20]
|
|
733
|
+
sent_items = list_items(sent_root, limit=5)
|
|
734
|
+
failed_items = list_items(failed_root, limit=5)
|
|
735
|
+
|
|
721
736
|
return {
|
|
722
|
-
"pending":
|
|
723
|
-
"
|
|
737
|
+
"pending": pending_items,
|
|
738
|
+
"sent_recent": sent_items,
|
|
739
|
+
"failed_recent": failed_items,
|
|
740
|
+
"counts": {
|
|
741
|
+
"pending": len(all_pending_items),
|
|
742
|
+
"sent": len(list(sent_root.glob("*.json"))) if sent_root.is_dir() else 0,
|
|
743
|
+
"failed": len(list(failed_root.glob("*.json"))) if failed_root.is_dir() else 0,
|
|
744
|
+
"pending_comments": sum(1 for item in all_pending_items if item["type"] == "comment"),
|
|
745
|
+
"pending_approvals": sum(1 for item in all_pending_items if item["type"] == "approval"),
|
|
746
|
+
"pending_label_updates": sum(1 for item in all_pending_items if item["type"] == "labels"),
|
|
747
|
+
},
|
|
724
748
|
}
|
|
725
749
|
|
|
726
750
|
|
|
751
|
+
def resolve_history_root(render_env: dict[str, str], yaml_env: dict[str, str], runs_root: Path) -> Path:
    """Pick the directory used as the history root.

    Sources are tried in this order:

    1. ``EFFECTIVE_HISTORY_ROOT`` from ``render_env``
    2. ``runtime.history_root`` from ``yaml_env``
    3. ``runs_root.parent / "history"`` when ``runs_root`` is named ``runs``
    4. ``Path(".")`` as the last resort

    Args:
        render_env: Key/value pairs that may contain ``EFFECTIVE_HISTORY_ROOT``.
        yaml_env: Key/value pairs that may contain ``runtime.history_root``.
        runs_root: Runs directory used to derive a sibling ``history`` folder.

    Returns:
        The first usable path from the list above.
    """
    candidates = (
        render_env.get("EFFECTIVE_HISTORY_ROOT", ""),
        yaml_env.get("runtime.history_root", ""),
    )
    for raw in candidates:
        configured = raw.strip()
        # A blank value or "." counts as "not configured". Each source is
        # checked on its own so a "." in render_env cannot hide a real path
        # supplied by yaml_env.
        if configured and configured != ".":
            return Path(configured)
    if runs_root.name == "runs":
        return runs_root.parent / "history"
    return Path(".")
|
|
761
|
+
|
|
727
762
|
def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, Any]:
|
|
728
763
|
env = env_with_profile(profile_id, registry_root)
|
|
729
764
|
render_env = run_key_value_script(RENDER_FLOW_CONFIG, env)
|
|
@@ -742,6 +777,7 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
|
|
|
742
777
|
retries = collect_issue_retries(state_root)
|
|
743
778
|
pr_retries = collect_pr_retries(state_root)
|
|
744
779
|
queue = collect_issue_queue(state_root)
|
|
780
|
+
github_outbox = collect_github_outbox(state_root)
|
|
745
781
|
alerts = [alert for run in (runs + recent_history) for alert in run.get("alerts", [])]
|
|
746
782
|
codex_rotation = collect_codex_rotation(render_env)
|
|
747
783
|
|
|
@@ -789,6 +825,8 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
|
|
|
789
825
|
"active_retries": sum(1 for item in retries if not item.get("ready", True)),
|
|
790
826
|
"scheduled_issues": len(scheduled),
|
|
791
827
|
"alerts": len(alerts),
|
|
828
|
+
"pending_github_writes": github_outbox["counts"]["pending"],
|
|
829
|
+
"failed_github_writes": github_outbox["counts"]["failed"],
|
|
792
830
|
},
|
|
793
831
|
"runs": runs,
|
|
794
832
|
"recent_history": recent_history,
|
|
@@ -800,6 +838,7 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
|
|
|
800
838
|
"issue_retries": retries,
|
|
801
839
|
"pr_retries": pr_retries,
|
|
802
840
|
"issue_queue": queue,
|
|
841
|
+
"github_outbox": github_outbox,
|
|
803
842
|
}
|
|
804
843
|
|
|
805
844
|
|
|
@@ -11,6 +11,7 @@ PR metadata:
|
|
|
11
11
|
- PR: {PR_NUMBER} - {PR_TITLE}
|
|
12
12
|
- URL: {PR_URL}
|
|
13
13
|
- Base branch: {PR_BASE_REF}
|
|
14
|
+
- Base tracking ref: {PR_BASE_TRACKING_REF}
|
|
14
15
|
- Head branch: {PR_HEAD_REF}
|
|
15
16
|
- Linked issue: {PR_LINKED_ISSUE_ID}
|
|
16
17
|
- Risk classification: {PR_RISK}
|
|
@@ -55,7 +56,7 @@ Required flow:
|
|
|
55
56
|
|
|
56
57
|
1. Inspect the current diff and the failing/pending CI signals first:
|
|
57
58
|
- `openspec list` if the repo uses OpenSpec
|
|
58
|
-
- `git diff --stat
|
|
59
|
+
- `git diff --stat {PR_BASE_TRACKING_REF}...HEAD`
|
|
59
60
|
- `git status --short`
|
|
60
61
|
- if `Merge state` is not `CLEAN` or `Mergeable` is `FALSE`, treat branch drift/conflicts as the concrete blocker first
|
|
61
62
|
- if `Actionable current-head review findings` is not `- none`, treat those findings as the concrete blockers to address first
|
|
@@ -68,6 +69,7 @@ Required flow:
|
|
|
68
69
|
- do not run `git fetch`, `git merge`, `git rebase`, `git commit`, `git push`, or other Git metadata-writing commands from inside this worker; host-side wrappers own those steps
|
|
69
70
|
3. If the blocker is branch drift or a merge conflict, use the already-prepared local refs and make the smallest branch-local source update needed to restore mergeability on this PR branch. Keep the resolution scoped to the PR intent; do not rewrite unrelated code.
|
|
70
71
|
- Treat `Current local merge-conflict paths` as the authoritative conflict list to clear.
|
|
72
|
+
- Treat `{PR_BASE_TRACKING_REF}` as the authoritative base ref for any read-only diff or merge-base inspection.
|
|
71
73
|
- Do not stop after fixing only one file if other conflict paths remain.
|
|
72
74
|
- Before you declare success, rerun local merge simulation and confirm there are no remaining conflict paths for this branch against `{PR_BASE_REF}`.
|
|
73
75
|
4. Make the smallest change that fixes the concrete PR blockers on this existing branch.
|
|
@@ -11,6 +11,7 @@ PR metadata:
|
|
|
11
11
|
- PR: {PR_NUMBER} - {PR_TITLE}
|
|
12
12
|
- URL: {PR_URL}
|
|
13
13
|
- Base branch: {PR_BASE_REF}
|
|
14
|
+
- Base tracking ref: {PR_BASE_TRACKING_REF}
|
|
14
15
|
- Head branch: {PR_HEAD_REF}
|
|
15
16
|
- Linked issue: {PR_LINKED_ISSUE_ID}
|
|
16
17
|
- Risk classification: {PR_RISK}
|
|
@@ -43,7 +44,7 @@ PR body:
|
|
|
43
44
|
Required flow:
|
|
44
45
|
|
|
45
46
|
1. Treat this worktree as an already-prepared merge repair state from host control-plane.
|
|
46
|
-
- `
|
|
47
|
+
- `{PR_BASE_TRACKING_REF}` has already been merged into this worktree locally.
|
|
47
48
|
- if `Current unresolved merge-conflict paths` is not `- none detected after host merge preparation`, resolve those files first.
|
|
48
49
|
2. Never run Git control commands from inside the worker:
|
|
49
50
|
- do not run `git fetch`, `git pull`, `git merge`, `git rebase`, `git commit`, `git push`, or any command that writes Git metadata
|
package/SKILL.md
DELETED
|
@@ -1,149 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: agent-control-plane
|
|
3
|
-
description: Use when working on the shared multi-project agent control plane, including scheduler/runtime orchestration, worktree and worker lifecycle, profile onboarding, and cross-project automation flows.
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
# Agent Control Plane
|
|
7
|
-
|
|
8
|
-
This repository is the canonical `agent-control-plane` package. It owns the
|
|
9
|
-
generic scheduler/runtime, worktree lifecycle, profile onboarding, queue/risk
|
|
10
|
-
automation, and profile-scoped prompt/template resolution used across multiple
|
|
11
|
-
projects.
|
|
12
|
-
|
|
13
|
-
Installed project profiles live under
|
|
14
|
-
`~/.agent-runtime/control-plane/profiles/<id>/`. Treat the control plane itself
|
|
15
|
-
as generic, then load the selected profile's local guidance only when the task
|
|
16
|
-
is truly about that project. Integrated project data should stay in that
|
|
17
|
-
external profile registry, not inside this repository.
|
|
18
|
-
|
|
19
|
-
## What Lives Here
|
|
20
|
-
|
|
21
|
-
- core operating manual in this `SKILL.md`
|
|
22
|
-
- installed project profiles in `~/.agent-runtime/control-plane/profiles/*/control-plane.yaml`
|
|
23
|
-
- installed profile notes in `~/.agent-runtime/control-plane/profiles/*/README.md`
|
|
24
|
-
- workflow catalog in `assets/workflow-catalog.json`
|
|
25
|
-
- worker dashboard in `tools/dashboard/` with launcher at `tools/dashboard/dashboard_snapshot.py`
|
|
26
|
-
and `tools/bin/serve-dashboard.sh`
|
|
27
|
-
- dashboard autostart helpers in `tools/bin/dashboard-launchd-bootstrap.sh` and
|
|
28
|
-
`tools/bin/install-dashboard-launchd.sh`
|
|
29
|
-
- project autostart helpers in `tools/bin/project-launchd-bootstrap.sh`,
|
|
30
|
-
`tools/bin/install-project-launchd.sh`, and
|
|
31
|
-
`tools/bin/uninstall-project-launchd.sh`
|
|
32
|
-
- queue/label/risk scripts in `bin/`
|
|
33
|
-
- heartbeat and reconcile hooks in `hooks/`
|
|
34
|
-
- shared runtime wrappers, onboarding tools, and tests in `tools/bin/` and
|
|
35
|
-
`tools/tests/`
|
|
36
|
-
|
|
37
|
-
The vendored runtime entrypoints used by live schedulers are published from this
|
|
38
|
-
checkout into the shared canonical skill copy under
|
|
39
|
-
`skills/openclaw/agent-control-plane`, then copied into
|
|
40
|
-
`~/.agent-runtime/runtime-home/skills/openclaw/agent-control-plane` by
|
|
41
|
-
`tools/bin/sync-shared-agent-home.sh`.
|
|
42
|
-
|
|
43
|
-
## Required Startup Sequence
|
|
44
|
-
|
|
45
|
-
Before doing non-trivial work in this repository or on an integrated project:
|
|
46
|
-
|
|
47
|
-
1. Determine the active profile with `AGENT_PROJECT_ID`, `ACP_PROJECT_ID`, or
|
|
48
|
-
`tools/bin/render-flow-config.sh`.
|
|
49
|
-
2. Read the selected profile notes in
|
|
50
|
-
`~/.agent-runtime/control-plane/profiles/<id>/README.md` when they exist.
|
|
51
|
-
3. Read the selected repo's local startup docs before changing behavior:
|
|
52
|
-
`AGENTS.md`, `openspec/AGENT_RULES.md`, `openspec/AGENTS.md`,
|
|
53
|
-
`openspec/project.md`, and `openspec/CONVENTIONS.md`.
|
|
54
|
-
4. Use a clean read-only inspection checkout first; move to an isolated
|
|
55
|
-
worktree or agent-owned checkout before making non-trivial edits.
|
|
56
|
-
5. If the task changes product behavior, inspect the active OpenSpec changes
|
|
57
|
-
before implementation.
|
|
58
|
-
|
|
59
|
-
For onboarding a new repository onto the shared control plane:
|
|
60
|
-
|
|
61
|
-
1. Prefer `tools/bin/project-init.sh --profile-id <id> --repo-slug <owner/repo>`
|
|
62
|
-
2. Fill in `~/.agent-runtime/control-plane/profiles/<id>/README.md`
|
|
63
|
-
3. If you need manual control, the underlying steps remain:
|
|
64
|
-
`tools/bin/scaffold-profile.sh`, `tools/bin/profile-smoke.sh`,
|
|
65
|
-
`tools/bin/profile-adopt.sh`, and `tools/bin/sync-shared-agent-home.sh`
|
|
66
|
-
|
|
67
|
-
For runtime control of one installed profile:
|
|
68
|
-
|
|
69
|
-
1. Check state with `tools/bin/project-runtimectl.sh status --profile-id <id>`
|
|
70
|
-
2. Start or ensure runtime with `tools/bin/project-runtimectl.sh start --profile-id <id>`
|
|
71
|
-
3. Stop or recycle runtime with `tools/bin/project-runtimectl.sh stop --profile-id <id>`
|
|
72
|
-
4. Use `tools/bin/project-runtimectl.sh restart --profile-id <id>` for a clean bounce
|
|
73
|
-
5. Use `tools/bin/install-project-launchd.sh --profile-id <id>` when one
|
|
74
|
-
profile should survive reboot/login via a per-project LaunchAgent
|
|
75
|
-
6. Remove per-project autostart with
|
|
76
|
-
`tools/bin/uninstall-project-launchd.sh --profile-id <id>`
|
|
77
|
-
7. Remove an installed profile with `tools/bin/project-remove.sh --profile-id <id>`
|
|
78
|
-
|
|
79
|
-
## Task Routing
|
|
80
|
-
|
|
81
|
-
Pick the smallest matching path and load only the relevant references:
|
|
82
|
-
|
|
83
|
-
- Control-plane layout, publication model, and profile ownership:
|
|
84
|
-
`references/control-plane-map.md`
|
|
85
|
-
- Control-plane operator commands and profile-management entrypoints:
|
|
86
|
-
`references/commands.md`
|
|
87
|
-
- Control-plane repository layout:
|
|
88
|
-
`references/repo-map.md`
|
|
89
|
-
- Control-plane docs and profile guidance locations:
|
|
90
|
-
`references/docs-map.md`
|
|
91
|
-
- Project-specific rules and repo commands:
|
|
92
|
-
`~/.agent-runtime/control-plane/profiles/<id>/README.md`
|
|
93
|
-
|
|
94
|
-
## Repo Rules That Matter Most
|
|
95
|
-
|
|
96
|
-
- Keep the core engine generic. Put repo-specific behavior behind a profile,
|
|
97
|
-
profile templates, or profile-scoped docs instead of hardcoding it into the
|
|
98
|
-
shared runtime.
|
|
99
|
-
- Follow OpenSpec and the selected repo's local rules before implementing
|
|
100
|
-
product behavior changes.
|
|
101
|
-
- Do not simplify or change approach without explicit user approval.
|
|
102
|
-
- Prefer deterministic wrappers and config-driven routing over special-case
|
|
103
|
-
conditionals.
|
|
104
|
-
- For any non-trivial write task, use a dedicated agent worktree or another
|
|
105
|
-
isolated clean checkout.
|
|
106
|
-
- Preserve dirty retained checkouts; continue from a fresh isolated worktree
|
|
107
|
-
instead of layering more edits there.
|
|
108
|
-
- Prefer canonical docs and profile-local notes over stale audits or incidental
|
|
109
|
-
markdown snapshots.
|
|
110
|
-
- When updating the control plane itself, repair published copies after the
|
|
111
|
-
source change so runtime and source do not drift.
|
|
112
|
-
|
|
113
|
-
## Common Operating Patterns
|
|
114
|
-
|
|
115
|
-
### Analysis and Planning
|
|
116
|
-
|
|
117
|
-
- Resolve the active profile first.
|
|
118
|
-
- Read `~/.agent-runtime/control-plane/profiles/<id>/README.md` for repo-local context.
|
|
119
|
-
- Use `references/docs-map.md` to find the canonical control-plane or
|
|
120
|
-
profile-local source instead of scanning random files.
|
|
121
|
-
|
|
122
|
-
### Implementation
|
|
123
|
-
|
|
124
|
-
- Keep generic scheduler/runtime changes in the shared engine.
|
|
125
|
-
- Put repo-specific prompts, commands, or heuristics in the installed profile
|
|
126
|
-
directory under `~/.agent-runtime/control-plane/profiles/<id>/`.
|
|
127
|
-
- Keep changes reversible and tightly scoped.
|
|
128
|
-
|
|
129
|
-
### Testing
|
|
130
|
-
|
|
131
|
-
- Use `references/commands.md` for control-plane checks.
|
|
132
|
-
- Use the selected profile's README for repo-specific dev/test commands.
|
|
133
|
-
- Re-run `tools/bin/profile-smoke.sh`, `tools/bin/check-skill-contracts.sh`,
|
|
134
|
-
dashboard tests, and targeted shell tests after meaningful control-plane
|
|
135
|
-
changes.
|
|
136
|
-
|
|
137
|
-
### Publishing and Runtime Health
|
|
138
|
-
|
|
139
|
-
- Use `tools/bin/flow-runtime-doctor.sh` to confirm source/runtime sync.
|
|
140
|
-
- Use `tools/bin/sync-shared-agent-home.sh` after changing runtime-facing files.
|
|
141
|
-
- Prefer copied published artifacts over symlink aliases.
|
|
142
|
-
|
|
143
|
-
## References
|
|
144
|
-
|
|
145
|
-
- `references/control-plane-map.md`
|
|
146
|
-
- `references/commands.md`
|
|
147
|
-
- `references/repo-map.md`
|
|
148
|
-
- `references/docs-map.md`
|
|
149
|
-
- `~/.agent-runtime/control-plane/profiles/<id>/README.md`
|
|
@@ -1,217 +0,0 @@
|
|
|
1
|
-
# Architecture Guide
|
|
2
|
-
|
|
3
|
-
This document explains how `agent-control-plane` is put together as an
|
|
4
|
-
operator-facing system, not just a collection of scripts.
|
|
5
|
-
|
|
6
|
-
ACP has five practical layers:
|
|
7
|
-
|
|
8
|
-
1. package entrypoint and staging
|
|
9
|
-
2. profile installation and publication
|
|
10
|
-
3. runtime supervision and heartbeat scheduling
|
|
11
|
-
4. worker execution and reconcile
|
|
12
|
-
5. dashboard and operator visibility
|
|
13
|
-
|
|
14
|
-
If you are reading the repo for the first time, start with the system overview
|
|
15
|
-
diagram below, then jump to the flow you care about most.
|
|
16
|
-
|
|
17
|
-
## System Overview
|
|
18
|
-
|
|
19
|
-
```mermaid
|
|
20
|
-
flowchart LR
|
|
21
|
-
User[Operator] --> CLI["npm/bin/agent-control-plane.js"]
|
|
22
|
-
|
|
23
|
-
CLI --> Init["project-init.sh"]
|
|
24
|
-
CLI --> Sync["sync-shared-agent-home.sh"]
|
|
25
|
-
CLI --> RuntimeCtl["project-runtimectl.sh"]
|
|
26
|
-
CLI --> DashboardCmd["serve-dashboard.sh"]
|
|
27
|
-
|
|
28
|
-
Init --> Profiles["Profile registry\n~/.agent-runtime/control-plane/profiles/<id>"]
|
|
29
|
-
Sync --> RuntimeHome["Runtime home\n~/.agent-runtime/runtime-home"]
|
|
30
|
-
|
|
31
|
-
RuntimeCtl --> Supervisor["project-runtime-supervisor.sh"]
|
|
32
|
-
Supervisor --> Bootstrap["project-launchd-bootstrap.sh"]
|
|
33
|
-
Bootstrap --> Heartbeat["heartbeat-safe-auto.sh"]
|
|
34
|
-
Heartbeat --> Scheduler["agent-project-heartbeat-loop"]
|
|
35
|
-
|
|
36
|
-
Scheduler --> IssueWorkers["start-issue-worker.sh"]
|
|
37
|
-
Scheduler --> PRWorkers["start-pr-review-worker.sh\nstart-pr-fix-worker.sh\nstart-pr-merge-repair-worker.sh"]
|
|
38
|
-
IssueWorkers --> Router["run-codex-task.sh"]
|
|
39
|
-
PRWorkers --> Router
|
|
40
|
-
|
|
41
|
-
Router --> Codex["agent-project-run-codex-session"]
|
|
42
|
-
Router --> Claude["agent-project-run-claude-session"]
|
|
43
|
-
Router --> OpenClaw["agent-project-run-openclaw-session"]
|
|
44
|
-
Router --> Ollama["agent-project-run-ollama-session"]
|
|
45
|
-
Router --> Pi["agent-project-run-pi-session"]
|
|
46
|
-
Router --> OpenCode["agent-project-run-opencode-session"]
|
|
47
|
-
Router --> Kilo["agent-project-run-kilo-session"]
|
|
48
|
-
|
|
49
|
-
Codex --> Artifacts["run.env / runner.env /\nresult.env / verification.jsonl"]
|
|
50
|
-
Claude --> Artifacts
|
|
51
|
-
OpenClaw --> Artifacts
|
|
52
|
-
Ollama --> Artifacts
|
|
53
|
-
Pi --> Artifacts
|
|
54
|
-
OpenCode --> Artifacts
|
|
55
|
-
Kilo --> Artifacts
|
|
56
|
-
|
|
57
|
-
Artifacts --> Reconcile["reconcile-issue-worker.sh\nreconcile-pr-worker.sh"]
|
|
58
|
-
Reconcile --> GitHub["GitHub issues / PRs / labels / comments"]
|
|
59
|
-
Reconcile --> History["runs/ + history/ + state/"]
|
|
60
|
-
|
|
61
|
-
DashboardCmd --> Snapshot["dashboard_snapshot.py"]
|
|
62
|
-
Snapshot --> Profiles
|
|
63
|
-
Snapshot --> History
|
|
64
|
-
Snapshot --> Browser["Local dashboard browser"]
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
The important architectural choice is that ACP separates:
|
|
68
|
-
|
|
69
|
-
- package distribution from runtime execution
|
|
70
|
-
- shared engine logic from per-profile config
|
|
71
|
-
- worker execution from reconcile and GitHub side effects
|
|
72
|
-
- operator visibility from the worker CLIs themselves
|
|
73
|
-
|
|
74
|
-
## Install and Publication Flow
|
|
75
|
-
|
|
76
|
-
This is the path from `npx agent-control-plane ...` to a usable runtime on disk.
|
|
77
|
-
|
|
78
|
-
```mermaid
|
|
79
|
-
sequenceDiagram
|
|
80
|
-
participant User
|
|
81
|
-
participant CLI as agent-control-plane.js
|
|
82
|
-
participant Stage as staged shared-home
|
|
83
|
-
participant Init as project-init.sh
|
|
84
|
-
participant Scaffold as scaffold-profile.sh
|
|
85
|
-
participant Smoke as profile-smoke.sh
|
|
86
|
-
participant Adopt as profile-adopt.sh
|
|
87
|
-
participant Sync as sync-shared-agent-home.sh
|
|
88
|
-
|
|
89
|
-
User->>CLI: npx agent-control-plane init ...
|
|
90
|
-
CLI->>Stage: copy packaged skill into temp shared-home
|
|
91
|
-
CLI->>Init: forward command with staged env
|
|
92
|
-
Init->>Scaffold: write control-plane.yaml + profile docs
|
|
93
|
-
Init->>Smoke: validate profile contract
|
|
94
|
-
Init->>Adopt: create runtime roots / sync anchor repo / workspace
|
|
95
|
-
Init->>Sync: publish shared runtime into ~/.agent-runtime/runtime-home
|
|
96
|
-
```
|
|
97
|
-
|
|
98
|
-
Why this split exists:
|
|
99
|
-
|
|
100
|
-
- the npm package is treated as a distribution artifact
|
|
101
|
-
- the real runtime is copied into `~/.agent-runtime/runtime-home`
|
|
102
|
-
- installed profiles live outside the package in
|
|
103
|
-
`~/.agent-runtime/control-plane/profiles/<id>`
|
|
104
|
-
- upgrades are therefore explicit and repeatable instead of depending on a temp
|
|
105
|
-
`npx` cache directory
|
|
106
|
-
|
|
107
|
-
## Runtime Scheduler Loop
|
|
108
|
-
|
|
109
|
-
This is the heartbeat path ACP follows after `runtime start`.
|
|
110
|
-
|
|
111
|
-
```mermaid
|
|
112
|
-
flowchart TD
|
|
113
|
-
Start["runtime start"] --> RuntimeCtl["project-runtimectl.sh"]
|
|
114
|
-
RuntimeCtl --> Supervisor["project-runtime-supervisor.sh"]
|
|
115
|
-
Supervisor --> Bootstrap["project-launchd-bootstrap.sh"]
|
|
116
|
-
Bootstrap --> SyncCheck["sync runtime copy if needed"]
|
|
117
|
-
SyncCheck --> Heartbeat["heartbeat-safe-auto.sh"]
|
|
118
|
-
Heartbeat --> Preflight["locks / quota preflight /\nretained-worktree audit"]
|
|
119
|
-
Preflight --> SharedLoop["agent-project-heartbeat-loop"]
|
|
120
|
-
|
|
121
|
-
SharedLoop --> ReconcileCompleted["reconcile completed sessions"]
|
|
122
|
-
SharedLoop --> Capacity["compute capacity / cooldown / pending launches"]
|
|
123
|
-
SharedLoop --> Workflows["select workflow lane from issue + PR state"]
|
|
124
|
-
|
|
125
|
-
Workflows --> IssueImplementation["issue implementation"]
|
|
126
|
-
Workflows --> IssueScheduled["scheduled issue checks"]
|
|
127
|
-
Workflows --> IssueRecovery["blocked recovery"]
|
|
128
|
-
Workflows --> PRReview["PR review"]
|
|
129
|
-
Workflows --> PRFix["PR fix"]
|
|
130
|
-
Workflows --> PRMergeRepair["merge repair"]
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
Key detail: the shared scheduler owns the control logic around workers:
|
|
134
|
-
|
|
135
|
-
- concurrency and heavy-worker limits
|
|
136
|
-
- cooldown and retry gating
|
|
137
|
-
- resident recurring and scheduled issue lanes
|
|
138
|
-
- launch ordering
|
|
139
|
-
- summary output and queue visibility
|
|
140
|
-
|
|
141
|
-
That is why workers do not need to be "smart" about the entire system. The
|
|
142
|
-
workflow around them carries a lot of the operational burden.
|
|
143
|
-
|
|
144
|
-
## Worker Session Lifecycle
|
|
145
|
-
|
|
146
|
-
This is the path from one chosen issue or PR to a reconciled outcome.
|
|
147
|
-
|
|
148
|
-
```mermaid
|
|
149
|
-
flowchart LR
|
|
150
|
-
Pick["heartbeat selects issue or PR"] --> Launch["start-issue-worker.sh\nor start-pr-*.sh"]
|
|
151
|
-
Launch --> Worktree["open or reuse managed worktree"]
|
|
152
|
-
Worktree --> Prompt["render prompt + context files"]
|
|
153
|
-
Prompt --> Route["run-codex-task.sh"]
|
|
154
|
-
|
|
155
|
-
Route --> Backend["Codex / Claude / OpenClaw adapter"]
|
|
156
|
-
Backend --> Session["backend session wrapper"]
|
|
157
|
-
Session --> Output["result.env / comments /\nverification.jsonl / runner.env"]
|
|
158
|
-
|
|
159
|
-
Output --> Reconcile["agent-project-reconcile-issue-session\nor agent-project-reconcile-pr-session"]
|
|
160
|
-
Reconcile --> Labels["update labels / retry state /\nresident metadata / cooldown"]
|
|
161
|
-
Reconcile --> Publish["comment on issue, open PR,\nor leave blocked report"]
|
|
162
|
-
Reconcile --> Archive["archive run into history root"]
|
|
163
|
-
```
|
|
164
|
-
|
|
165
|
-
The contract here is deliberate:
|
|
166
|
-
|
|
167
|
-
- worker backends focus on producing work and result artifacts
|
|
168
|
-
- reconcile scripts own the final interpretation and GitHub-facing outcome
|
|
169
|
-
- resident metadata and history are updated by the host workflow, not by the
|
|
170
|
-
worker trying to infer the entire system state
|
|
171
|
-
|
|
172
|
-
## Dashboard Snapshot Pipeline
|
|
173
|
-
|
|
174
|
-
The dashboard is a read-only window into ACP state. It does not own scheduling.
|
|
175
|
-
|
|
176
|
-
```mermaid
|
|
177
|
-
flowchart LR
|
|
178
|
-
Browser["browser"] --> Server["tools/dashboard/server.py"]
|
|
179
|
-
Server --> API["GET /api/snapshot.json"]
|
|
180
|
-
API --> Snapshot["dashboard_snapshot.py"]
|
|
181
|
-
|
|
182
|
-
Snapshot --> Registry["profile registry"]
|
|
183
|
-
Snapshot --> Config["render-flow-config.sh"]
|
|
184
|
-
Snapshot --> WorkerStatus["agent-project-worker-status"]
|
|
185
|
-
Snapshot --> Runs["runs/ history/ state/"]
|
|
186
|
-
|
|
187
|
-
Registry --> JSON["snapshot payload"]
|
|
188
|
-
Config --> JSON
|
|
189
|
-
WorkerStatus --> JSON
|
|
190
|
-
Runs --> JSON
|
|
191
|
-
|
|
192
|
-
JSON --> UI["dashboard app.js + index.html"]
|
|
193
|
-
```
|
|
194
|
-
|
|
195
|
-
This means the dashboard reflects the current state of:
|
|
196
|
-
|
|
197
|
-
- installed profiles
|
|
198
|
-
- live and recent runs
|
|
199
|
-
- resident controller metadata
|
|
200
|
-
- provider cooldowns
|
|
201
|
-
- scheduled issue state
|
|
202
|
-
- runtime process status
|
|
203
|
-
|
|
204
|
-
without introducing a second control path that could drift away from the real
|
|
205
|
-
scheduler state.
|
|
206
|
-
|
|
207
|
-
## Reading Order
|
|
208
|
-
|
|
209
|
-
If you want the shortest path through the architecture:
|
|
210
|
-
|
|
211
|
-
1. [System Overview](#system-overview)
|
|
212
|
-
2. [Runtime Scheduler Loop](#runtime-scheduler-loop)
|
|
213
|
-
3. [Worker Session Lifecycle](#worker-session-lifecycle)
|
|
214
|
-
4. [Dashboard Snapshot Pipeline](#dashboard-snapshot-pipeline)
|
|
215
|
-
|
|
216
|
-
If you are changing packaging or onboarding, also read
|
|
217
|
-
[Install and Publication Flow](#install-and-publication-flow).
|