agent-control-plane 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/pr-risk.sh +54 -10
- package/hooks/heartbeat-hooks.sh +70 -6
- package/package.json +1 -1
- package/tools/bin/agent-project-cleanup-session +10 -2
- package/tools/bin/agent-project-heartbeat-loop +29 -2
- package/tools/bin/agent-project-reconcile-issue-session +59 -2
- package/tools/bin/agent-project-reconcile-pr-session +104 -13
- package/tools/bin/agent-project-run-claude-session +19 -1
- package/tools/bin/agent-project-run-codex-session +1 -1
- package/tools/bin/agent-project-run-openclaw-session +200 -7
- package/tools/bin/agent-project-sync-anchor-repo +13 -2
- package/tools/bin/agent-project-worker-status +19 -14
- package/tools/bin/flow-shell-lib.sh +13 -7
- package/tools/bin/prepare-worktree.sh +3 -1
- package/tools/bin/provider-cooldown-state.sh +1 -1
- package/tools/bin/render-flow-config.sh +16 -1
- package/tools/bin/run-codex-task.sh +2 -2
- package/tools/bin/scaffold-profile.sh +2 -2
- package/tools/bin/start-issue-worker.sh +42 -10
- package/tools/dashboard/app.js +20 -2
- package/tools/dashboard/dashboard_snapshot.py +45 -0
package/tools/dashboard/app.js
CHANGED
|
@@ -45,7 +45,7 @@ function renderResult(row) {
|
|
|
45
45
|
|
|
46
46
|
/**
 * Render a resident controller's state as a status-pill <span>.
 *
 * This is the post-change (0.1.9) form of the function: the diff hunk listed
 * both the removed and the added `const stale` declaration, and only the
 * added one is kept here.
 *
 * A controller is shown as stale when the snapshot flags it explicitly
 * (`row.controller_stale === true`) or, as before, when its state is not
 * "stopped" and `row.controller_live` is exactly `false`.
 *
 * @param {object} row - Controller row; reads `state`, `controller_stale`
 *   and `controller_live`.
 * @returns {string} HTML markup for the status pill.
 */
function renderControllerState(row) {
  // A missing or empty state is displayed as "n/a".
  const state = row.state || "n/a";
  const stale = row.controller_stale === true || (state !== "stopped" && row.controller_live === false);
  // Stale controllers keep their state text but gain a "(stale)" suffix and
  // are styled via statusClass("stale") instead of statusClass(state).
  const label = stale ? `${state} (stale)` : state;
  return `<span class="status-pill ${statusClass(stale ? "stale" : state)}">${label}</span>`;
}
|
|
@@ -123,7 +123,9 @@ function renderProfile(profile) {
|
|
|
123
123
|
["Reported", profile.counts.reported_runs],
|
|
124
124
|
["Blocked", profile.counts.blocked_runs],
|
|
125
125
|
["Live controllers", profile.counts.live_resident_controllers],
|
|
126
|
+
["Stale controllers", profile.counts.stale_resident_controllers],
|
|
126
127
|
["Provider cooldowns", profile.counts.provider_cooldowns],
|
|
128
|
+
["Issue retries", profile.counts.active_retries],
|
|
127
129
|
["Queued issues", profile.counts.queued_issues],
|
|
128
130
|
["Scheduled", profile.counts.scheduled_issues],
|
|
129
131
|
]
|
|
@@ -165,6 +167,18 @@ function renderProfile(profile) {
|
|
|
165
167
|
"No resident controllers recorded for this profile.",
|
|
166
168
|
);
|
|
167
169
|
|
|
170
|
+
const retryTable = renderTable(
|
|
171
|
+
[
|
|
172
|
+
{ label: "Issue", key: "issue_id" },
|
|
173
|
+
{ label: "Status", render: (row) => `<span class="status-pill ${row.ready ? "" : "waiting-provider"}">${row.ready ? "ready" : "retrying"}</span>` },
|
|
174
|
+
{ label: "Reason", render: (row) => row.last_reason || "n/a" },
|
|
175
|
+
{ label: "Attempts", key: "attempts" },
|
|
176
|
+
{ label: "Next attempt", render: (row) => row.next_attempt_at ? `${relativeTime(row.next_attempt_at)}<div class="muted">${row.next_attempt_at}</div>` : "n/a" },
|
|
177
|
+
],
|
|
178
|
+
profile.issue_retries || [],
|
|
179
|
+
"No issue retries recorded.",
|
|
180
|
+
);
|
|
181
|
+
|
|
168
182
|
const workerTable = renderTable(
|
|
169
183
|
[
|
|
170
184
|
{ label: "Key", render: (row) => `<div class="mono">${row.key}</div>` },
|
|
@@ -233,9 +247,13 @@ function renderProfile(profile) {
|
|
|
233
247
|
</section>
|
|
234
248
|
<section class="panel">
|
|
235
249
|
<h3>Resident Controllers</h3>
|
|
236
|
-
<p class="panel-subtitle">Includes provider wait and failover telemetry.</p>
|
|
250
|
+
<p class="panel-subtitle">Includes provider wait and failover telemetry. Stale controllers show a warning.</p>
|
|
237
251
|
${controllerTable}
|
|
238
252
|
</section>
|
|
253
|
+
<section class="panel half">
|
|
254
|
+
<h3>Issue Retries</h3>
|
|
255
|
+
${retryTable}
|
|
256
|
+
</section>
|
|
239
257
|
<section class="panel">
|
|
240
258
|
<h3>Resident Worker Metadata</h3>
|
|
241
259
|
${workerTable}
|
|
package/tools/dashboard/dashboard_snapshot.py
CHANGED
|
@@ -225,6 +225,20 @@ def collect_runs(runs_root: Path) -> list[dict[str, Any]]:
|
|
|
225
225
|
return runs
|
|
226
226
|
|
|
227
227
|
|
|
228
|
+
def controller_is_stale(
    env: dict[str, str],
    controller_path: Path,
    max_age_seconds: float = 600,
) -> bool:
    """Return whether a resident controller looks stale.

    A controller is stale when its recorded state claims it is still active
    (anything other than ``"stopped"`` or empty) but either its recorded PID
    is not alive or its state file has not been modified within
    ``max_age_seconds``.

    Args:
        env: Key/value pairs read from the controller state file. Only
            ``CONTROLLER_STATE`` and ``CONTROLLER_PID`` are consulted.
        controller_path: Path of the controller state file; its mtime is the
            freshness signal.
        max_age_seconds: Maximum allowed age of the state file in seconds.
            Defaults to 600 (10 minutes).

    Returns:
        ``True`` if the controller is considered stale, ``False`` otherwise.
        ``False`` is also returned when the state file cannot be stat'ed.
    """
    # Stopped (or never-started) controllers are not expected to make progress.
    if env.get("CONTROLLER_STATE", "") in {"stopped", ""}:
        return False
    # Claims to be running, but the process is gone.
    if not pid_alive(env.get("CONTROLLER_PID", "")):
        return True
    try:
        modified_at = controller_path.stat().st_mtime
    except OSError:
        # Best effort: if the file vanished or is unreadable, do not flag the
        # controller as stale on the basis of a missing timestamp.
        return False
    file_age = datetime.now(timezone.utc).timestamp() - modified_at
    return file_age > max_age_seconds
|
|
240
|
+
|
|
241
|
+
|
|
228
242
|
def collect_resident_controllers(state_root: Path) -> list[dict[str, Any]]:
|
|
229
243
|
controllers_root = state_root / "resident-workers" / "issues"
|
|
230
244
|
if not controllers_root.is_dir():
|
|
@@ -240,6 +254,7 @@ def collect_resident_controllers(state_root: Path) -> list[dict[str, Any]]:
|
|
|
240
254
|
"session": env.get("SESSION", ""),
|
|
241
255
|
"controller_pid": controller_pid,
|
|
242
256
|
"controller_live": pid_alive(controller_pid),
|
|
257
|
+
"controller_stale": controller_is_stale(env, path),
|
|
243
258
|
"mode": env.get("CONTROLLER_MODE", ""),
|
|
244
259
|
"loop_count": safe_int(env.get("CONTROLLER_LOOP_COUNT")),
|
|
245
260
|
"state": env.get("CONTROLLER_STATE", ""),
|
|
@@ -346,6 +361,32 @@ def collect_scheduled_issues(state_root: Path) -> list[dict[str, Any]]:
|
|
|
346
361
|
return items
|
|
347
362
|
|
|
348
363
|
|
|
364
|
+
def collect_issue_retries(state_root: Path) -> list[dict[str, Any]]:
    """Collect retry/backoff state for issues tracked by agent-project-retry-state.

    Reads every ``*.env`` file under ``<state_root>/retries/issues`` and
    returns one record per file, most recently modified first.

    Args:
        state_root: Root directory of the profile's state tree.

    Returns:
        A list of dicts with the keys ``issue_id`` (file stem), ``attempts``,
        ``next_attempt_epoch``, ``next_attempt_at``, ``last_reason``,
        ``updated_at``, ``ready`` and ``state_file``. The list is empty when
        the retries directory does not exist.
    """
    retries_root = state_root / "retries" / "issues"
    if not retries_root.is_dir():
        return []

    now_epoch = int(datetime.now(timezone.utc).timestamp())

    # Stat each file exactly once and tolerate files that disappear between
    # the glob and the stat, so a concurrent removal cannot abort the snapshot.
    stamped: list[tuple[float, Path]] = []
    for path in retries_root.glob("*.env"):
        try:
            stamped.append((path.stat().st_mtime, path))
        except OSError:
            continue
    # Newest first; the sort is stable, so ties keep glob order.
    stamped.sort(key=lambda pair: pair[0], reverse=True)

    items: list[dict[str, Any]] = []
    for _mtime, path in stamped:
        env = read_env_file(path)
        next_attempt_epoch = safe_int(env.get("NEXT_ATTEMPT_EPOCH"))
        items.append(
            {
                "issue_id": path.stem,
                "attempts": safe_int(env.get("ATTEMPTS")) or 0,
                "next_attempt_epoch": next_attempt_epoch,
                "next_attempt_at": env.get("NEXT_ATTEMPT_AT", ""),
                "last_reason": env.get("LAST_REASON", ""),
                # Prefer the recorded timestamp; fall back to the file mtime.
                "updated_at": env.get("UPDATED_AT", "") or file_mtime_iso(path),
                # Ready unless a next-attempt time is set and still in the future.
                "ready": not bool(next_attempt_epoch and next_attempt_epoch > now_epoch),
                "state_file": str(path),
            }
        )
    return items
|
|
388
|
+
|
|
389
|
+
|
|
349
390
|
def collect_issue_queue(state_root: Path) -> dict[str, list[dict[str, Any]]]:
|
|
350
391
|
queue_root = state_root / "resident-workers" / "issue-queue"
|
|
351
392
|
pending_root = queue_root / "pending"
|
|
@@ -387,6 +428,7 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
|
|
|
387
428
|
resident_workers = collect_resident_workers(state_root)
|
|
388
429
|
cooldowns = collect_provider_cooldowns(state_root)
|
|
389
430
|
scheduled = collect_scheduled_issues(state_root)
|
|
431
|
+
retries = collect_issue_retries(state_root)
|
|
390
432
|
queue = collect_issue_queue(state_root)
|
|
391
433
|
|
|
392
434
|
return {
|
|
@@ -422,10 +464,12 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
|
|
|
422
464
|
),
|
|
423
465
|
"resident_controllers": len(controllers),
|
|
424
466
|
"live_resident_controllers": sum(1 for item in controllers if item["state"] != "stopped" and item["controller_live"]),
|
|
467
|
+
"stale_resident_controllers": sum(1 for item in controllers if item.get("controller_stale", False)),
|
|
425
468
|
"resident_workers": len(resident_workers),
|
|
426
469
|
"queued_issues": len(queue["pending"]),
|
|
427
470
|
"claimed_issues": len(queue["claims"]),
|
|
428
471
|
"provider_cooldowns": sum(1 for item in cooldowns if item["active"]),
|
|
472
|
+
"active_retries": sum(1 for item in retries if not item.get("ready", True)),
|
|
429
473
|
"scheduled_issues": len(scheduled),
|
|
430
474
|
},
|
|
431
475
|
"runs": runs,
|
|
@@ -433,6 +477,7 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
|
|
|
433
477
|
"resident_workers": resident_workers,
|
|
434
478
|
"provider_cooldowns": cooldowns,
|
|
435
479
|
"scheduled_issues": scheduled,
|
|
480
|
+
"issue_retries": retries,
|
|
436
481
|
"issue_queue": queue,
|
|
437
482
|
}
|
|
438
483
|
|