agent-control-plane 0.1.12 → 0.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +1 -1
- package/hooks/heartbeat-hooks.sh +64 -7
- package/hooks/issue-reconcile-hooks.sh +46 -0
- package/npm/bin/agent-control-plane.js +89 -8
- package/package.json +3 -1
- package/references/commands.md +2 -2
- package/references/control-plane-map.md +1 -1
- package/tools/bin/agent-project-reconcile-issue-session +2 -0
- package/tools/bin/agent-project-reconcile-pr-session +166 -22
- package/tools/bin/agent-project-run-codex-session +58 -0
- package/tools/bin/branch-verification-guard.sh +15 -2
- package/tools/bin/ensure-runtime-sync.sh +5 -5
- package/tools/bin/heartbeat-safe-auto.sh +20 -10
- package/tools/bin/provider-cooldown-state.sh +39 -1
- package/tools/bin/start-issue-worker.sh +66 -24
- package/tools/bin/sync-shared-agent-home.sh +24 -10
- package/tools/dashboard/app.js +74 -0
- package/tools/dashboard/dashboard_snapshot.py +202 -1
- package/tools/templates/issue-prompt-template.md +4 -1
- package/tools/vendor/codex-quota-manager/scripts/auto-switch.sh +8 -6
- package/tools/bin/render-dashboard-snapshot.py +0 -16
- package/tools/templates/legacy/issue-prompt-template-pre-slim.md +0 -109
|
@@ -377,6 +377,30 @@ const recentPrs = recentNumbers.map((number) => {
|
|
|
377
377
|
const activePrs = recentPrs.filter((pr) => pr.state === 'open' || pr.state === 'draft');
|
|
378
378
|
const completedPrs = recentPrs.filter((pr) => pr.state !== 'open' && pr.state !== 'draft');
|
|
379
379
|
|
|
380
|
+
const recentCycleNotes = [];
|
|
381
|
+
for (const comment of [...(issue.comments || [])].reverse()) {
|
|
382
|
+
const body = String(comment?.body || '').trim();
|
|
383
|
+
if (!body) {
|
|
384
|
+
continue;
|
|
385
|
+
}
|
|
386
|
+
if (!/^(Completed|Blocked on|# Blocker:|Host-side publish blocked|Host-side publish failed)/im.test(body)) {
|
|
387
|
+
continue;
|
|
388
|
+
}
|
|
389
|
+
const summaryLines = body
|
|
390
|
+
.split(/\r?\n/)
|
|
391
|
+
.map((line) => line.trim())
|
|
392
|
+
.filter(Boolean)
|
|
393
|
+
.slice(0, 6);
|
|
394
|
+
if (summaryLines.length === 0) {
|
|
395
|
+
continue;
|
|
396
|
+
}
|
|
397
|
+
const summary = summaryLines.join(' | ');
|
|
398
|
+
recentCycleNotes.push(summary.length > 420 ? `${summary.slice(0, 417)}...` : summary);
|
|
399
|
+
if (recentCycleNotes.length >= 3) {
|
|
400
|
+
break;
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
|
|
380
404
|
const formatPr = (pr) => {
|
|
381
405
|
const suffix = pr.url ? ` ${pr.url}` : '';
|
|
382
406
|
return `- #${pr.number} (${pr.state}): ${pr.title}${suffix}`;
|
|
@@ -389,6 +413,7 @@ const lines = [
|
|
|
389
413
|
'- Before editing, choose exactly one concrete target module, screen, or flow and keep the cycle limited to that target.',
|
|
390
414
|
'- Do not work on a target already covered by an open or draft PR for this issue, or by the most recent completed cycles listed below, unless you are explicitly fixing a regression introduced there.',
|
|
391
415
|
'- If you cannot identify a small non-overlapping target after reviewing recent cycle history, stop blocked using the blocker contract instead of forcing another PR.',
|
|
416
|
+
'- Prefer the recent cycle notes below over repeating broad web research; only fetch outside context when the local baseline or linked advisories materially changed.',
|
|
392
417
|
'- In your final worker output, start with `Target:` and `Why now:` lines before the changed-files list.',
|
|
393
418
|
];
|
|
394
419
|
|
|
@@ -406,6 +431,13 @@ if (completedPrs.length > 0) {
|
|
|
406
431
|
}
|
|
407
432
|
}
|
|
408
433
|
|
|
434
|
+
if (recentCycleNotes.length > 0) {
|
|
435
|
+
lines.push('', '### Recent cycle notes from issue comments');
|
|
436
|
+
for (const note of recentCycleNotes) {
|
|
437
|
+
lines.push(`- ${note}`);
|
|
438
|
+
}
|
|
439
|
+
}
|
|
440
|
+
|
|
409
441
|
process.stdout.write(`${lines.join('\n')}\n`);
|
|
410
442
|
EOF
|
|
411
443
|
ISSUE_RECURRING_CONTEXT="$(cat "$ISSUE_RECURRING_CONTEXT_FILE")"
|
|
@@ -727,38 +759,48 @@ for (const line of body.split(/\r?\n/).slice(0, 40)) {
|
|
|
727
759
|
}
|
|
728
760
|
}
|
|
729
761
|
|
|
730
|
-
if (commands.length === 0 && repoRoot) {
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
addCommand('pnpm test');
|
|
738
|
-
} else if (fs.existsSync(path.join(repoRoot, 'yarn.lock'))) {
|
|
739
|
-
addCommand('yarn test');
|
|
740
|
-
} else {
|
|
741
|
-
addCommand('npm test');
|
|
762
|
+
if (commands.length === 0 && repoRoot) {
|
|
763
|
+
const packageJsonPath = path.join(repoRoot, 'package.json');
|
|
764
|
+
if (fs.existsSync(packageJsonPath)) {
|
|
765
|
+
try {
|
|
766
|
+
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
|
|
767
|
+
if (packageJson?.scripts?.smoke) {
|
|
768
|
+
addCommand('# If this cycle changes smoke runners, CLI entrypoints, or operator commands, also run the matching smoke command from the repo instructions.');
|
|
742
769
|
}
|
|
770
|
+
} catch (_error) {
|
|
771
|
+
// Ignore parse errors and fall through to generic guidance.
|
|
743
772
|
}
|
|
744
|
-
} catch (_error) {
|
|
745
|
-
// Ignore parse errors and fall through to generic guidance.
|
|
746
773
|
}
|
|
747
774
|
}
|
|
748
|
-
}
|
|
749
775
|
|
|
750
|
-
if (commands.length === 0) {
|
|
751
|
-
|
|
752
|
-
|
|
776
|
+
if (commands.length === 0) {
|
|
777
|
+
addCommand('# Pick the narrowest relevant local verification for the files you touch.');
|
|
778
|
+
addCommand('# Do not default to repo-wide pnpm test unless the issue body explicitly requires it.');
|
|
779
|
+
addCommand('# Examples:');
|
|
780
|
+
addCommand('# pnpm --filter @<repo>/api test -- --runInBand <target-spec>');
|
|
781
|
+
addCommand('# pnpm --filter @<repo>/api typecheck');
|
|
782
|
+
addCommand('# pnpm --filter @<repo>/web test -- --run <target-spec>');
|
|
783
|
+
addCommand('# pnpm --filter @<repo>/web typecheck');
|
|
784
|
+
addCommand('# pnpm --filter @<repo>/mobile test -- --runInBand <target-spec>');
|
|
785
|
+
addCommand('# pnpm --filter @<repo>/mobile typecheck');
|
|
786
|
+
addCommand('# pnpm --filter @<repo>/<package> test -- --run <target-spec>');
|
|
787
|
+
addCommand('# pnpm --filter @<repo>/<package> typecheck');
|
|
788
|
+
addCommand('# After each successful command, record it with record-verification.sh exactly as shown below.');
|
|
789
|
+
}
|
|
753
790
|
|
|
754
791
|
const escapeDoubleQuotes = (value) => value.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
|
|
755
792
|
const snippet = commands
|
|
756
|
-
.map((command) =>
|
|
757
|
-
command
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
793
|
+
.map((command) => {
|
|
794
|
+
if (command.startsWith('#')) {
|
|
795
|
+
return command;
|
|
796
|
+
}
|
|
797
|
+
return (
|
|
798
|
+
command + '\n' +
|
|
799
|
+
'bash "$ACP_FLOW_TOOLS_DIR/record-verification.sh" --run-dir "$ACP_RUN_DIR" --status pass --command "' +
|
|
800
|
+
escapeDoubleQuotes(command) +
|
|
801
|
+
'"'
|
|
802
|
+
);
|
|
803
|
+
})
|
|
762
804
|
.join('\n\n');
|
|
763
805
|
|
|
764
806
|
process.stdout.write(snippet);
|
|
@@ -32,12 +32,21 @@ if [[ ! -d "${FLOW_SKILL_SOURCE}" && -n "${COMPAT_FLOW_SKILL_ALIAS}" ]]; then
|
|
|
32
32
|
fi
|
|
33
33
|
|
|
34
34
|
FLOW_SKILL_SOURCE="$(cd "${FLOW_SKILL_SOURCE}" && pwd -P)"
|
|
35
|
+
SOURCE_HOME="$(cd "${SOURCE_HOME}" && pwd -P)"
|
|
36
|
+
|
|
37
|
+
# Forcefully remove a file, directory tree, or symlink on a best-effort basis.
#
# Arguments:
#   $1 - path to remove. An empty or missing argument is a no-op.
#
# Returns 0 in every case: chmod/rm failures are suppressed on purpose so a
# stubborn leftover never aborts the surrounding sync.
remove_tree_force() {
  local target="${1:-}"
  [[ -n "${target}" ]] || return 0

  # A symlink is removed as a link only. Running chmod against it would change
  # the permissions of whatever it points at, which must stay untouched.
  if [[ -L "${target}" ]]; then
    rm -f -- "${target}" 2>/dev/null || true
    return 0
  fi

  [[ -e "${target}" ]] || return 0

  # Read-only directories stop rm from unlinking their entries, so make the
  # tree owner-writable first. "--" keeps a path starting with "-" from being
  # parsed as an option.
  chmod -R u+w -- "${target}" 2>/dev/null || true
  rm -rf -- "${target}" 2>/dev/null || true
}
|
|
35
44
|
|
|
36
45
|
sync_tree_copy_mode() {
|
|
37
46
|
local source_dir="${1:?source dir required}"
|
|
38
47
|
local target_dir="${2:?target dir required}"
|
|
39
48
|
[[ -d "${source_dir}" ]] || return 0
|
|
40
|
-
|
|
49
|
+
remove_tree_force "${target_dir}"
|
|
41
50
|
mkdir -p "${target_dir}"
|
|
42
51
|
(
|
|
43
52
|
cd "${source_dir}"
|
|
@@ -57,13 +66,15 @@ sync_tree_into_target() {
|
|
|
57
66
|
}
|
|
58
67
|
|
|
59
68
|
sync_skill_copies() {
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
69
|
+
if ! flow_is_skill_root "${SOURCE_HOME}"; then
|
|
70
|
+
sync_tree_into_target "${FLOW_SKILL_SOURCE}" "${SOURCE_FLOW_CANONICAL_ALIAS}"
|
|
71
|
+
if [[ -n "${SOURCE_FLOW_COMPAT_ALIAS}" ]]; then
|
|
72
|
+
sync_tree_into_target "${FLOW_SKILL_SOURCE}" "${SOURCE_FLOW_COMPAT_ALIAS}"
|
|
73
|
+
fi
|
|
65
74
|
fi
|
|
66
75
|
|
|
76
|
+
sync_tree_into_target "${FLOW_SKILL_SOURCE}" "${FLOW_SKILL_TARGET}"
|
|
77
|
+
|
|
67
78
|
if [[ -n "${TARGET_FLOW_COMPAT_ALIAS}" ]]; then
|
|
68
79
|
sync_tree_into_target "${FLOW_SKILL_SOURCE}" "${TARGET_FLOW_COMPAT_ALIAS}"
|
|
69
80
|
fi
|
|
@@ -200,18 +211,21 @@ sync_tree_rsync() {
|
|
|
200
211
|
}
|
|
201
212
|
|
|
202
213
|
# Clear the runtime skill target and, when one is configured, its compat alias.
#
# Reads FLOW_SKILL_TARGET and TARGET_FLOW_COMPAT_ALIAS from the enclosing scope
# and delegates the actual removal to remove_tree_force.
reset_runtime_skill_targets() {
  remove_tree_force "${FLOW_SKILL_TARGET}"

  # An empty alias means no compat alias is configured; nothing more to clear.
  [[ -z "${TARGET_FLOW_COMPAT_ALIAS}" ]] || remove_tree_force "${TARGET_FLOW_COMPAT_ALIAS}"
}
|
|
208
219
|
|
|
209
220
|
# Clear the source-side skill aliases so they can be re-synced.
#
# Does nothing when flow_is_skill_root reports success for SOURCE_HOME.
# Otherwise removes the canonical alias and the optional compat alias, skipping
# any alias whose path is the same string as FLOW_SKILL_SOURCE so the source
# tree itself is never deleted.
reset_source_skill_targets() {
  flow_is_skill_root "${SOURCE_HOME}" && return 0

  [[ "${FLOW_SKILL_SOURCE}" == "${SOURCE_FLOW_CANONICAL_ALIAS}" ]] \
    || remove_tree_force "${SOURCE_FLOW_CANONICAL_ALIAS}"

  # The compat alias is optional; leave it alone when it is unset or is the
  # source directory itself.
  if [[ -z "${SOURCE_FLOW_COMPAT_ALIAS}" || "${FLOW_SKILL_SOURCE}" == "${SOURCE_FLOW_COMPAT_ALIAS}" ]]; then
    return 0
  fi
  remove_tree_force "${SOURCE_FLOW_COMPAT_ALIAS}"
}
|
|
217
231
|
|
package/tools/dashboard/app.js
CHANGED
|
@@ -138,6 +138,33 @@ function renderAlerts(alerts) {
|
|
|
138
138
|
`;
|
|
139
139
|
}
|
|
140
140
|
|
|
141
|
+
/**
 * Render the body of the "Codex Rotation" panel for one profile.
 *
 * All snapshot values are HTML-escaped before interpolation because the
 * result is inserted into the page as markup.
 *
 * @param {object|null|undefined} rotation - Rotation snapshot. Fields read:
 *   `active_label`, `candidate_labels`, `ready_candidates`, `next_retry_at`,
 *   `next_retry_label`, `last_switch_label`, `last_switch_reason`,
 *   `switch_decision`.
 * @returns {string} An empty-state notice when `rotation` or its
 *   `active_label` is missing; otherwise a one-row table built by
 *   `renderTable`.
 */
function renderCodexRotation(rotation) {
  if (!rotation || !rotation.active_label) {
    return `<div class="empty-state">Codex rotation data is not available yet for this Codex profile.</div>`;
  }

  const escapeHtml = (value) =>
    String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");

  // Anything that is not a non-empty array falls back to the placeholder
  // instead of throwing on `.join`.
  const joinLabels = (labels, fallback) =>
    Array.isArray(labels) && labels.length > 0 ? labels.map(escapeHtml).join(", ") : fallback;

  const candidates = joinLabels(rotation.candidate_labels, "n/a");
  const ready = joinLabels(rotation.ready_candidates, "none");
  const decision = rotation.switch_decision || "unknown";

  const nextRetry = rotation.next_retry_at
    ? `${escapeHtml(rotation.next_retry_label || "n/a")} · ${relativeTime(rotation.next_retry_at)}<div class="muted">${escapeHtml(rotation.next_retry_at)}</div>`
    : "n/a";

  const lastSwitch = rotation.last_switch_label
    ? `${escapeHtml(rotation.last_switch_label)}${rotation.last_switch_reason ? ` · ${escapeHtml(rotation.last_switch_reason)}` : ""}`
    : "n/a";

  return renderTable(
    [
      { label: "Current", render: () => `<div class="mono">${escapeHtml(rotation.active_label)}</div>` },
      { label: "Decision", render: () => `<span class="status-pill ${escapeHtml(statusClass(decision))}">${escapeHtml(decision)}</span>` },
      { label: "Candidates", render: () => `<div class="mono">${candidates}</div>` },
      { label: "Ready now", render: () => `<div class="mono">${ready}</div>` },
      { label: "Next retry", render: () => nextRetry },
      { label: "Last switch", render: () => `<div class="mono">${lastSwitch}</div>` },
    ],
    // A single placeholder row: every column reads from `rotation`, not the row.
    [{}],
    "No Codex rotation data for this profile.",
  );
}
|
|
167
|
+
|
|
141
168
|
function renderProfile(profile) {
|
|
142
169
|
const providerBadges = [
|
|
143
170
|
profile.coding_worker ? `<span class="badge good">${profile.coding_worker}</span>` : "",
|
|
@@ -153,6 +180,7 @@ function renderProfile(profile) {
|
|
|
153
180
|
const summaryCards = [
|
|
154
181
|
["Run sessions", profile.counts.active_runs],
|
|
155
182
|
["Running", profile.counts.running_runs],
|
|
183
|
+
["Recent completed", profile.counts.recent_history_runs || 0],
|
|
156
184
|
["Implemented", profile.counts.implemented_runs],
|
|
157
185
|
["Reported", profile.counts.reported_runs],
|
|
158
186
|
["Blocked", profile.counts.blocked_runs],
|
|
@@ -188,6 +216,19 @@ function renderProfile(profile) {
|
|
|
188
216
|
"No active run directories for this profile.",
|
|
189
217
|
);
|
|
190
218
|
|
|
219
|
+
const recentHistoryTable = renderTable(
|
|
220
|
+
[
|
|
221
|
+
{ label: "Session", render: (row) => `<div class="mono">${row.session}</div>` },
|
|
222
|
+
{ label: "Task", render: (row) => `${row.task_kind || "n/a"} ${row.task_id || ""}`.trim() },
|
|
223
|
+
{ label: "Lifecycle", render: renderLifecycle },
|
|
224
|
+
{ label: "Worker", key: "coding_worker" },
|
|
225
|
+
{ label: "Result", render: renderResult },
|
|
226
|
+
{ label: "Updated", render: (row) => row.updated_at ? `${relativeTime(row.updated_at)}<div class="muted">${row.updated_at}</div>` : "n/a" },
|
|
227
|
+
],
|
|
228
|
+
profile.recent_history || [],
|
|
229
|
+
"No recently archived runs.",
|
|
230
|
+
);
|
|
231
|
+
|
|
191
232
|
const controllerTable = renderTable(
|
|
192
233
|
[
|
|
193
234
|
{ label: "Issue", key: "issue_id" },
|
|
@@ -214,6 +255,18 @@ function renderProfile(profile) {
|
|
|
214
255
|
"No issue retries recorded.",
|
|
215
256
|
);
|
|
216
257
|
|
|
258
|
+
const prRetryTable = renderTable(
|
|
259
|
+
[
|
|
260
|
+
{ label: "PR", key: "pr_number" },
|
|
261
|
+
{ label: "Status", render: (row) => `<span class="status-pill ${row.ready ? "" : "waiting-provider"}">${row.ready ? "ready" : "retrying"}</span>` },
|
|
262
|
+
{ label: "Reason", render: (row) => row.last_reason || "n/a" },
|
|
263
|
+
{ label: "Attempts", key: "attempts" },
|
|
264
|
+
{ label: "Next attempt", render: (row) => row.next_attempt_at ? `${relativeTime(row.next_attempt_at)}<div class="muted">${row.next_attempt_at}</div>` : "n/a" },
|
|
265
|
+
],
|
|
266
|
+
profile.pr_retries || [],
|
|
267
|
+
"No PR retries recorded.",
|
|
268
|
+
);
|
|
269
|
+
|
|
217
270
|
const workerTable = renderTable(
|
|
218
271
|
[
|
|
219
272
|
{ label: "Key", render: (row) => `<div class="mono">${row.key}</div>` },
|
|
@@ -261,6 +314,17 @@ function renderProfile(profile) {
|
|
|
261
314
|
"No pending leased issues.",
|
|
262
315
|
);
|
|
263
316
|
|
|
317
|
+
const codexRotationPanel =
|
|
318
|
+
profile.coding_worker === "codex"
|
|
319
|
+
? `
|
|
320
|
+
<section class="panel">
|
|
321
|
+
<h3>Codex Rotation</h3>
|
|
322
|
+
<p class="panel-subtitle">Shows the active Codex label, candidate labels, and whether failover is ready or deferred.</p>
|
|
323
|
+
${renderCodexRotation(profile.codex_rotation)}
|
|
324
|
+
</section>
|
|
325
|
+
`
|
|
326
|
+
: "";
|
|
327
|
+
|
|
264
328
|
return `
|
|
265
329
|
<article class="profile">
|
|
266
330
|
<header class="profile-header">
|
|
@@ -285,15 +349,25 @@ function renderProfile(profile) {
|
|
|
285
349
|
<p class="panel-subtitle">Lifecycle shows technical session completion. Result shows what the run achieved: implemented, reported, or blocked.</p>
|
|
286
350
|
${runsTable}
|
|
287
351
|
</section>
|
|
352
|
+
<section class="panel">
|
|
353
|
+
<h3>Recent Completed Runs</h3>
|
|
354
|
+
<p class="panel-subtitle">Recently archived runs so they do not disappear from the dashboard immediately after completion.</p>
|
|
355
|
+
${recentHistoryTable}
|
|
356
|
+
</section>
|
|
288
357
|
<section class="panel">
|
|
289
358
|
<h3>Resident Controllers</h3>
|
|
290
359
|
<p class="panel-subtitle">Includes provider wait and failover telemetry. Stale controllers show a warning.</p>
|
|
291
360
|
${controllerTable}
|
|
292
361
|
</section>
|
|
362
|
+
${codexRotationPanel}
|
|
293
363
|
<section class="panel half">
|
|
294
364
|
<h3>Issue Retries</h3>
|
|
295
365
|
${retryTable}
|
|
296
366
|
</section>
|
|
367
|
+
<section class="panel half">
|
|
368
|
+
<h3>PR Retries</h3>
|
|
369
|
+
${prRetryTable}
|
|
370
|
+
</section>
|
|
297
371
|
<section class="panel">
|
|
298
372
|
<h3>Resident Worker Metadata</h3>
|
|
299
373
|
${workerTable}
|
|
@@ -143,6 +143,15 @@ def file_mtime_iso(path: Path) -> str:
|
|
|
143
143
|
return datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
144
144
|
|
|
145
145
|
|
|
146
|
+
def read_json_file(path: Path) -> dict[str, Any]:
    """Load a JSON object from ``path`` on a best-effort basis.

    Args:
        path: File expected to contain a JSON object.

    Returns:
        The decoded object, or ``{}`` when the file is missing, cannot be read
        or parsed, or holds a JSON value that is not an object (list, string,
        number, ...). The result is therefore always a dict.
    """
    if not path.is_file():
        return {}
    try:
        # Undecodable bytes are replaced rather than raising, so a partly
        # corrupt file still gets a parse attempt.
        payload = json.loads(path.read_text(encoding="utf-8", errors="replace"))
    except Exception:
        # Deliberately broad: any read or parse failure yields the empty value.
        return {}
    return payload if isinstance(payload, dict) else {}
|
|
153
|
+
|
|
154
|
+
|
|
146
155
|
def read_tail_text(path: Path, max_bytes: int = 65536) -> str:
|
|
147
156
|
if not path.is_file():
|
|
148
157
|
return ""
|
|
@@ -293,6 +302,64 @@ def collect_runs(runs_root: Path) -> list[dict[str, Any]]:
|
|
|
293
302
|
return runs
|
|
294
303
|
|
|
295
304
|
|
|
305
|
+
def collect_recent_history(history_root: Path, limit: int = 8) -> list[dict[str, Any]]:
    """Summarize the most recently archived runs found under ``history_root``.

    Sub-directories are visited newest-first by modification time. Each one
    contributes at most one entry per session: later (older) directories that
    resolve to an already-seen session are skipped.

    Args:
        history_root: Directory whose immediate sub-directories are archived runs.
        limit: Maximum number of entries to return. Zero or a negative value
            yields an empty list.

    Returns:
        Up to ``limit`` dicts built from each run's ``run.env``, ``runner.env``
        and ``result.env``, each marked ``"archived": True`` and carrying an
        ``"alerts"`` list. Empty when ``history_root`` is not a directory.
    """
    if limit <= 0 or not history_root.is_dir():
        return []

    def _mtime_or_zero(entry: Path) -> float:
        # A run directory can be pruned while the scan is in progress; sort it
        # last instead of failing the whole snapshot.
        try:
            return entry.stat().st_mtime
        except OSError:
            return 0.0

    run_dirs = sorted(
        (entry for entry in history_root.iterdir() if entry.is_dir()),
        key=_mtime_or_zero,
        reverse=True,
    )

    items: list[dict[str, Any]] = []
    seen_sessions: set[str] = set()
    for run_dir in run_dirs:
        run_env = read_env_file(run_dir / "run.env")
        session = run_env.get("SESSION", "")
        if not session:
            # Fall back to the directory name with its last two dash-separated
            # parts dropped (presumably an archive suffix -- confirm against
            # the archiver).
            parts = run_dir.name.split("-")
            session = "-".join(parts[:-2]) if len(parts) > 2 else run_dir.name
        if session in seen_sessions:
            continue
        seen_sessions.add(session)

        # Only read the remaining env files for runs that will be reported.
        runner_env = read_env_file(run_dir / "runner.env")
        result_env = read_env_file(run_dir / "result.env")

        lifecycle_status = (runner_env.get("RUNNER_STATE", "") or "").strip().upper() or "UNKNOWN"
        outcome = result_env.get("OUTCOME", "")
        failure_reason = runner_env.get("LAST_FAILURE_REASON", "")
        result_kind, result_label = classify_run_result(lifecycle_status, outcome, failure_reason)

        item = {
            "session": session,
            "task_kind": run_env.get("TASK_KIND", ""),
            "task_id": run_env.get("TASK_ID", ""),
            "status": lifecycle_status,
            "lifecycle_status": lifecycle_status,
            "updated_at": result_env.get("UPDATED_AT", "") or runner_env.get("UPDATED_AT", "") or file_mtime_iso(run_dir),
            "coding_worker": run_env.get("CODING_WORKER", ""),
            "failure_reason": failure_reason,
            "outcome": outcome,
            "action": result_env.get("ACTION", ""),
            "result_kind": result_kind,
            "result_label": result_label,
            "run_dir": str(run_dir),
            "archived": True,
        }
        alert = extract_github_rate_limit_alert(run_dir, item)
        item["alerts"] = [alert] if alert else []
        items.append(item)
        if len(items) >= limit:
            break
    return items
|
|
361
|
+
|
|
362
|
+
|
|
296
363
|
def controller_is_stale(env: dict[str, str], controller_path: Path) -> bool:
|
|
297
364
|
"""A controller is stale if it claims to be running but its PID is dead or its
|
|
298
365
|
UPDATED_AT file mtime is older than 10 minutes."""
|
|
@@ -408,6 +475,94 @@ def collect_provider_cooldowns(state_root: Path) -> list[dict[str, Any]]:
|
|
|
408
475
|
return items
|
|
409
476
|
|
|
410
477
|
|
|
478
|
+
def collect_codex_rotation(profile: dict[str, str]) -> dict[str, Any]:
    """Describe Codex account rotation state for a profile.

    Combines three sources: ``rotation-state.json`` and ``last-switch.env``
    under the ``codex-quota-manager`` cache directory, and the JSON printed by
    ``codex-quota codex list --json`` (binary taken from ``CODEX_QUOTA_BIN`` or
    ``TOOLS_BIN_DIR``). Unreadable or unexpectedly shaped data is treated as
    absent rather than raising.

    Args:
        profile: Rendered profile environment; only ``EFFECTIVE_CODING_WORKER``
            is read.

    Returns:
        ``{}`` when the profile's coding worker is not ``"codex"``. Otherwise a
        dict with the active label, the non-active candidate labels, the
        candidates that are ready now, the earliest pending retry (label,
        epoch and UTC ISO string), a ``switch_decision`` of
        ``ready-candidate`` / ``deferred`` / ``switched`` / ``failed`` /
        ``unknown``, the last recorded switch, and the state file path.
    """
    if profile.get("EFFECTIVE_CODING_WORKER", "") != "codex":
        return {}

    # An empty XDG_CACHE_HOME counts as unset, so the path never turns relative.
    cache_home = os.environ.get("XDG_CACHE_HOME", "") or str(Path.home() / ".cache")
    cache_root = Path(cache_home) / "codex-quota-manager"
    state_file = cache_root / "rotation-state.json"
    switch_file = cache_root / "last-switch.env"

    state_json = read_json_file(state_file)
    state_accounts = state_json.get("accounts", {}) if isinstance(state_json, dict) else {}
    if not isinstance(state_accounts, dict):
        state_accounts = {}
    now_epoch = int(datetime.now(timezone.utc).timestamp())

    list_json: Any = {}
    quota_bin_override = os.environ.get("CODEX_QUOTA_BIN", "").strip()
    quota_bin = Path(quota_bin_override) if quota_bin_override else TOOLS_BIN_DIR / "codex-quota"
    if quota_bin.is_file():
        try:
            raw = subprocess.check_output(
                [str(quota_bin), "codex", "list", "--json"],
                cwd=str(ROOT_DIR),
                env=os.environ.copy(),
                text=True,
                stderr=subprocess.DEVNULL,
                timeout=20,
            )
            list_json = json.loads(raw)
        except Exception:
            # Best effort: a missing, slow or failing helper just means no
            # account listing for this snapshot.
            list_json = {}

    active_label = ""
    candidate_labels: list[str] = []
    if isinstance(list_json, dict):
        active_info = list_json.get("activeInfo")
        if not isinstance(active_info, dict):
            active_info = {}
        active_label = str(active_info.get("trackedLabel") or active_info.get("activeLabel") or "")

        accounts = list_json.get("accounts")
        if not isinstance(accounts, list):
            accounts = []
        seen: set[str] = set()
        for account in accounts:
            if not isinstance(account, dict):
                continue
            label = str(account.get("label") or "").strip()
            if not label or label == active_label or label in seen:
                continue
            candidate_labels.append(label)
            seen.add(label)

    # One pass classifies each candidate: removed accounts are ignored, those
    # whose retry time has passed (or was never set) are ready, and the rest
    # compete for the earliest pending retry.
    ready_candidates: list[str] = []
    next_retry_label = ""
    next_retry_epoch = 0
    for label in candidate_labels:
        entry = state_accounts.get(label, {})
        if not isinstance(entry, dict):
            entry = {}
        if bool(entry.get("removed", False)):
            continue
        retry_epoch = safe_int(str(entry.get("next_retry_at", ""))) or 0
        if retry_epoch <= now_epoch:
            ready_candidates.append(label)
        elif next_retry_epoch == 0 or retry_epoch < next_retry_epoch:
            next_retry_epoch = retry_epoch
            next_retry_label = label

    next_retry_at = ""
    if next_retry_epoch:
        try:
            next_retry_at = datetime.fromtimestamp(next_retry_epoch, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        except (OverflowError, OSError, ValueError):
            # An out-of-range epoch in the state file must not break the snapshot.
            next_retry_at = ""

    last_switch = read_env_file(switch_file)
    if ready_candidates:
        switch_decision = "ready-candidate"
    elif next_retry_label:
        switch_decision = "deferred"
    elif last_switch.get("LAST_SWITCH_LABEL"):
        switch_decision = "switched"
    elif candidate_labels:
        switch_decision = "failed"
    else:
        switch_decision = "unknown"

    return {
        "active_label": active_label,
        "candidate_labels": candidate_labels,
        "ready_candidates": ready_candidates,
        "next_retry_label": next_retry_label,
        "next_retry_epoch": next_retry_epoch,
        "next_retry_at": next_retry_at,
        "switch_decision": switch_decision,
        "last_switch_label": last_switch.get("LAST_SWITCH_LABEL", ""),
        "last_switch_reason": last_switch.get("LAST_SWITCH_REASON", ""),
        "last_switch_epoch": safe_int(last_switch.get("LAST_SWITCH_EPOCH")),
        "state_file": str(state_file),
    }
|
|
564
|
+
|
|
565
|
+
|
|
411
566
|
def collect_scheduled_issues(state_root: Path) -> list[dict[str, Any]]:
|
|
412
567
|
scheduled_root = state_root / "scheduled-issues"
|
|
413
568
|
if not scheduled_root.is_dir():
|
|
@@ -455,6 +610,43 @@ def collect_issue_retries(state_root: Path) -> list[dict[str, Any]]:
|
|
|
455
610
|
return items
|
|
456
611
|
|
|
457
612
|
|
|
613
|
+
def collect_pr_retries(state_root: Path) -> list[dict[str, Any]]:
    """List PR retry records stored as ``*.env`` files under ``retries/prs``.

    Files are reported newest-first by modification time. A file that is
    deleted while the scan is running is tolerated rather than failing the
    snapshot.

    Args:
        state_root: Profile state directory containing ``retries/prs``.

    Returns:
        One dict per retry file: the PR number (the file stem), attempt count,
        next-attempt epoch and timestamp, last reason, update time, a ``ready``
        flag that is False only while the next attempt lies in the future, and
        the state file path. Empty when the directory does not exist.
    """
    retries_root = state_root / "retries" / "prs"
    if not retries_root.is_dir():
        return []

    def _mtime_or_zero(entry: Path) -> float:
        # The file may be removed between the glob and the stat call.
        try:
            return entry.stat().st_mtime
        except OSError:
            return 0.0

    now_epoch = int(datetime.now(timezone.utc).timestamp())
    items: list[dict[str, Any]] = []
    for path in sorted(retries_root.glob("*.env"), key=_mtime_or_zero, reverse=True):
        env = read_env_file(path)
        next_attempt_epoch = safe_int(env.get("NEXT_ATTEMPT_EPOCH"))
        waiting = bool(next_attempt_epoch) and next_attempt_epoch > now_epoch

        updated_at = env.get("UPDATED_AT", "")
        if not updated_at:
            try:
                updated_at = file_mtime_iso(path)
            except OSError:
                # File vanished after it was listed; leave the timestamp blank.
                updated_at = ""

        items.append(
            {
                "pr_number": path.stem,
                "attempts": safe_int(env.get("ATTEMPTS")) or 0,
                "next_attempt_epoch": next_attempt_epoch,
                "next_attempt_at": env.get("NEXT_ATTEMPT_AT", ""),
                "last_reason": env.get("LAST_REASON", ""),
                "updated_at": updated_at,
                "ready": not waiting,
                "state_file": str(path),
            }
        )
    return items
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
def resolve_history_root(render_env: dict[str, str], yaml_env: dict[str, str], runs_root: Path) -> Path:
    """Work out where archived runs live for a profile.

    The first non-blank value among ``EFFECTIVE_HISTORY_ROOT`` (rendered
    environment) and ``runtime.history_root`` (YAML settings) is the configured
    root. It is used unless it is exactly ``"."``. Without a usable configured
    value, a ``runs_root`` named ``runs`` maps to its sibling ``history``
    directory; anything else resolves to ``Path(".")``.

    Args:
        render_env: Rendered profile environment.
        yaml_env: Flattened YAML settings for the profile.
        runs_root: Directory holding the profile's active runs.

    Returns:
        The history directory path, which may not exist.
    """
    configured = ""
    for settings, key in ((render_env, "EFFECTIVE_HISTORY_ROOT"), (yaml_env, "runtime.history_root")):
        configured = settings.get(key, "").strip()
        if configured:
            break

    if configured not in ("", "."):
        return Path(configured)

    return runs_root.parent / "history" if runs_root.name == "runs" else Path(".")
|
|
648
|
+
|
|
649
|
+
|
|
458
650
|
def collect_issue_queue(state_root: Path) -> dict[str, list[dict[str, Any]]]:
|
|
459
651
|
queue_root = state_root / "resident-workers" / "issue-queue"
|
|
460
652
|
pending_root = queue_root / "pending"
|
|
@@ -491,14 +683,18 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
|
|
|
491
683
|
|
|
492
684
|
runs_root = Path(render_env.get("EFFECTIVE_RUNS_ROOT", ""))
|
|
493
685
|
state_root = Path(render_env.get("EFFECTIVE_STATE_ROOT", ""))
|
|
686
|
+
history_root = resolve_history_root(render_env, yaml_env, runs_root)
|
|
494
687
|
runs = collect_runs(runs_root)
|
|
688
|
+
recent_history = collect_recent_history(history_root)
|
|
495
689
|
controllers = collect_resident_controllers(state_root)
|
|
496
690
|
resident_workers = collect_resident_workers(state_root)
|
|
497
691
|
cooldowns = collect_provider_cooldowns(state_root)
|
|
498
692
|
scheduled = collect_scheduled_issues(state_root)
|
|
499
693
|
retries = collect_issue_retries(state_root)
|
|
694
|
+
pr_retries = collect_pr_retries(state_root)
|
|
500
695
|
queue = collect_issue_queue(state_root)
|
|
501
|
-
alerts = [alert for run in runs for alert in run.get("alerts", [])]
|
|
696
|
+
alerts = [alert for run in (runs + recent_history) for alert in run.get("alerts", [])]
|
|
697
|
+
codex_rotation = collect_codex_rotation(render_env)
|
|
502
698
|
|
|
503
699
|
return {
|
|
504
700
|
"id": profile_id,
|
|
@@ -506,6 +702,7 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
|
|
|
506
702
|
"repo_root": render_env.get("EFFECTIVE_REPO_ROOT", ""),
|
|
507
703
|
"runs_root": str(runs_root),
|
|
508
704
|
"state_root": str(state_root),
|
|
705
|
+
"history_root": str(history_root),
|
|
509
706
|
"issue_prefix": yaml_env.get("session_naming.issue_prefix", ""),
|
|
510
707
|
"pr_prefix": yaml_env.get("session_naming.pr_prefix", ""),
|
|
511
708
|
"coding_worker": render_env.get("EFFECTIVE_CODING_WORKER", ""),
|
|
@@ -520,6 +717,7 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
|
|
|
520
717
|
"last_reason": render_env.get("EFFECTIVE_PROVIDER_POOL_LAST_REASON", ""),
|
|
521
718
|
"pools_exhausted": render_env.get("EFFECTIVE_PROVIDER_POOLS_EXHAUSTED", ""),
|
|
522
719
|
},
|
|
720
|
+
"codex_rotation": codex_rotation,
|
|
523
721
|
"counts": {
|
|
524
722
|
"active_runs": len(runs),
|
|
525
723
|
"running_runs": sum(1 for item in runs if item["status"] == "RUNNING"),
|
|
@@ -531,6 +729,7 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
|
|
|
531
729
|
"completed_runs": sum(
|
|
532
730
|
1 for item in runs if item["status"] == "SUCCEEDED" and item["result_kind"] not in {"implemented", "reported", "blocked"}
|
|
533
731
|
),
|
|
732
|
+
"recent_history_runs": len(recent_history),
|
|
534
733
|
"resident_controllers": len(controllers),
|
|
535
734
|
"live_resident_controllers": sum(1 for item in controllers if item["state"] != "stopped" and item["controller_live"]),
|
|
536
735
|
"stale_resident_controllers": sum(1 for item in controllers if item.get("controller_stale", False)),
|
|
@@ -543,12 +742,14 @@ def build_profile_snapshot(profile_id: str, registry_root: Path) -> dict[str, An
|
|
|
543
742
|
"alerts": len(alerts),
|
|
544
743
|
},
|
|
545
744
|
"runs": runs,
|
|
745
|
+
"recent_history": recent_history,
|
|
546
746
|
"alerts": alerts,
|
|
547
747
|
"resident_controllers": controllers,
|
|
548
748
|
"resident_workers": resident_workers,
|
|
549
749
|
"provider_cooldowns": cooldowns,
|
|
550
750
|
"scheduled_issues": scheduled,
|
|
551
751
|
"issue_retries": retries,
|
|
752
|
+
"pr_retries": pr_retries,
|
|
552
753
|
"issue_queue": queue,
|
|
553
754
|
}
|
|
554
755
|
|
|
@@ -22,7 +22,10 @@ Follow this order:
|
|
|
22
22
|
bash "$ACP_FLOW_TOOLS_DIR/create-follow-up-issue.sh" --parent {ISSUE_ID} --title "..." --body-file /tmp/follow-up.md
|
|
23
23
|
```
|
|
24
24
|
3. Implement the smallest root-cause fix in this worktree only.
|
|
25
|
-
4. Run verification and record every successful command with `record-verification.sh`.
|
|
25
|
+
4. Run the narrowest relevant local verification for the files you changed, and record every successful command with `record-verification.sh`.
|
|
26
|
+
|
|
27
|
+
- Do not default to repo-wide verification such as `pnpm test` unless the issue body explicitly requires it.
|
|
28
|
+
- If unrelated repo-wide suites are already red, keep the cycle focused on targeted verification for your slice and let the host verification guard decide whether publication is safe.
|
|
26
29
|
|
|
27
30
|
```bash
|
|
28
31
|
{ISSUE_VERIFICATION_COMMAND_SNIPPET}
|
|
@@ -410,12 +410,6 @@ for label in "${CANDIDATE_LABELS[@]}"; do
|
|
|
410
410
|
continue
|
|
411
411
|
fi
|
|
412
412
|
|
|
413
|
-
retry_at="$(state_next_retry_at "$label")"
|
|
414
|
-
if [[ "$retry_at" =~ ^[0-9]+$ ]] && (( retry_at > now_epoch )); then
|
|
415
|
-
note_candidate_retry "$label" "$retry_at"
|
|
416
|
-
continue
|
|
417
|
-
fi
|
|
418
|
-
|
|
419
413
|
quota_output="$(load_account_quota_json "$label" 2>&1 || true)"
|
|
420
414
|
if ! jq -e 'type == "array" and length > 0' >/dev/null 2>&1 <<<"$quota_output"; then
|
|
421
415
|
if is_auth_401_output "$quota_output"; then
|
|
@@ -436,6 +430,14 @@ for label in "${CANDIDATE_LABELS[@]}"; do
|
|
|
436
430
|
continue
|
|
437
431
|
fi
|
|
438
432
|
|
|
433
|
+
retry_at="$(state_next_retry_at "$label")"
|
|
434
|
+
if [[ "$retry_at" =~ ^[0-9]+$ ]] && (( retry_at > now_epoch )) && account_is_eligible "$label" "$quota_output"; then
|
|
435
|
+
state_mark_ready "$label" "quota-revalidated" "$now_epoch"
|
|
436
|
+
elif [[ "$retry_at" =~ ^[0-9]+$ ]] && (( retry_at > now_epoch )); then
|
|
437
|
+
note_candidate_retry "$label" "$retry_at"
|
|
438
|
+
continue
|
|
439
|
+
fi
|
|
440
|
+
|
|
439
441
|
if ! account_is_eligible "$label" "$quota_output"; then
|
|
440
442
|
retry_at="$(account_retry_epoch "$label" "$quota_output")"
|
|
441
443
|
if [[ "$retry_at" =~ ^[0-9]+$ ]] && (( retry_at > now_epoch )); then
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
|
|
4
|
-
import pathlib
|
|
5
|
-
import sys
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
ROOT = pathlib.Path(__file__).resolve().parents[1] / "dashboard"
|
|
9
|
-
if str(ROOT) not in sys.path:
|
|
10
|
-
sys.path.insert(0, str(ROOT))
|
|
11
|
-
|
|
12
|
-
from dashboard_snapshot import main
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
if __name__ == "__main__":
|
|
16
|
-
raise SystemExit(main())
|