agentxchain 2.145.0 → 2.146.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/app.js +3 -0
- package/dashboard/components/notifications.js +127 -0
- package/dashboard/index.html +1 -0
- package/package.json +1 -1
- package/scripts/release-downstream-truth.sh +16 -8
- package/src/commands/init.js +66 -31
- package/src/commands/restart.js +18 -3
- package/src/commands/resume.js +17 -0
- package/src/commands/status.js +18 -5
- package/src/commands/step.js +17 -0
- package/src/lib/config.js +4 -1
- package/src/lib/dashboard/actions.js +9 -3
- package/src/lib/dashboard/bridge-server.js +11 -0
- package/src/lib/dashboard/notifications-reader.js +91 -0
- package/src/lib/dashboard/state-reader.js +16 -4
- package/src/lib/governed-state.js +131 -0
- package/src/lib/recent-event-summary.js +1 -0
- package/src/lib/stale-turn-watchdog.js +164 -18
package/dashboard/app.js
CHANGED
|
@@ -15,6 +15,7 @@ import { render as renderCrossRepo } from './components/cross-repo.js';
|
|
|
15
15
|
import { render as renderDelegations } from './components/delegations.js';
|
|
16
16
|
import { render as renderBlockers } from './components/blockers.js';
|
|
17
17
|
import { render as renderArtifacts } from './components/artifacts.js';
|
|
18
|
+
import { render as renderNotifications } from './components/notifications.js';
|
|
18
19
|
import { render as renderMission } from './components/mission.js';
|
|
19
20
|
import { render as renderChain } from './components/chain.js';
|
|
20
21
|
import { render as renderRunHistory } from './components/run-history.js';
|
|
@@ -31,6 +32,7 @@ const VIEWS = {
|
|
|
31
32
|
delegations: { fetch: ['state', 'history'], render: renderDelegations },
|
|
32
33
|
ledger: { fetch: ['state', 'ledger', 'coordinatorState', 'coordinatorLedger', 'repoDecisionsSummary'], render: renderLedger },
|
|
33
34
|
hooks: { fetch: ['audit', 'annotations', 'coordinatorAudit', 'coordinatorAnnotations'], render: renderHooks },
|
|
35
|
+
notifications: { fetch: ['notifications'], render: renderNotifications },
|
|
34
36
|
blocked: { fetch: ['state', 'audit', 'coordinatorState', 'coordinatorAudit', 'coordinatorBlockers', 'coordinatorRepoStatusRows', 'gateActions'], render: renderBlocked },
|
|
35
37
|
gate: { fetch: ['state', 'history', 'coordinatorState', 'coordinatorHistory', 'coordinatorBarriers', 'gateActions'], render: renderGate },
|
|
36
38
|
initiative: { fetch: ['coordinatorState', 'coordinatorBarriers', 'barrierLedger', 'coordinatorBlockers', 'coordinatorRepoStatusRows'], render: renderInitiative },
|
|
@@ -62,6 +64,7 @@ const API_MAP = {
|
|
|
62
64
|
coordinatorBlockers: '/api/coordinator/blockers',
|
|
63
65
|
coordinatorRepoStatusRows: '/api/coordinator/repo-status',
|
|
64
66
|
workflowKitArtifacts: '/api/workflow-kit-artifacts',
|
|
67
|
+
notifications: '/api/notifications',
|
|
65
68
|
missions: '/api/missions',
|
|
66
69
|
plans: '/api/plans',
|
|
67
70
|
chainReports: '/api/chain-reports',
|
|
package/dashboard/components/notifications.js
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
 * Escape a value for safe interpolation into HTML text or attribute values.
 *
 * `null` and `undefined` become an empty string; every other value is
 * stringified first. The ampersand is replaced before the other characters
 * so the entities produced by later replacements are not double-escaped.
 *
 * @param {unknown} str - Value to escape.
 * @returns {string} HTML-safe representation of the value.
 */
function esc(str) {
  if (str == null) return '';
  return String(str)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
|
|
10
|
+
|
|
11
|
+
/**
 * Build the markup for a small outlined badge.
 *
 * The label is HTML-escaped; the colour is inserted into the inline style
 * as given and is used for both the text and the border.
 *
 * @param {unknown} label - Text shown inside the badge.
 * @param {string} [color='var(--text-dim)'] - CSS colour expression.
 * @returns {string} A `<span class="badge">` element as an HTML string.
 */
function badge(label, color = 'var(--text-dim)') {
  const inlineStyle = `color:${color};border-color:${color}`;
  const text = esc(label);
  return `<span class="badge" style="${inlineStyle}">${text}</span>`;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Map a delivery audit entry to its result badge.
 *
 * A truthy `delivered` wins; otherwise a truthy `timed_out` yields the
 * "timed out" badge; anything else (including a missing entry) is "failed".
 *
 * @param {{ delivered?: unknown, timed_out?: unknown } | null | undefined} entry
 * @returns {string} Badge markup for the entry's outcome.
 */
function formatResult(entry) {
  const [label, color] = entry?.delivered
    ? ['delivered', 'var(--green)']
    : entry?.timed_out
      ? ['timed out', 'var(--yellow)']
      : ['failed', 'var(--red)'];
  return badge(label, color);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Render one row of the "Notification Targets" table.
 *
 * @param {{ name?: unknown, timeout_ms?: unknown, event_count?: unknown, events?: unknown[] }} webhook
 *   Webhook summary; a missing `events` list is rendered as an empty cell.
 * @returns {string} A `<tr>` element as an HTML string.
 */
function renderWebhookRow(webhook) {
  const subscribedEvents = (webhook.events || []).join(', ');
  const cells = [
    `<td class="mono">${esc(webhook.name)}</td>`,
    `<td>${esc(webhook.timeout_ms)}</td>`,
    `<td>${esc(webhook.event_count)}</td>`,
    `<td><span class="mono">${esc(subscribedEvents)}</span></td>`,
  ];
  return ['<tr>', ...cells, '</tr>'].join('\n');
}
|
|
29
|
+
|
|
30
|
+
/**
 * Render one row of the "Recent Delivery Attempts" table.
 *
 * Rows whose entry is not marked delivered get a red left border. Missing
 * or empty text fields, and a null/undefined status code or duration, are
 * shown as an em dash.
 *
 * @param {object | null | undefined} entry - Delivery audit entry.
 * @returns {string} A `<tr>` element as an HTML string.
 */
function renderAuditRow(entry) {
  const orDash = (value) => value || '—';

  let rowStyle = ' style="border-left:3px solid var(--red)"';
  if (entry?.delivered) {
    rowStyle = '';
  }

  let statusCode = '—';
  if (entry?.status_code != null) {
    statusCode = String(entry.status_code);
  }

  let duration = '—';
  if (entry?.duration_ms != null) {
    duration = `${entry.duration_ms}ms`;
  }

  const cells = [
    `<td class="mono">${esc(orDash(entry?.emitted_at))}</td>`,
    `<td><span class="mono">${esc(orDash(entry?.event_type))}</span></td>`,
    `<td class="mono">${esc(orDash(entry?.notification_name))}</td>`,
    `<td>${formatResult(entry)}</td>`,
    `<td>${esc(statusCode)}</td>`,
    `<td>${esc(duration)}</td>`,
    `<td>${esc(orDash(entry?.message))}</td>`,
  ];
  return [`<tr${rowStyle}>`, ...cells, '</tr>'].join('\n');
}
|
|
44
|
+
|
|
45
|
+
/**
 * Wrap already-safe inner HTML in the Notifications placeholder panel.
 *
 * @param {string} innerHtml - Paragraph content; callers escape dynamic text.
 * @returns {string} Placeholder markup.
 */
function notificationsPlaceholder(innerHtml) {
  return `<div class="placeholder"><h2>Notifications</h2><p>${innerHtml}</p></div>`;
}

/**
 * Render the dashboard "Notifications" view.
 *
 * Handles, in order: no payload, an error payload (`ok === false`), a
 * replay-mode payload, the "nothing configured and nothing recorded"
 * case, and finally the full view (summary badges, configured targets,
 * delivery summary and recent delivery attempts).
 *
 * @param {{ notifications?: object | null }} data - Fetched view data.
 * @returns {string} HTML for the view.
 */
export function render({ notifications }) {
  if (!notifications) {
    return notificationsPlaceholder('No notification data available.');
  }

  if (notifications.ok === false) {
    const hint = notifications.code === 'config_missing'
      ? ' Run <code>agentxchain init --governed</code> to get started.'
      : '';
    return notificationsPlaceholder(
      `${esc(notifications.error || 'Failed to load notification data.')}${hint}`,
    );
  }

  // In replay mode the API answers with { ok: true, replay_mode: true, message }
  // and no audit data. Show that message instead of falling through to the
  // "no webhooks configured" placeholder, which would misdescribe the project.
  if (notifications.replay_mode) {
    return notificationsPlaceholder(
      esc(notifications.message || 'Notification audit is not available in replay mode.'),
    );
  }

  const recent = Array.isArray(notifications.recent) ? notifications.recent : [];
  const webhooks = Array.isArray(notifications.webhooks) ? notifications.webhooks : [];
  const summary = notifications.summary || {};

  if (!notifications.configured && recent.length === 0) {
    return notificationsPlaceholder(
      'No <code>notifications.webhooks</code> are configured and no delivery audit entries exist yet.',
    );
  }

  // Header badges: configuration state, attempt totals, optional failure /
  // timeout counts and the approval-SLA reminder schedule.
  let html = `<div class="notifications-view"><div class="run-header"><div class="run-meta">`;
  html += notifications.configured
    ? badge(`${webhooks.length} webhook${webhooks.length === 1 ? '' : 's'} configured`, 'var(--green)')
    : badge('not currently configured', 'var(--yellow)');
  html += badge(`${summary.total_attempts || 0} attempts`, 'var(--accent)');
  if ((summary.failed || 0) > 0) {
    html += badge(`${summary.failed} failed`, 'var(--red)');
  }
  if ((summary.timed_out || 0) > 0) {
    html += badge(`${summary.timed_out} timed out`, 'var(--yellow)');
  }
  if (notifications.approval_sla?.enabled) {
    html += badge(`approval SLA: ${(notifications.approval_sla.reminder_after_seconds || []).join(', ')}s`, 'var(--accent)');
  }
  html += `</div></div>`;

  if (webhooks.length > 0) {
    html += [
      '<div class="section"><h3>Notification Targets</h3>',
      '<table class="data-table">',
      '<thead>',
      '<tr>',
      '<th>Name</th>',
      '<th>Timeout</th>',
      '<th>Events</th>',
      '<th>Subscribed Event Types</th>',
      '</tr>',
      '</thead>',
      `<tbody>${webhooks.map(renderWebhookRow).join('')}</tbody>`,
      '</table>',
      '</div>',
    ].join('\n');
  }

  html += [
    '<div class="section"><h3>Delivery Summary</h3>',
    `<p><strong>Delivered:</strong> ${esc(summary.delivered || 0)}<br>`,
    `<strong>Failed:</strong> ${esc(summary.failed || 0)}<br>`,
    `<strong>Last emitted:</strong> ${esc(summary.last_emitted_at || '—')}<br>`,
    `<strong>Last failure:</strong> ${esc(summary.last_failure_at || '—')}</p>`,
    '</div>',
  ].join('\n');

  if (recent.length === 0) {
    html += `<div class="section"><h3>Recent Delivery Attempts</h3><p style="color:var(--text-dim)">No notification deliveries recorded yet.</p></div>`;
  } else {
    html += [
      '<div class="section"><h3>Recent Delivery Attempts</h3>',
      '<table class="data-table">',
      '<thead>',
      '<tr>',
      '<th>Emitted</th>',
      '<th>Event</th>',
      '<th>Target</th>',
      '<th>Result</th>',
      '<th>Status</th>',
      '<th>Duration</th>',
      '<th>Message</th>',
      '</tr>',
      '</thead>',
      `<tbody>${recent.map(renderAuditRow).join('')}</tbody>`,
      '</table>',
      '</div>',
    ].join('\n');
  }

  html += `</div>`;
  return html;
}
|
package/dashboard/index.html
CHANGED
|
@@ -401,6 +401,7 @@
|
|
|
401
401
|
<a href="#delegations">Delegations</a>
|
|
402
402
|
<a href="#ledger">Decisions</a>
|
|
403
403
|
<a href="#hooks">Hooks</a>
|
|
404
|
+
<a href="#notifications">Notifications</a>
|
|
404
405
|
<a href="#blocked">Blocked</a>
|
|
405
406
|
<a href="#gate">Gates</a>
|
|
406
407
|
<a href="#blockers">Blockers</a>
|
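package/package.json
CHANGED
|
(diff body not captured in this extract; the file list above reports +1 -1)
package/scripts/release-downstream-truth.sh
CHANGED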
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
# Release downstream truth — run after all downstream surfaces are updated.
|
|
3
|
-
# Verifies: GitHub release
|
|
3
|
+
# Verifies: GitHub release is published on the expected tag URL, Homebrew tap SHA and URL match registry tarball.
|
|
4
4
|
# Usage: bash scripts/release-downstream-truth.sh --target-version <semver>
|
|
5
5
|
set -uo pipefail
|
|
6
6
|
|
|
@@ -91,22 +91,30 @@ echo "[1/3] GitHub release"
|
|
|
91
91
|
if ! command -v gh >/dev/null 2>&1; then
|
|
92
92
|
fail "gh CLI not available — cannot verify GitHub release"
|
|
93
93
|
else
|
|
94
|
-
|
|
94
|
+
GH_READY=false
|
|
95
|
+
EXPECTED_GH_URL="https://github.com/shivamtiwari93/agentXchain.dev/releases/tag/v${TARGET_VERSION}"
|
|
95
96
|
for attempt in $(seq 1 "$RETRY_ATTEMPTS"); do
|
|
96
97
|
GH_TAG="$(gh release view "v${TARGET_VERSION}" --json tagName -q '.tagName' 2>/dev/null || true)"
|
|
97
|
-
|
|
98
|
-
|
|
98
|
+
GH_DRAFT="$(gh release view "v${TARGET_VERSION}" --json isDraft -q '.isDraft' 2>/dev/null || true)"
|
|
99
|
+
GH_URL="$(gh release view "v${TARGET_VERSION}" --json url -q '.url' 2>/dev/null || true)"
|
|
100
|
+
GH_PUBLISHED_AT="$(gh release view "v${TARGET_VERSION}" --json publishedAt -q '.publishedAt' 2>/dev/null || true)"
|
|
101
|
+
if [[ "$GH_TAG" == "v${TARGET_VERSION}" ]] \
|
|
102
|
+
&& [[ "$GH_DRAFT" == "false" ]] \
|
|
103
|
+
&& [[ "$GH_URL" == "$EXPECTED_GH_URL" ]] \
|
|
104
|
+
&& [[ -n "$GH_PUBLISHED_AT" ]] \
|
|
105
|
+
&& [[ "$GH_PUBLISHED_AT" != "null" ]]; then
|
|
106
|
+
GH_READY=true
|
|
99
107
|
break
|
|
100
108
|
fi
|
|
101
109
|
if [[ "$attempt" -lt "$RETRY_ATTEMPTS" ]]; then
|
|
102
|
-
echo " INFO: GitHub release not
|
|
110
|
+
echo " INFO: GitHub release not ready (attempt ${attempt}/${RETRY_ATTEMPTS}); retrying in ${RETRY_DELAY_SECONDS}s..."
|
|
103
111
|
sleep "$RETRY_DELAY_SECONDS"
|
|
104
112
|
fi
|
|
105
113
|
done
|
|
106
|
-
if $
|
|
107
|
-
pass "GitHub release v${TARGET_VERSION}
|
|
114
|
+
if $GH_READY; then
|
|
115
|
+
pass "GitHub release v${TARGET_VERSION} is published on the tagged release URL"
|
|
108
116
|
else
|
|
109
|
-
fail "GitHub release v${TARGET_VERSION} not
|
|
117
|
+
fail "GitHub release v${TARGET_VERSION} is not fully published (tag=${GH_TAG:-<missing>} draft=${GH_DRAFT:-<missing>} url=${GH_URL:-<missing>} publishedAt=${GH_PUBLISHED_AT:-<missing>})"
|
|
110
118
|
fi
|
|
111
119
|
fi
|
|
112
120
|
|
package/src/commands/init.js
CHANGED
|
@@ -103,6 +103,68 @@ const DEFAULT_GOVERNED_LOCAL_DEV_RUNTIME = Object.freeze({
|
|
|
103
103
|
prompt_transport: 'stdin',
|
|
104
104
|
});
|
|
105
105
|
|
|
106
|
+
// Ignore entries written into a governed scaffold's .gitignore, grouped by
// the section they appear under. The flat list and the commented file body
// below are both derived from these groups, so adding or moving an entry
// cannot leave the two out of sync (previously the body relied on a
// positional `slice(4)` into the flat list).
const GOVERNED_GITIGNORE_SECRET_LINES = Object.freeze([
  '.env',
]);

const GOVERNED_GITIGNORE_TRANSIENT_LINES = Object.freeze([
  '.agentxchain/staging/',
  '.agentxchain/dispatch/',
  '.agentxchain/transactions/',
]);

const GOVERNED_GITIGNORE_STATE_LINES = Object.freeze([
  '.agentxchain/state.json',
  '.agentxchain/session.json',
  '.agentxchain/history.jsonl',
  '.agentxchain/decision-ledger.jsonl',
  '.agentxchain/repo-decisions.jsonl',
  '.agentxchain/lock.json',
  '.agentxchain/hook-audit.jsonl',
  '.agentxchain/hook-annotations.jsonl',
  '.agentxchain/run-history.jsonl',
  '.agentxchain/events.jsonl',
  '.agentxchain/notification-audit.jsonl',
  '.agentxchain/schedule-state.json',
  '.agentxchain/schedule-daemon.json',
  '.agentxchain/continuous-session.json',
  '.agentxchain/human-escalations.jsonl',
  '.agentxchain/sla-reminders.json',
  '.agentxchain/SESSION_RECOVERY.md',
  '.agentxchain/migration-report.md',
  '.agentxchain/intake/',
  '.agentxchain/missions/',
  '.agentxchain/multirepo/',
  '.agentxchain/reviews/',
  '.agentxchain/reports/',
  '.agentxchain/proposed/',
  'TALK.md',
  'HUMAN_TASKS.md',
]);

// Every entry that must be present in a governed repo's .gitignore, in the
// order it is written to a fresh file.
const GOVERNED_GITIGNORE_LINES = Object.freeze([
  ...GOVERNED_GITIGNORE_SECRET_LINES,
  ...GOVERNED_GITIGNORE_TRANSIENT_LINES,
  ...GOVERNED_GITIGNORE_STATE_LINES,
]);

// Full body of a freshly scaffolded .gitignore: the same entries as
// GOVERNED_GITIGNORE_LINES, with explanatory section comments.
const GOVERNED_GITIGNORE_CONTENT = [
  '# AgentXchain — secrets',
  ...GOVERNED_GITIGNORE_SECRET_LINES,
  '',
  '# AgentXchain — transient execution artifacts (never commit)',
  ...GOVERNED_GITIGNORE_TRANSIENT_LINES,
  '',
  '# AgentXchain — framework-owned state (gitignored by default in fresh scaffolds)',
  '# These files remain durable on disk and in export/restore, but defaulting them',
  '# out of raw `git status` reduces operator noise. Existing tracked copies still',
  '# appear dirty until the repo explicitly untracks them.',
  ...GOVERNED_GITIGNORE_STATE_LINES,
].join('\n') + '\n';
|
|
154
|
+
|
|
155
|
+
/**
 * Make sure a .gitignore file contains every required entry.
 *
 * When the file does not exist it is created with `content` verbatim.
 * Otherwise only the entries from `requiredEntries` that are not already
 * present as a line are appended; existing content is never rewritten.
 *
 * @param {string} gitignorePath - Path of the .gitignore file.
 * @param {string} content - Full body to write when the file is missing.
 * @param {readonly string[]} requiredEntries - Entries that must be present.
 * @returns {void}
 */
function ensureGitignoreEntries(gitignorePath, content, requiredEntries) {
  if (!existsSync(gitignorePath)) {
    writeFileSync(gitignorePath, content);
    return;
  }

  const existingIgnore = readFileSync(gitignorePath, 'utf8');

  // Git ignores unescaped trailing whitespace in patterns, so ".env  " already
  // covers ".env". Compare on right-trimmed lines to avoid appending duplicates.
  const existingLines = new Set(
    existingIgnore.split(/\r?\n/).map((line) => line.trimEnd()),
  );
  const missing = requiredEntries.filter((entry) => !existingLines.has(entry));
  if (missing.length === 0) return;

  // Keep the file's own line-ending convention so a CRLF file stays CRLF.
  const eol = existingIgnore.includes('\r\n') ? '\r\n' : '\n';
  const prefix = existingIgnore.length === 0 || existingIgnore.endsWith('\n') ? '' : eol;
  writeFileSync(gitignorePath, existingIgnore + prefix + missing.join(eol) + eol);
}
|
|
167
|
+
|
|
106
168
|
const GOVERNED_RUNTIMES = {
|
|
107
169
|
'manual-pm': { type: 'manual' },
|
|
108
170
|
'manual-dev': { type: 'manual' },
|
|
@@ -833,28 +895,10 @@ export function scaffoldGoverned(dir, projectName, projectId, templateId = 'gene
|
|
|
833
895
|
// TALK.md
|
|
834
896
|
writeFileSync(join(dir, 'TALK.md'), `# ${projectName} — Team Talk File\n\nCanonical human-readable handoff log for all agents.\n\n---\n\n`);
|
|
835
897
|
|
|
836
|
-
// .gitignore additions with inline comments so
|
|
898
|
+
// .gitignore additions with inline comments so fresh governed repos keep
|
|
899
|
+
// framework-owned runtime state out of raw git status by default.
|
|
837
900
|
const gitignorePath = join(dir, '.gitignore');
|
|
838
|
-
|
|
839
|
-
'# AgentXchain — secrets',
|
|
840
|
-
'.env',
|
|
841
|
-
'',
|
|
842
|
-
'# AgentXchain — transient execution artifacts (never commit)',
|
|
843
|
-
'.agentxchain/staging/',
|
|
844
|
-
'.agentxchain/dispatch/',
|
|
845
|
-
'.agentxchain/transactions/',
|
|
846
|
-
].join('\n') + '\n';
|
|
847
|
-
const requiredPaths = ['.env', '.agentxchain/staging/', '.agentxchain/dispatch/', '.agentxchain/transactions/'];
|
|
848
|
-
if (!existsSync(gitignorePath)) {
|
|
849
|
-
writeFileSync(gitignorePath, gitignoreContent);
|
|
850
|
-
} else {
|
|
851
|
-
const existingIgnore = readFileSync(gitignorePath, 'utf8');
|
|
852
|
-
const missing = requiredPaths.filter(entry => !existingIgnore.split(/\r?\n/).includes(entry));
|
|
853
|
-
if (missing.length > 0) {
|
|
854
|
-
const prefix = existingIgnore.endsWith('\n') ? '' : '\n';
|
|
855
|
-
writeFileSync(gitignorePath, existingIgnore + prefix + missing.join('\n') + '\n');
|
|
856
|
-
}
|
|
857
|
-
}
|
|
901
|
+
ensureGitignoreEntries(gitignorePath, GOVERNED_GITIGNORE_CONTENT, GOVERNED_GITIGNORE_LINES);
|
|
858
902
|
|
|
859
903
|
return { config, state, scaffoldWorkflowKitConfig };
|
|
860
904
|
}
|
|
@@ -1251,16 +1295,7 @@ export async function initCommand(opts) {
|
|
|
1251
1295
|
writeFileSync(join(dir, 'HUMAN_TASKS.md'), '# Human Tasks\n\n(Agents append tasks here when they need human action.)\n');
|
|
1252
1296
|
const gitignorePath = join(dir, '.gitignore');
|
|
1253
1297
|
const requiredIgnores = ['.env', '.agentxchain-trigger.json', '.agentxchain-prompts/', '.agentxchain-workspaces/', '.agentxchain-watch.pid', '.agentxchain-autonudge.state'];
|
|
1254
|
-
|
|
1255
|
-
writeFileSync(gitignorePath, requiredIgnores.join('\n') + '\n');
|
|
1256
|
-
} else {
|
|
1257
|
-
const existingIgnore = readFileSync(gitignorePath, 'utf8');
|
|
1258
|
-
const missing = requiredIgnores.filter(entry => !existingIgnore.split(/\r?\n/).includes(entry));
|
|
1259
|
-
if (missing.length > 0) {
|
|
1260
|
-
const prefix = existingIgnore.endsWith('\n') ? '' : '\n';
|
|
1261
|
-
writeFileSync(gitignorePath, existingIgnore + prefix + missing.join('\n') + '\n');
|
|
1262
|
-
}
|
|
1263
|
-
}
|
|
1298
|
+
ensureGitignoreEntries(gitignorePath, requiredIgnores.join('\n') + '\n', requiredIgnores);
|
|
1264
1299
|
|
|
1265
1300
|
// .planning/ structure
|
|
1266
1301
|
mkdirSync(join(dir, '.planning', 'research'), { recursive: true });
|
package/src/commands/restart.js
CHANGED
|
@@ -13,13 +13,16 @@
|
|
|
13
13
|
import chalk from 'chalk';
|
|
14
14
|
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
15
15
|
import { join, dirname } from 'path';
|
|
16
|
-
import { loadProjectContext } from '../lib/config.js';
|
|
16
|
+
import { loadProjectContext, loadProjectState } from '../lib/config.js';
|
|
17
17
|
import {
|
|
18
18
|
assignGovernedTurn,
|
|
19
19
|
getActiveTurns,
|
|
20
20
|
getActiveTurnCount,
|
|
21
21
|
reactivateGovernedRun,
|
|
22
22
|
detectStateBundleDesync,
|
|
23
|
+
normalizeGovernedStateShape,
|
|
24
|
+
reconcileApprovalPausesWithConfig,
|
|
25
|
+
reconcileRecoveryActionsWithConfig,
|
|
23
26
|
STATE_PATH,
|
|
24
27
|
HISTORY_PATH,
|
|
25
28
|
LEDGER_PATH,
|
|
@@ -27,7 +30,6 @@ import {
|
|
|
27
30
|
import { writeDispatchBundle } from '../lib/dispatch-bundle.js';
|
|
28
31
|
import { getDispatchTurnDir } from '../lib/turn-paths.js';
|
|
29
32
|
import { consumeNextApprovedIntent } from '../lib/intake.js';
|
|
30
|
-
import { loadProjectState } from '../lib/config.js';
|
|
31
33
|
import { deriveRecoveryDescriptor } from '../lib/blocked-state.js';
|
|
32
34
|
import { deriveRecommendedContinuityAction } from '../lib/continuity-status.js';
|
|
33
35
|
import { readSessionCheckpoint, writeSessionCheckpoint, captureBaselineRef, SESSION_PATH } from '../lib/session-checkpoint.js';
|
|
@@ -178,7 +180,20 @@ export async function restartCommand(opts) {
|
|
|
178
180
|
process.exit(1);
|
|
179
181
|
}
|
|
180
182
|
|
|
181
|
-
|
|
183
|
+
let state;
|
|
184
|
+
try {
|
|
185
|
+
const parsed = JSON.parse(readFileSync(statePath, 'utf8'));
|
|
186
|
+
const normalized = normalizeGovernedStateShape(parsed);
|
|
187
|
+
const reconciledApprovals = reconcileApprovalPausesWithConfig(normalized.state, config);
|
|
188
|
+
const reconciledRecovery = reconcileRecoveryActionsWithConfig(reconciledApprovals.state, config);
|
|
189
|
+
state = reconciledRecovery.state;
|
|
190
|
+
if (normalized.changed || reconciledApprovals.changed || reconciledRecovery.changed) {
|
|
191
|
+
writeFileSync(statePath, JSON.stringify(state, null, 2));
|
|
192
|
+
}
|
|
193
|
+
} catch {
|
|
194
|
+
console.log(chalk.red('No valid governed state.json found.'));
|
|
195
|
+
process.exit(1);
|
|
196
|
+
}
|
|
182
197
|
|
|
183
198
|
// Load checkpoint (optional — restart can work without it, just with less context)
|
|
184
199
|
const checkpoint = readSessionCheckpoint(root);
|
package/src/commands/resume.js
CHANGED
|
@@ -78,6 +78,10 @@ export async function resumeCommand(opts) {
|
|
|
78
78
|
|
|
79
79
|
const staleReconciliation = reconcileStaleTurns(root, state, config);
|
|
80
80
|
state = staleReconciliation.state || state;
|
|
81
|
+
if (staleReconciliation.ghost_turns.length > 0) {
|
|
82
|
+
printGhostTurnRecovery(staleReconciliation.ghost_turns);
|
|
83
|
+
process.exit(1);
|
|
84
|
+
}
|
|
81
85
|
if (staleReconciliation.stale_turns.length > 0) {
|
|
82
86
|
printStaleTurnRecovery(staleReconciliation.stale_turns);
|
|
83
87
|
process.exit(1);
|
|
@@ -359,6 +363,19 @@ export async function resumeCommand(opts) {
|
|
|
359
363
|
printDispatchSummary(state, config);
|
|
360
364
|
}
|
|
361
365
|
|
|
366
|
+
/**
 * Print recovery guidance for ghost turns to stdout.
 *
 * Emits a red heading, then for each ghost turn its id and role, runtime
 * id, age in whole seconds and the `reissue-turn` command to recover it.
 *
 * @param {Array<{ turn_id: string, role: string, runtime_id: string, running_ms: number }>} ghostTurns
 * @returns {void}
 */
function printGhostTurnRecovery(ghostTurns) {
  const heading = chalk.red.bold('Ghost turn detected — subprocess never started.');
  console.log(heading);
  console.log('');

  for (const ghost of ghostTurns) {
    const ageSeconds = Math.floor(ghost.running_ms / 1000);
    const recoverCommand = chalk.cyan(`agentxchain reissue-turn --turn ${ghost.turn_id} --reason ghost`);
    const report = [
      ` Turn: ${ghost.turn_id} (${ghost.role})`,
      ` Runtime: ${ghost.runtime_id}`,
      ` Age: ${ageSeconds}s with no subprocess output`,
      ` Recover: ${recoverCommand}`,
      '',
    ];
    for (const line of report) {
      console.log(line);
    }
  }
}
|
|
378
|
+
|
|
362
379
|
function printStaleTurnRecovery(staleTurns) {
|
|
363
380
|
console.log(chalk.red.bold('Stale turn detected.'));
|
|
364
381
|
console.log('');
|
package/src/commands/status.js
CHANGED
|
@@ -136,6 +136,10 @@ function loadStatusContext(dir = process.cwd()) {
|
|
|
136
136
|
function renderGovernedStatus(context, opts) {
|
|
137
137
|
const { root, config, version } = context;
|
|
138
138
|
let state = loadProjectState(root, config);
|
|
139
|
+
const staleReconciliation = reconcileStaleTurns(root, state, config);
|
|
140
|
+
state = staleReconciliation.state || state;
|
|
141
|
+
const staleTurns = staleReconciliation.stale_turns;
|
|
142
|
+
const ghostTurns = staleReconciliation.ghost_turns || [];
|
|
139
143
|
const stateRunId = state?.run_id || readRawStateRunId(root, config);
|
|
140
144
|
const continuity = getContinuityStatus(root, state);
|
|
141
145
|
const connectorHealth = getConnectorHealth(root, config, state);
|
|
@@ -166,11 +170,6 @@ function renderGovernedStatus(context, opts) {
|
|
|
166
170
|
// Coordinator warning surfacing — DEC-COORD-RETRY-PROJECTION-EVENT-001
|
|
167
171
|
const coordinatorWarnings = readCoordinatorWarnings(root, { runId: stateRunId || null });
|
|
168
172
|
|
|
169
|
-
// BUG-47: detect stale running turns and emit turn_stalled events
|
|
170
|
-
const staleReconciliation = reconcileStaleTurns(root, state, config);
|
|
171
|
-
state = staleReconciliation.state || state;
|
|
172
|
-
const staleTurns = staleReconciliation.stale_turns;
|
|
173
|
-
|
|
174
173
|
if (opts.json) {
|
|
175
174
|
const dashPid = getDashboardPid(root);
|
|
176
175
|
const dashSession = getDashboardSession(root);
|
|
@@ -209,6 +208,7 @@ function renderGovernedStatus(context, opts) {
|
|
|
209
208
|
bundle_integrity: detectStateBundleDesync(root, state),
|
|
210
209
|
coordinator_warnings: coordinatorWarnings,
|
|
211
210
|
stale_turns: staleTurns,
|
|
211
|
+
ghost_turns: ghostTurns,
|
|
212
212
|
}, null, 2));
|
|
213
213
|
return;
|
|
214
214
|
}
|
|
@@ -453,6 +453,19 @@ function renderGovernedStatus(context, opts) {
|
|
|
453
453
|
}
|
|
454
454
|
}
|
|
455
455
|
|
|
456
|
+
// BUG-51: Ghost turn warning (subprocess never started)
|
|
457
|
+
if (ghostTurns.length > 0) {
|
|
458
|
+
console.log('');
|
|
459
|
+
for (const gt of ghostTurns) {
|
|
460
|
+
const secs = Math.floor(gt.running_ms / 1000);
|
|
461
|
+
console.log(` ${chalk.red.bold('⚠ Ghost turn detected — subprocess never started')}`);
|
|
462
|
+
console.log(` ${chalk.dim('Turn:')} ${gt.turn_id} (${gt.role})`);
|
|
463
|
+
console.log(` ${chalk.dim('Runtime:')} ${gt.runtime_id}`);
|
|
464
|
+
console.log(` ${chalk.dim('Age:')} ${secs}s with no subprocess output`);
|
|
465
|
+
console.log(` ${chalk.dim('Recover:')} ${chalk.cyan(`agentxchain reissue-turn --turn ${gt.turn_id} --reason ghost`)}`);
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
|
|
456
469
|
// BUG-47: Stale turn warning
|
|
457
470
|
if (staleTurns.length > 0) {
|
|
458
471
|
console.log('');
|
package/src/commands/step.js
CHANGED
|
@@ -97,6 +97,10 @@ export async function stepCommand(opts) {
|
|
|
97
97
|
|
|
98
98
|
const staleReconciliation = reconcileStaleTurns(root, state, config);
|
|
99
99
|
state = staleReconciliation.state || state;
|
|
100
|
+
if (staleReconciliation.ghost_turns.length > 0) {
|
|
101
|
+
printGhostTurnRecovery(staleReconciliation.ghost_turns);
|
|
102
|
+
process.exit(1);
|
|
103
|
+
}
|
|
100
104
|
if (staleReconciliation.stale_turns.length > 0) {
|
|
101
105
|
printStaleTurnRecovery(staleReconciliation.stale_turns);
|
|
102
106
|
process.exit(1);
|
|
@@ -909,6 +913,19 @@ export async function stepCommand(opts) {
|
|
|
909
913
|
}
|
|
910
914
|
}
|
|
911
915
|
|
|
916
|
+
/**
 * Write ghost-turn recovery instructions to stdout.
 *
 * A red heading is printed once, followed by one block per ghost turn
 * (turn id and role, runtime id, age in whole seconds, and the
 * `reissue-turn` command), each block ending with a blank line.
 *
 * @param {Array<{ turn_id: string, role: string, runtime_id: string, running_ms: number }>} ghostTurns
 * @returns {void}
 */
function printGhostTurnRecovery(ghostTurns) {
  const emit = (text) => console.log(text);

  emit(chalk.red.bold('Ghost turn detected — subprocess never started.'));
  emit('');

  for (const turn of ghostTurns) {
    const elapsedSeconds = Math.floor(turn.running_ms / 1000);
    emit(` Turn: ${turn.turn_id} (${turn.role})`);
    emit(` Runtime: ${turn.runtime_id}`);
    emit(` Age: ${elapsedSeconds}s with no subprocess output`);
    const reissue = `agentxchain reissue-turn --turn ${turn.turn_id} --reason ghost`;
    emit(` Recover: ${chalk.cyan(reissue)}`);
    emit('');
  }
}
|
|
928
|
+
|
|
912
929
|
function printStaleTurnRecovery(staleTurns) {
|
|
913
930
|
console.log(chalk.red.bold('Stale turn detected.'));
|
|
914
931
|
console.log('');
|
package/src/lib/config.js
CHANGED
|
@@ -6,6 +6,7 @@ import { safeWriteJson } from './safe-write.js';
|
|
|
6
6
|
import {
|
|
7
7
|
normalizeGovernedStateShape,
|
|
8
8
|
getActiveTurn,
|
|
9
|
+
reconcileApprovalPausesWithConfig,
|
|
9
10
|
reconcileBudgetStatusWithConfig,
|
|
10
11
|
reconcileRecoveryActionsWithConfig,
|
|
11
12
|
} from './governed-state.js';
|
|
@@ -153,11 +154,13 @@ export function loadProjectState(root, config) {
|
|
|
153
154
|
if (config?.protocol_mode === 'governed') {
|
|
154
155
|
const normalized = normalizeGovernedStateShape(stateData);
|
|
155
156
|
stateData = normalized.state;
|
|
157
|
+
const reconciledApprovals = reconcileApprovalPausesWithConfig(stateData, config);
|
|
158
|
+
stateData = reconciledApprovals.state;
|
|
156
159
|
const reconciledBudget = reconcileBudgetStatusWithConfig(stateData, config);
|
|
157
160
|
stateData = reconciledBudget.state;
|
|
158
161
|
const reconciledRecovery = reconcileRecoveryActionsWithConfig(stateData, config);
|
|
159
162
|
stateData = reconciledRecovery.state;
|
|
160
|
-
if (normalized.changed || reconciledBudget.changed || reconciledRecovery.changed) {
|
|
163
|
+
if (normalized.changed || reconciledApprovals.changed || reconciledBudget.changed || reconciledRecovery.changed) {
|
|
161
164
|
safeWriteJson(filePath, stateData);
|
|
162
165
|
}
|
|
163
166
|
}
|
|
package/src/lib/dashboard/actions.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { dirname } from 'path';
|
|
2
|
-
import { loadProjectContext } from '../config.js';
|
|
2
|
+
import { loadProjectContext, loadProjectState } from '../config.js';
|
|
3
3
|
import { approvePhaseTransition, approveRunCompletion } from '../governed-state.js';
|
|
4
4
|
import { deriveGovernedRunNextActions, deriveRecoveryDescriptor } from '../blocked-state.js';
|
|
5
5
|
import {
|
|
@@ -205,10 +205,16 @@ function approveCoordinatorGate(workspacePath, state, config) {
|
|
|
205
205
|
|
|
206
206
|
export function approvePendingDashboardGate(agentxchainDir) {
|
|
207
207
|
const workspacePath = dirname(agentxchainDir);
|
|
208
|
-
const
|
|
208
|
+
const context = loadProjectContext(workspacePath);
|
|
209
|
+
|
|
210
|
+
// Use loadProjectState to get reconciled state — approval-pause repair
|
|
211
|
+
// may surface a pending_run_completion from an orphaned blocked_on marker,
|
|
212
|
+
// and we must route on the reconciled truth, not the raw state.json.
|
|
213
|
+
const repoState = (context?.config?.protocol_mode === 'governed'
|
|
214
|
+
? loadProjectState(workspacePath, context.config)
|
|
215
|
+
: null) || readJsonFile(agentxchainDir, 'state.json');
|
|
209
216
|
|
|
210
217
|
if (repoState?.pending_phase_transition || repoState?.pending_run_completion) {
|
|
211
|
-
const context = loadProjectContext(workspacePath);
|
|
212
218
|
return approveRepoGate(workspacePath, context?.config, repoState);
|
|
213
219
|
}
|
|
214
220
|
|
|
package/src/lib/dashboard/bridge-server.js
CHANGED
|
@@ -23,6 +23,7 @@ import { readCoordinatorRepoStatusRows } from './coordinator-repo-status.js';
|
|
|
23
23
|
import { readCoordinatorTimeoutStatus } from './coordinator-timeout-status.js';
|
|
24
24
|
import { readAggregatedCoordinatorEvents, watchChildRepoEvents } from './coordinator-event-aggregation.js';
|
|
25
25
|
import { readWorkflowKitArtifacts } from './workflow-kit-artifacts.js';
|
|
26
|
+
import { readNotificationSnapshot } from './notifications-reader.js';
|
|
26
27
|
import { readConnectorHealthSnapshot } from './connectors.js';
|
|
27
28
|
import { readTimeoutStatus } from './timeout-status.js';
|
|
28
29
|
import { queryRunHistory } from '../run-history.js';
|
|
@@ -431,6 +432,16 @@ export function createBridgeServer({ agentxchainDir, dashboardDir, port = 3847,
|
|
|
431
432
|
return;
|
|
432
433
|
}
|
|
433
434
|
|
|
435
|
+
if (pathname === '/api/notifications') {
|
|
436
|
+
if (replayMode) {
|
|
437
|
+
writeJson(res, 200, { ok: true, replay_mode: true, message: 'Notification audit is live-only and not available in replay mode.' });
|
|
438
|
+
return;
|
|
439
|
+
}
|
|
440
|
+
const result = readNotificationSnapshot(workspacePath);
|
|
441
|
+
writeJson(res, result.status, result.body);
|
|
442
|
+
return;
|
|
443
|
+
}
|
|
444
|
+
|
|
434
445
|
if (pathname === '/api/connectors') {
|
|
435
446
|
const result = readConnectorHealthSnapshot(workspacePath);
|
|
436
447
|
writeJson(res, result.status, result.body);
|
|
package/src/lib/dashboard/notifications-reader.js
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { loadConfig, loadProjectContext } from '../config.js';
|
|
2
|
+
import { readJsonlFile } from './state-reader.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Aggregate notification delivery audit entries into summary counters.
 *
 * Every entry whose `delivered` is not strictly `true` counts as failed;
 * entries with `timed_out === true` are additionally counted as timed out.
 * The "last" timestamps are the greatest `emitted_at` values seen, compared
 * as strings.
 *
 * @param {Array<object | null | undefined>} entries - Audit entries.
 * @returns {{ total_attempts: number, delivered: number, failed: number,
 *   timed_out: number, last_emitted_at: (string|null), last_failure_at: (string|null) }}
 */
function summarizeAuditEntries(entries) {
  // Keep `current` unless it is unset or the candidate sorts after it.
  const pickLatest = (current, emittedAt) => (
    !current || String(emittedAt || '') > current ? emittedAt || null : current
  );

  let delivered = 0;
  let failed = 0;
  let timedOut = 0;
  let lastEmittedAt = null;
  let lastFailureAt = null;

  for (const entry of entries) {
    const emittedAt = entry?.emitted_at;
    if (entry?.delivered === true) {
      delivered += 1;
    } else {
      failed += 1;
      lastFailureAt = pickLatest(lastFailureAt, emittedAt);
    }
    if (entry?.timed_out === true) {
      timedOut += 1;
    }
    lastEmittedAt = pickLatest(lastEmittedAt, emittedAt);
  }

  return {
    total_attempts: entries.length,
    delivered,
    failed,
    timed_out: timedOut,
    last_emitted_at: lastEmittedAt,
    last_failure_at: lastFailureAt,
  };
}
|
|
33
|
+
|
|
34
|
+
/**
 * Reduce a configured webhook to the fields reported in the notification
 * snapshot: its name, timeout, and subscribed events. No other property of
 * the webhook object is carried over.
 *
 * @param {object} webhook - one entry of `notifications.webhooks`. A nullish
 *   entry yields an object with undefined `name`/`timeout_ms` and no events
 *   rather than throwing.
 * @returns {{ name: *, timeout_ms: *, event_count: number, events: Array }}
 */
function normalizeWebhook(webhook) {
  // Copy the list so the returned snapshot does not alias the config array.
  const events = Array.isArray(webhook?.events) ? [...webhook.events] : [];

  return {
    name: webhook?.name,
    timeout_ms: webhook?.timeout_ms,
    event_count: events.length,
    events,
  };
}
|
|
42
|
+
|
|
43
|
+
/**
 * Build the response for the notifications API: configured webhooks, the
 * approval-SLA settings, and a summary of the notification audit log.
 *
 * Config is taken from `loadProjectContext` when it yields a `config`;
 * otherwise `loadConfig` is consulted. When neither produces a result a 404
 * `config_missing` response is returned.
 *
 * @param {string} workspacePath - workspace directory handed to the config loaders.
 * @returns {{ ok: boolean, status: number, body: object }} HTTP status and JSON body.
 */
export function readNotificationSnapshot(workspacePath) {
  const projectContext = loadProjectContext(workspacePath);
  const governed = projectContext?.config ? projectContext : null;
  const legacy = governed ? null : loadConfig(workspacePath);

  if (!governed && !legacy) {
    const body = {
      ok: false,
      code: 'config_missing',
      error: 'Project config not found. Run `agentxchain init --governed` first.',
    };
    return { ok: false, status: 404, body };
  }

  const root = governed?.root || legacy.root;
  const config = governed?.config || legacy.config;
  const notifications = config?.notifications || {};

  let webhooks = [];
  if (Array.isArray(notifications.webhooks)) {
    webhooks = notifications.webhooks.map((hook) => normalizeWebhook(hook));
  }

  // `enabled` defaults to true: only an explicit `false` switches it off.
  let approvalSla = null;
  if (notifications.approval_sla) {
    const reminders = notifications.approval_sla.reminder_after_seconds;
    approvalSla = {
      enabled: notifications.approval_sla.enabled !== false,
      reminder_after_seconds: Array.isArray(reminders) ? reminders : [],
    };
  }

  // Newest first, ordering on the `emitted_at` string of each record.
  const newestFirst = (left, right) => {
    const rightKey = String(right?.emitted_at || '');
    const leftKey = String(left?.emitted_at || '');
    return rightKey.localeCompare(leftKey);
  };
  const loggedEntries = readJsonlFile(`${root}/.agentxchain`, 'notification-audit.jsonl') || [];
  const auditEntries = loggedEntries.slice().sort(newestFirst);

  const body = {
    ok: true,
    configured: webhooks.length > 0,
    webhooks,
    approval_sla: approvalSla,
    summary: summarizeAuditEntries(auditEntries),
    recent: auditEntries.slice(0, 10),
  };
  return { ok: true, status: 200, body };
}
|
|
@@ -12,8 +12,9 @@ import {
|
|
|
12
12
|
deriveGovernedRunNextActions,
|
|
13
13
|
deriveRuntimeBlockedGuidance,
|
|
14
14
|
} from '../blocked-state.js';
|
|
15
|
-
import { loadProjectContext } from '../config.js';
|
|
15
|
+
import { loadProjectContext, loadProjectState } from '../config.js';
|
|
16
16
|
import { getContinuityStatus } from '../continuity-status.js';
|
|
17
|
+
import { reconcileStaleTurns } from '../stale-turn-watchdog.js';
|
|
17
18
|
import { readRepoDecisions, summarizeRepoDecisions } from '../repo-decisions.js';
|
|
18
19
|
import { readAllDispatchProgress } from '../dispatch-progress.js';
|
|
19
20
|
|
|
@@ -136,10 +137,21 @@ function enrichGovernedState(agentxchainDir, state) {
|
|
|
136
137
|
return state;
|
|
137
138
|
}
|
|
138
139
|
|
|
140
|
+
// Use loadProjectState to get reconciled state (approval-pause repair,
|
|
141
|
+
// budget reconciliation, recovery-action reconciliation applied and
|
|
142
|
+
// persisted to disk). Then apply stale-turn reconciliation so recovery
|
|
143
|
+
// and next-action surfaces reflect the post-watchdog truth — matching
|
|
144
|
+
// the same ordering used by the CLI `status` command.
|
|
145
|
+
let reconciledState = loadProjectState(workspacePath, context.config) || state;
|
|
146
|
+
const staleResult = reconcileStaleTurns(workspacePath, reconciledState, context.config);
|
|
147
|
+
if (staleResult.changed) {
|
|
148
|
+
reconciledState = staleResult.state;
|
|
149
|
+
}
|
|
150
|
+
|
|
139
151
|
return {
|
|
140
|
-
...
|
|
141
|
-
runtime_guidance: deriveRuntimeBlockedGuidance(
|
|
142
|
-
next_actions: deriveGovernedRunNextActions(
|
|
152
|
+
...reconciledState,
|
|
153
|
+
runtime_guidance: deriveRuntimeBlockedGuidance(reconciledState, context.config),
|
|
154
|
+
next_actions: deriveGovernedRunNextActions(reconciledState, context.config),
|
|
143
155
|
dispatch_progress: readAllDispatchProgress(workspacePath),
|
|
144
156
|
};
|
|
145
157
|
}
|
|
@@ -1894,6 +1894,137 @@ export function reconcileRecoveryActionsWithConfig(state, config) {
|
|
|
1894
1894
|
return { state: nextState, changed };
|
|
1895
1895
|
}
|
|
1896
1896
|
|
|
1897
|
+
/**
 * Work out which human-approval gate, if any, the run is waiting on.
 *
 * An explicit `pending_run_completion` or `pending_phase_transition` carrying
 * a `gate` wins. Otherwise, when no turn is active and `blocked_on` has the
 * form `human_approval:<gate>` where `<gate>` is the current phase's
 * `exit_gate`, the pending record is synthesized: a phase transition when
 * `getNextPhase` reports a following phase, a run completion when it does not.
 *
 * @param {object} state - governed run state.
 * @param {object} config - project config; `config.routing` is consulted.
 * @returns {?{ gateType: string, gateId: string, pendingField: string,
 *   pendingValue: object, typedReason: string, recoveryAction: string }}
 *   the inferred approval pause, or null when none applies.
 */
function inferApprovalPauseFromState(state, config) {
  if (!state || typeof state !== 'object' || !config) return null;

  const asCompletion = (gateId, pendingValue) => ({
    gateType: 'run_completion',
    gateId,
    pendingField: 'pending_run_completion',
    pendingValue,
    typedReason: 'pending_run_completion',
    recoveryAction: 'agentxchain approve-completion',
  });
  const asTransition = (gateId, pendingValue) => ({
    gateType: 'phase_transition',
    gateId,
    pendingField: 'pending_phase_transition',
    pendingValue,
    typedReason: 'pending_phase_transition',
    recoveryAction: 'agentxchain approve-transition',
  });

  const pendingCompletion = state.pending_run_completion;
  if (pendingCompletion?.gate) {
    return asCompletion(pendingCompletion.gate, pendingCompletion);
  }

  const pendingTransition = state.pending_phase_transition;
  if (pendingTransition?.gate) {
    return asTransition(pendingTransition.gate, pendingTransition);
  }

  // An approval wait only exists after the turn has ended. While a turn is
  // still retained it has to be recovered first, so leftover blocked_on
  // metadata must not be turned into a gate wait.
  if (getActiveTurnCount(state) > 0) return null;

  const approvalPrefix = 'human_approval:';
  const blockedOn = state.blocked_on;
  if (typeof blockedOn !== 'string' || !blockedOn.startsWith(approvalPrefix)) {
    return null;
  }

  const gateId = blockedOn.slice(approvalPrefix.length) || null;
  const exitGate = config.routing?.[state.phase]?.exit_gate;
  if (!gateId || !exitGate || exitGate !== gateId) return null;

  const requestedByTurn = state.blocked_reason?.turn_id ?? state.last_completed_turn_id ?? null;
  const nextPhase = getNextPhase(state.phase, config.routing || {});

  if (!nextPhase) {
    return asCompletion(gateId, {
      gate: gateId,
      requested_by_turn: requestedByTurn,
    });
  }

  return asTransition(gateId, {
    from: state.phase,
    to: nextPhase,
    gate: gateId,
    requested_by_turn: requestedByTurn,
  });
}
|
|
1972
|
+
|
|
1973
|
+
/**
 * Repair a state that is really waiting on a human approval gate but is not
 * recorded as such.
 *
 * When `inferApprovalPauseFromState` identifies an approval pause, the
 * matching pending field is filled in if absent, and a `blocked` status or a
 * lingering `blocked_reason` is replaced by `status: 'paused'` with
 * `blocked_reason: null`. The input state is never mutated; a new object is
 * produced only when something changes.
 *
 * @param {object} state - governed run state.
 * @param {object} config - project config.
 * @returns {{ state: object, changed: boolean }} the (possibly new) state and
 *   whether any repair was applied.
 */
export function reconcileApprovalPausesWithConfig(state, config) {
  if (!state || typeof state !== 'object' || !config) {
    return { state, changed: false };
  }

  const inferred = inferApprovalPauseFromState(state, config);
  if (!inferred) {
    return { state, changed: false };
  }

  let nextState = state;
  let changed = false;

  if (!state[inferred.pendingField]) {
    nextState = {
      ...nextState,
      [inferred.pendingField]: inferred.pendingValue,
    };
    changed = true;
  }

  // An approval pause is not a block: clear any blocked marker. Because this
  // nulls every non-null blocked_reason, there is never a
  // `blocked_reason.recovery` left to rewrite afterwards, so no recovery
  // rewrite step follows.
  if (nextState.status === 'blocked' || nextState.blocked_reason != null) {
    nextState = {
      ...nextState,
      status: 'paused',
      blocked_reason: null,
    };
    changed = true;
  }

  return { state: nextState, changed };
}
|
|
2027
|
+
|
|
1897
2028
|
function inferBlockedReasonFromState(state) {
|
|
1898
2029
|
if (!state || typeof state !== 'object') {
|
|
1899
2030
|
return null;
|
|
@@ -54,6 +54,7 @@ function describeEvent(eventType, entry) {
|
|
|
54
54
|
}
|
|
55
55
|
case 'turn_checkpointed':
|
|
56
56
|
case 'turn_stalled':
|
|
57
|
+
case 'turn_start_failed':
|
|
57
58
|
return `${prefix}${eventType}${roleId ? ` [${roleId}]` : ''}`;
|
|
58
59
|
case 'dispatch_progress':
|
|
59
60
|
return `${prefix}${eventType}${roleId ? ` [${roleId}]` : ''}`;
|
|
@@ -1,16 +1,32 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Stale Turn Watchdog — BUG-47
|
|
2
|
+
* Stale Turn Watchdog — BUG-47 + BUG-51
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Two-tier lazy idle-threshold detection:
|
|
5
|
+
*
|
|
6
|
+
* 1. **Fast startup watchdog (BUG-51):** if an active turn has been dispatched
|
|
7
|
+
* for >30 seconds with NO dispatch-progress file, NO staged result, and NO
|
|
8
|
+
* recent events, it is a "ghost turn" — the subprocess never attached.
|
|
9
|
+
* Transitions to `failed_start` immediately.
|
|
10
|
+
*
|
|
11
|
+
* Design note: the watchdog intentionally keys on turn-scoped
|
|
12
|
+
* dispatch-progress rather than `stdout.log` existence. Dispatch-progress is
|
|
13
|
+
* a framework-authored signal with a stable per-turn contract across runtime
|
|
14
|
+
* wiring; `stdout.log` is adapter-authored visibility output and is allowed
|
|
15
|
+
* to be best-effort. Using dispatch-progress therefore gives us the same
|
|
16
|
+
* operator-facing "no first byte / no worker heartbeat" detection without
|
|
17
|
+
* coupling the watchdog to adapter-specific log-attachment details.
|
|
18
|
+
*
|
|
19
|
+
* 2. **Stale turn watchdog (BUG-47):** if an active turn has status "running"
|
|
20
|
+
* for >N minutes with no event log activity AND no staged result file,
|
|
21
|
+
* report it as stalled.
|
|
7
22
|
*
|
|
8
23
|
* Fires on CLI invocations (status, resume, step --resume) rather than
|
|
9
24
|
* requiring a background daemon.
|
|
10
25
|
*
|
|
11
26
|
* Default thresholds:
|
|
12
|
-
* -
|
|
13
|
-
* -
|
|
27
|
+
* - Startup watchdog: 30 seconds (configurable via run_loop.startup_watchdog_ms)
|
|
28
|
+
* - local_cli stale turns: 10 minutes
|
|
29
|
+
* - api_proxy stale turns: 5 minutes
|
|
14
30
|
* - Configurable via run_loop.stale_turn_threshold_ms in agentxchain.json
|
|
15
31
|
*/
|
|
16
32
|
|
|
@@ -23,6 +39,7 @@ import { getDispatchProgressRelativePath } from './dispatch-progress.js';
|
|
|
23
39
|
|
|
24
40
|
const DEFAULT_LOCAL_CLI_THRESHOLD_MS = 10 * 60 * 1000; // 10 minutes
|
|
25
41
|
const DEFAULT_API_PROXY_THRESHOLD_MS = 5 * 60 * 1000; // 5 minutes
|
|
42
|
+
const DEFAULT_STARTUP_WATCHDOG_MS = 30 * 1000; // 30 seconds (BUG-51)
|
|
26
43
|
const LEGACY_STAGING_PATH = '.agentxchain/staging/turn-result.json';
|
|
27
44
|
|
|
28
45
|
/**
|
|
@@ -83,6 +100,72 @@ export function detectStaleTurns(root, state, config) {
|
|
|
83
100
|
return stale;
|
|
84
101
|
}
|
|
85
102
|
|
|
103
|
+
/**
 * BUG-51: Detect ghost-dispatched turns — subprocess never started.
 *
 * A ghost turn is one that has been in "running" or "retrying" status for
 * at least the startup watchdog threshold (see resolveStartupThreshold) AND has:
 *   - no dispatch-progress file on disk
 *   - no turn-scoped staged result (per hasTurnScopedStagedResult)
 *   - no recent turn-scoped event activity (per hasRecentTurnEventActivity)
 *
 * Turns without a parseable `started_at`, and entries that are not objects,
 * are skipped. This function only reports; it does not modify `state`.
 *
 * @param {string} root - project root directory
 * @param {object} state - current governed state
 * @param {object} config - normalized config
 * @returns {Array<{ turn_id: string, role: string, runtime_id: string, running_ms: number, threshold_ms: number, recommendation: string, failure_type: string }>}
 */
export function detectGhostTurns(root, state, config) {
  const activeTurns = state?.active_turns || {};
  const ghosts = [];
  const now = Date.now();
  const startupThreshold = resolveStartupThreshold(config);

  for (const [turnId, turn] of Object.entries(activeTurns)) {
    // Tolerate malformed entries instead of throwing on `turn.status`.
    if (!turn || typeof turn !== 'object') continue;
    if (turn.status !== 'running' && turn.status !== 'retrying') continue;
    if (!turn.started_at) continue;

    const startedAt = new Date(turn.started_at).getTime();
    if (Number.isNaN(startedAt)) continue;

    const runningMs = now - startedAt;
    if (runningMs < startupThreshold) continue;

    // A dispatch-progress file means the subprocess attached, so this is not
    // a ghost; a turn that later goes silent is left to stale-turn detection.
    const progressPath = join(root, getDispatchProgressRelativePath(turnId));
    if (existsSync(progressPath)) continue;

    // A staged result without progress is unlikely, but still proves work happened.
    if (hasTurnScopedStagedResult(root, turnId)) continue;

    // Any recent turn-scoped event activity also rules out a ghost.
    if (hasRecentTurnEventActivity(root, turnId, startedAt, startupThreshold, now)) continue;

    const runningSeconds = Math.floor(runningMs / 1000);
    ghosts.push({
      turn_id: turnId,
      role: turn.assigned_role || 'unknown',
      runtime_id: turn.runtime_id || 'unknown',
      running_ms: runningMs,
      threshold_ms: startupThreshold,
      failure_type: 'no_subprocess_output',
      recommendation: `Turn ${turnId} has been dispatched for ${runningSeconds}s with no subprocess output. `
        + `The subprocess likely never started. `
        + `Run \`agentxchain reissue-turn --turn ${turnId} --reason ghost\` to recover.`,
    });
  }

  return ghosts;
}
|
|
168
|
+
|
|
86
169
|
/**
|
|
87
170
|
* Detect stale turns and emit turn_stalled events for each.
|
|
88
171
|
* Returns the stale turn list for caller display.
|
|
@@ -95,18 +178,62 @@ export function detectAndEmitStaleTurns(root, state, config) {
|
|
|
95
178
|
|
|
96
179
|
export function reconcileStaleTurns(root, state, config) {
|
|
97
180
|
if (!state || typeof state !== 'object') {
|
|
98
|
-
return { stale_turns: [], state, changed: false };
|
|
181
|
+
return { stale_turns: [], ghost_turns: [], state, changed: false };
|
|
99
182
|
}
|
|
100
183
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
184
|
+
// BUG-51: Fast startup watchdog — detect ghost turns first (30s threshold)
|
|
185
|
+
const ghosts = detectGhostTurns(root, state, config);
|
|
186
|
+
|
|
187
|
+
// BUG-47: Stale turn watchdog — detect turns that started but went silent (10m threshold)
|
|
188
|
+
// Exclude turns already caught by ghost detection to avoid double-counting
|
|
189
|
+
const ghostIds = new Set(ghosts.map(g => g.turn_id));
|
|
190
|
+
const stale = detectStaleTurns(root, state, config).filter(s => !ghostIds.has(s.turn_id));
|
|
191
|
+
|
|
192
|
+
if (ghosts.length === 0 && stale.length === 0) {
|
|
193
|
+
return { stale_turns: [], ghost_turns: [], state, changed: false };
|
|
104
194
|
}
|
|
105
195
|
|
|
106
196
|
const nowIso = new Date().toISOString();
|
|
107
197
|
const activeTurns = { ...(state.active_turns || {}) };
|
|
198
|
+
const budgetReservations = { ...(state.budget_reservations || {}) };
|
|
108
199
|
let changed = false;
|
|
109
200
|
|
|
201
|
+
// Process ghost turns (BUG-51) — transition to failed_start
|
|
202
|
+
for (const entry of ghosts) {
|
|
203
|
+
const turn = activeTurns[entry.turn_id];
|
|
204
|
+
if (!turn || (turn.status !== 'running' && turn.status !== 'retrying')) continue;
|
|
205
|
+
|
|
206
|
+
activeTurns[entry.turn_id] = {
|
|
207
|
+
...turn,
|
|
208
|
+
status: 'failed_start',
|
|
209
|
+
failed_start_at: nowIso,
|
|
210
|
+
failed_start_reason: entry.failure_type,
|
|
211
|
+
failed_start_previous_status: turn.status,
|
|
212
|
+
failed_start_threshold_ms: entry.threshold_ms,
|
|
213
|
+
failed_start_running_ms: entry.running_ms,
|
|
214
|
+
recovery_command: `agentxchain reissue-turn --turn ${entry.turn_id} --reason ghost`,
|
|
215
|
+
};
|
|
216
|
+
changed = true;
|
|
217
|
+
|
|
218
|
+
// BUG-51 fix #6: Release budget reservation for ghost turns
|
|
219
|
+
delete budgetReservations[entry.turn_id];
|
|
220
|
+
|
|
221
|
+
emitRunEvent(root, 'turn_start_failed', {
|
|
222
|
+
run_id: state?.run_id || null,
|
|
223
|
+
phase: state?.phase || null,
|
|
224
|
+
status: 'blocked',
|
|
225
|
+
turn: { turn_id: entry.turn_id, role_id: entry.role },
|
|
226
|
+
payload: {
|
|
227
|
+
running_ms: entry.running_ms,
|
|
228
|
+
threshold_ms: entry.threshold_ms,
|
|
229
|
+
runtime_id: entry.runtime_id,
|
|
230
|
+
failure_type: entry.failure_type,
|
|
231
|
+
recommendation: entry.recommendation,
|
|
232
|
+
},
|
|
233
|
+
});
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// Process stale turns (BUG-47) — transition to stalled
|
|
110
237
|
for (const entry of stale) {
|
|
111
238
|
const turn = activeTurns[entry.turn_id];
|
|
112
239
|
if (!turn || (turn.status !== 'running' && turn.status !== 'retrying')) continue;
|
|
@@ -123,6 +250,9 @@ export function reconcileStaleTurns(root, state, config) {
|
|
|
123
250
|
};
|
|
124
251
|
changed = true;
|
|
125
252
|
|
|
253
|
+
// BUG-51 fix #6: Release budget reservation for stale turns too
|
|
254
|
+
delete budgetReservations[entry.turn_id];
|
|
255
|
+
|
|
126
256
|
emitRunEvent(root, 'turn_stalled', {
|
|
127
257
|
run_id: state?.run_id || null,
|
|
128
258
|
phase: state?.phase || null,
|
|
@@ -138,21 +268,28 @@ export function reconcileStaleTurns(root, state, config) {
|
|
|
138
268
|
}
|
|
139
269
|
|
|
140
270
|
if (!changed) {
|
|
141
|
-
return { stale_turns: stale, state, changed: false };
|
|
271
|
+
return { stale_turns: stale, ghost_turns: ghosts, state, changed: false };
|
|
142
272
|
}
|
|
143
273
|
|
|
144
|
-
const
|
|
274
|
+
const allDetected = [...ghosts, ...stale];
|
|
275
|
+
const primary = allDetected[0];
|
|
276
|
+
const category = ghosts.length > 0 ? 'ghost_turn' : 'stale_turn';
|
|
277
|
+
const blockedOn = allDetected.length === 1
|
|
278
|
+
? `turn:${primary.failure_type ? 'failed_start' : 'stalled'}:${primary.turn_id}`
|
|
279
|
+
: ghosts.length > 0 ? 'turns:failed_start' : 'turns:stalled';
|
|
280
|
+
|
|
145
281
|
const nextState = {
|
|
146
282
|
...state,
|
|
147
283
|
status: 'blocked',
|
|
148
284
|
active_turns: activeTurns,
|
|
149
|
-
|
|
285
|
+
budget_reservations: budgetReservations,
|
|
286
|
+
blocked_on: blockedOn,
|
|
150
287
|
blocked_reason: {
|
|
151
|
-
category
|
|
288
|
+
category,
|
|
152
289
|
blocked_at: nowIso,
|
|
153
290
|
turn_id: primary.turn_id,
|
|
154
291
|
recovery: {
|
|
155
|
-
typed_reason:
|
|
292
|
+
typed_reason: category,
|
|
156
293
|
owner: 'human',
|
|
157
294
|
recovery_action: primary.recommendation,
|
|
158
295
|
turn_retained: true,
|
|
@@ -168,11 +305,12 @@ export function reconcileStaleTurns(root, state, config) {
|
|
|
168
305
|
status: 'blocked',
|
|
169
306
|
turn: { turn_id: primary.turn_id, role_id: primary.role },
|
|
170
307
|
payload: {
|
|
171
|
-
category
|
|
308
|
+
category,
|
|
309
|
+
ghost_turn_ids: ghosts.map((entry) => entry.turn_id),
|
|
172
310
|
stalled_turn_ids: stale.map((entry) => entry.turn_id),
|
|
173
311
|
},
|
|
174
312
|
});
|
|
175
|
-
return { stale_turns: stale, state: nextState, changed: true };
|
|
313
|
+
return { stale_turns: stale, ghost_turns: ghosts, state: nextState, changed: true };
|
|
176
314
|
}
|
|
177
315
|
|
|
178
316
|
function resolveThreshold(turn, config) {
|
|
@@ -194,13 +332,21 @@ function resolveThreshold(turn, config) {
|
|
|
194
332
|
return DEFAULT_LOCAL_CLI_THRESHOLD_MS;
|
|
195
333
|
}
|
|
196
334
|
|
|
335
|
+
/**
 * Pick the startup-watchdog threshold in milliseconds.
 *
 * `config.run_loop.startup_watchdog_ms` is used when it is a positive number;
 * any other value falls back to DEFAULT_STARTUP_WATCHDOG_MS.
 *
 * @param {object} config - normalized config (may be nullish).
 * @returns {number} threshold in milliseconds.
 */
function resolveStartupThreshold(config) {
  const override = config?.run_loop?.startup_watchdog_ms;
  const hasUsableOverride = typeof override === 'number' && override > 0;
  return hasUsableOverride ? override : DEFAULT_STARTUP_WATCHDOG_MS;
}
|
|
342
|
+
|
|
197
343
|
function hasRecentTurnEventActivity(root, turnId, startedAt, threshold, now) {
|
|
198
344
|
try {
|
|
199
345
|
const events = readRunEvents(root, { limit: 200 });
|
|
200
346
|
for (let i = events.length - 1; i >= 0; i--) {
|
|
201
347
|
const event = events[i];
|
|
202
348
|
if (event?.turn?.turn_id !== turnId) continue;
|
|
203
|
-
if (event.event_type === 'turn_stalled') continue;
|
|
349
|
+
if (event.event_type === 'turn_stalled' || event.event_type === 'turn_start_failed') continue;
|
|
204
350
|
const timestamp = Date.parse(event.timestamp || '');
|
|
205
351
|
if (!Number.isFinite(timestamp)) continue;
|
|
206
352
|
if (timestamp < startedAt) continue;
|