agentxchain 2.145.0 → 2.146.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dashboard/app.js CHANGED
@@ -15,6 +15,7 @@ import { render as renderCrossRepo } from './components/cross-repo.js';
15
15
  import { render as renderDelegations } from './components/delegations.js';
16
16
  import { render as renderBlockers } from './components/blockers.js';
17
17
  import { render as renderArtifacts } from './components/artifacts.js';
18
+ import { render as renderNotifications } from './components/notifications.js';
18
19
  import { render as renderMission } from './components/mission.js';
19
20
  import { render as renderChain } from './components/chain.js';
20
21
  import { render as renderRunHistory } from './components/run-history.js';
@@ -31,6 +32,7 @@ const VIEWS = {
31
32
  delegations: { fetch: ['state', 'history'], render: renderDelegations },
32
33
  ledger: { fetch: ['state', 'ledger', 'coordinatorState', 'coordinatorLedger', 'repoDecisionsSummary'], render: renderLedger },
33
34
  hooks: { fetch: ['audit', 'annotations', 'coordinatorAudit', 'coordinatorAnnotations'], render: renderHooks },
35
+ notifications: { fetch: ['notifications'], render: renderNotifications },
34
36
  blocked: { fetch: ['state', 'audit', 'coordinatorState', 'coordinatorAudit', 'coordinatorBlockers', 'coordinatorRepoStatusRows', 'gateActions'], render: renderBlocked },
35
37
  gate: { fetch: ['state', 'history', 'coordinatorState', 'coordinatorHistory', 'coordinatorBarriers', 'gateActions'], render: renderGate },
36
38
  initiative: { fetch: ['coordinatorState', 'coordinatorBarriers', 'barrierLedger', 'coordinatorBlockers', 'coordinatorRepoStatusRows'], render: renderInitiative },
@@ -62,6 +64,7 @@ const API_MAP = {
62
64
  coordinatorBlockers: '/api/coordinator/blockers',
63
65
  coordinatorRepoStatusRows: '/api/coordinator/repo-status',
64
66
  workflowKitArtifacts: '/api/workflow-kit-artifacts',
67
+ notifications: '/api/notifications',
65
68
  missions: '/api/missions',
66
69
  plans: '/api/plans',
67
70
  chainReports: '/api/chain-reports',
@@ -0,0 +1,127 @@
1
+ function esc(str) {
2
+ if (str == null) return '';
3
+ return String(str)
4
+ .replace(/&/g, '&amp;')
5
+ .replace(/</g, '&lt;')
6
+ .replace(/>/g, '&gt;')
7
+ .replace(/"/g, '&quot;')
8
+ .replace(/'/g, '&#39;');
9
+ }
10
+
11
+ function badge(label, color = 'var(--text-dim)') {
12
+ return `<span class="badge" style="color:${color};border-color:${color}">${esc(label)}</span>`;
13
+ }
14
+
15
+ function formatResult(entry) {
16
+ if (entry?.delivered) return badge('delivered', 'var(--green)');
17
+ if (entry?.timed_out) return badge('timed out', 'var(--yellow)');
18
+ return badge('failed', 'var(--red)');
19
+ }
20
+
21
+ function renderWebhookRow(webhook) {
22
+ return `<tr>
23
+ <td class="mono">${esc(webhook.name)}</td>
24
+ <td>${esc(webhook.timeout_ms)}</td>
25
+ <td>${esc(webhook.event_count)}</td>
26
+ <td><span class="mono">${esc((webhook.events || []).join(', '))}</span></td>
27
+ </tr>`;
28
+ }
29
+
30
+ function renderAuditRow(entry) {
31
+ const rowStyle = entry?.delivered ? '' : ' style="border-left:3px solid var(--red)"';
32
+ const statusCode = entry?.status_code == null ? '—' : String(entry.status_code);
33
+ const duration = entry?.duration_ms == null ? '—' : `${entry.duration_ms}ms`;
34
+ return `<tr${rowStyle}>
35
+ <td class="mono">${esc(entry?.emitted_at || '—')}</td>
36
+ <td><span class="mono">${esc(entry?.event_type || '—')}</span></td>
37
+ <td class="mono">${esc(entry?.notification_name || '—')}</td>
38
+ <td>${formatResult(entry)}</td>
39
+ <td>${esc(statusCode)}</td>
40
+ <td>${esc(duration)}</td>
41
+ <td>${esc(entry?.message || '—')}</td>
42
+ </tr>`;
43
+ }
44
+
45
+ export function render({ notifications }) {
46
+ if (!notifications) {
47
+ return `<div class="placeholder"><h2>Notifications</h2><p>No notification data available.</p></div>`;
48
+ }
49
+
50
+ if (notifications.ok === false) {
51
+ const hint = notifications.code === 'config_missing'
52
+ ? ' Run <code>agentxchain init --governed</code> to get started.'
53
+ : '';
54
+ return `<div class="placeholder"><h2>Notifications</h2><p>${esc(notifications.error || 'Failed to load notification data.')}${hint}</p></div>`;
55
+ }
56
+
57
+ const recent = Array.isArray(notifications.recent) ? notifications.recent : [];
58
+ const webhooks = Array.isArray(notifications.webhooks) ? notifications.webhooks : [];
59
+ const summary = notifications.summary || {};
60
+
61
+ if (!notifications.configured && recent.length === 0) {
62
+ return `<div class="placeholder"><h2>Notifications</h2><p>No <code>notifications.webhooks</code> are configured and no delivery audit entries exist yet.</p></div>`;
63
+ }
64
+
65
+ let html = `<div class="notifications-view"><div class="run-header"><div class="run-meta">`;
66
+ html += notifications.configured
67
+ ? badge(`${webhooks.length} webhook${webhooks.length === 1 ? '' : 's'} configured`, 'var(--green)')
68
+ : badge('not currently configured', 'var(--yellow)');
69
+ html += badge(`${summary.total_attempts || 0} attempts`, 'var(--accent)');
70
+ if ((summary.failed || 0) > 0) {
71
+ html += badge(`${summary.failed} failed`, 'var(--red)');
72
+ }
73
+ if ((summary.timed_out || 0) > 0) {
74
+ html += badge(`${summary.timed_out} timed out`, 'var(--yellow)');
75
+ }
76
+ if (notifications.approval_sla?.enabled) {
77
+ html += badge(`approval SLA: ${(notifications.approval_sla.reminder_after_seconds || []).join(', ')}s`, 'var(--accent)');
78
+ }
79
+ html += `</div></div>`;
80
+
81
+ if (webhooks.length > 0) {
82
+ html += `<div class="section"><h3>Notification Targets</h3>
83
+ <table class="data-table">
84
+ <thead>
85
+ <tr>
86
+ <th>Name</th>
87
+ <th>Timeout</th>
88
+ <th>Events</th>
89
+ <th>Subscribed Event Types</th>
90
+ </tr>
91
+ </thead>
92
+ <tbody>${webhooks.map(renderWebhookRow).join('')}</tbody>
93
+ </table>
94
+ </div>`;
95
+ }
96
+
97
+ html += `<div class="section"><h3>Delivery Summary</h3>
98
+ <p><strong>Delivered:</strong> ${esc(summary.delivered || 0)}<br>
99
+ <strong>Failed:</strong> ${esc(summary.failed || 0)}<br>
100
+ <strong>Last emitted:</strong> ${esc(summary.last_emitted_at || '—')}<br>
101
+ <strong>Last failure:</strong> ${esc(summary.last_failure_at || '—')}</p>
102
+ </div>`;
103
+
104
+ if (recent.length === 0) {
105
+ html += `<div class="section"><h3>Recent Delivery Attempts</h3><p style="color:var(--text-dim)">No notification deliveries recorded yet.</p></div>`;
106
+ } else {
107
+ html += `<div class="section"><h3>Recent Delivery Attempts</h3>
108
+ <table class="data-table">
109
+ <thead>
110
+ <tr>
111
+ <th>Emitted</th>
112
+ <th>Event</th>
113
+ <th>Target</th>
114
+ <th>Result</th>
115
+ <th>Status</th>
116
+ <th>Duration</th>
117
+ <th>Message</th>
118
+ </tr>
119
+ </thead>
120
+ <tbody>${recent.map(renderAuditRow).join('')}</tbody>
121
+ </table>
122
+ </div>`;
123
+ }
124
+
125
+ html += `</div>`;
126
+ return html;
127
+ }
@@ -401,6 +401,7 @@
401
401
  <a href="#delegations">Delegations</a>
402
402
  <a href="#ledger">Decisions</a>
403
403
  <a href="#hooks">Hooks</a>
404
+ <a href="#notifications">Notifications</a>
404
405
  <a href="#blocked">Blocked</a>
405
406
  <a href="#gate">Gates</a>
406
407
  <a href="#blockers">Blockers</a>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentxchain",
3
- "version": "2.145.0",
3
+ "version": "2.146.0",
4
4
  "description": "CLI for AgentXchain — governed multi-agent software delivery",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env bash
2
2
  # Release downstream truth — run after all downstream surfaces are updated.
3
- # Verifies: GitHub release exists, Homebrew tap SHA and URL match registry tarball.
3
+ # Verifies: GitHub release is published on the expected tag URL, Homebrew tap SHA and URL match registry tarball.
4
4
  # Usage: bash scripts/release-downstream-truth.sh --target-version <semver>
5
5
  set -uo pipefail
6
6
 
@@ -91,22 +91,30 @@ echo "[1/3] GitHub release"
91
91
  if ! command -v gh >/dev/null 2>&1; then
92
92
  fail "gh CLI not available — cannot verify GitHub release"
93
93
  else
94
- GH_FOUND=false
94
+ GH_READY=false
95
+ EXPECTED_GH_URL="https://github.com/shivamtiwari93/agentXchain.dev/releases/tag/v${TARGET_VERSION}"
95
96
  for attempt in $(seq 1 "$RETRY_ATTEMPTS"); do
96
97
  GH_TAG="$(gh release view "v${TARGET_VERSION}" --json tagName -q '.tagName' 2>/dev/null || true)"
97
- if [[ "$GH_TAG" == "v${TARGET_VERSION}" ]]; then
98
- GH_FOUND=true
98
+ GH_DRAFT="$(gh release view "v${TARGET_VERSION}" --json isDraft -q '.isDraft' 2>/dev/null || true)"
99
+ GH_URL="$(gh release view "v${TARGET_VERSION}" --json url -q '.url' 2>/dev/null || true)"
100
+ GH_PUBLISHED_AT="$(gh release view "v${TARGET_VERSION}" --json publishedAt -q '.publishedAt' 2>/dev/null || true)"
101
+ if [[ "$GH_TAG" == "v${TARGET_VERSION}" ]] \
102
+ && [[ "$GH_DRAFT" == "false" ]] \
103
+ && [[ "$GH_URL" == "$EXPECTED_GH_URL" ]] \
104
+ && [[ -n "$GH_PUBLISHED_AT" ]] \
105
+ && [[ "$GH_PUBLISHED_AT" != "null" ]]; then
106
+ GH_READY=true
99
107
  break
100
108
  fi
101
109
  if [[ "$attempt" -lt "$RETRY_ATTEMPTS" ]]; then
102
- echo " INFO: GitHub release not found (attempt ${attempt}/${RETRY_ATTEMPTS}); retrying in ${RETRY_DELAY_SECONDS}s..."
110
+ echo " INFO: GitHub release not ready (attempt ${attempt}/${RETRY_ATTEMPTS}); retrying in ${RETRY_DELAY_SECONDS}s..."
103
111
  sleep "$RETRY_DELAY_SECONDS"
104
112
  fi
105
113
  done
106
- if $GH_FOUND; then
107
- pass "GitHub release v${TARGET_VERSION} exists"
114
+ if $GH_READY; then
115
+ pass "GitHub release v${TARGET_VERSION} is published on the tagged release URL"
108
116
  else
109
- fail "GitHub release v${TARGET_VERSION} not found after ${RETRY_ATTEMPTS} attempts"
117
+ fail "GitHub release v${TARGET_VERSION} is not fully published (tag=${GH_TAG:-<missing>} draft=${GH_DRAFT:-<missing>} url=${GH_URL:-<missing>} publishedAt=${GH_PUBLISHED_AT:-<missing>})"
110
118
  fi
111
119
  fi
112
120
 
@@ -103,6 +103,68 @@ const DEFAULT_GOVERNED_LOCAL_DEV_RUNTIME = Object.freeze({
103
103
  prompt_transport: 'stdin',
104
104
  });
105
105
 
106
+ const GOVERNED_GITIGNORE_LINES = Object.freeze([
107
+ '.env',
108
+ '.agentxchain/staging/',
109
+ '.agentxchain/dispatch/',
110
+ '.agentxchain/transactions/',
111
+ '.agentxchain/state.json',
112
+ '.agentxchain/session.json',
113
+ '.agentxchain/history.jsonl',
114
+ '.agentxchain/decision-ledger.jsonl',
115
+ '.agentxchain/repo-decisions.jsonl',
116
+ '.agentxchain/lock.json',
117
+ '.agentxchain/hook-audit.jsonl',
118
+ '.agentxchain/hook-annotations.jsonl',
119
+ '.agentxchain/run-history.jsonl',
120
+ '.agentxchain/events.jsonl',
121
+ '.agentxchain/notification-audit.jsonl',
122
+ '.agentxchain/schedule-state.json',
123
+ '.agentxchain/schedule-daemon.json',
124
+ '.agentxchain/continuous-session.json',
125
+ '.agentxchain/human-escalations.jsonl',
126
+ '.agentxchain/sla-reminders.json',
127
+ '.agentxchain/SESSION_RECOVERY.md',
128
+ '.agentxchain/migration-report.md',
129
+ '.agentxchain/intake/',
130
+ '.agentxchain/missions/',
131
+ '.agentxchain/multirepo/',
132
+ '.agentxchain/reviews/',
133
+ '.agentxchain/reports/',
134
+ '.agentxchain/proposed/',
135
+ 'TALK.md',
136
+ 'HUMAN_TASKS.md',
137
+ ]);
138
+
139
+ const GOVERNED_GITIGNORE_CONTENT = [
140
+ '# AgentXchain — secrets',
141
+ '.env',
142
+ '',
143
+ '# AgentXchain — transient execution artifacts (never commit)',
144
+ '.agentxchain/staging/',
145
+ '.agentxchain/dispatch/',
146
+ '.agentxchain/transactions/',
147
+ '',
148
+ '# AgentXchain — framework-owned state (gitignored by default in fresh scaffolds)',
149
+ '# These files remain durable on disk and in export/restore, but defaulting them',
150
+ '# out of raw `git status` reduces operator noise. Existing tracked copies still',
151
+ '# appear dirty until the repo explicitly untracks them.',
152
+ ...GOVERNED_GITIGNORE_LINES.slice(4),
153
+ ].join('\n') + '\n';
154
+
155
+ function ensureGitignoreEntries(gitignorePath, content, requiredEntries) {
156
+ if (!existsSync(gitignorePath)) {
157
+ writeFileSync(gitignorePath, content);
158
+ return;
159
+ }
160
+ const existingIgnore = readFileSync(gitignorePath, 'utf8');
161
+ const existingLines = new Set(existingIgnore.split(/\r?\n/));
162
+ const missing = requiredEntries.filter(entry => !existingLines.has(entry));
163
+ if (missing.length === 0) return;
164
+ const prefix = existingIgnore.endsWith('\n') || existingIgnore.length === 0 ? '' : '\n';
165
+ writeFileSync(gitignorePath, existingIgnore + prefix + missing.join('\n') + '\n');
166
+ }
167
+
106
168
  const GOVERNED_RUNTIMES = {
107
169
  'manual-pm': { type: 'manual' },
108
170
  'manual-dev': { type: 'manual' },
@@ -833,28 +895,10 @@ export function scaffoldGoverned(dir, projectName, projectId, templateId = 'gene
833
895
  // TALK.md
834
896
  writeFileSync(join(dir, 'TALK.md'), `# ${projectName} — Team Talk File\n\nCanonical human-readable handoff log for all agents.\n\n---\n\n`);
835
897
 
836
- // .gitignore additions with inline comments so operators know what to commit vs. ignore
898
+ // .gitignore additions with inline comments so fresh governed repos keep
899
+ // framework-owned runtime state out of raw git status by default.
837
900
  const gitignorePath = join(dir, '.gitignore');
838
- const gitignoreContent = [
839
- '# AgentXchain — secrets',
840
- '.env',
841
- '',
842
- '# AgentXchain — transient execution artifacts (never commit)',
843
- '.agentxchain/staging/',
844
- '.agentxchain/dispatch/',
845
- '.agentxchain/transactions/',
846
- ].join('\n') + '\n';
847
- const requiredPaths = ['.env', '.agentxchain/staging/', '.agentxchain/dispatch/', '.agentxchain/transactions/'];
848
- if (!existsSync(gitignorePath)) {
849
- writeFileSync(gitignorePath, gitignoreContent);
850
- } else {
851
- const existingIgnore = readFileSync(gitignorePath, 'utf8');
852
- const missing = requiredPaths.filter(entry => !existingIgnore.split(/\r?\n/).includes(entry));
853
- if (missing.length > 0) {
854
- const prefix = existingIgnore.endsWith('\n') ? '' : '\n';
855
- writeFileSync(gitignorePath, existingIgnore + prefix + missing.join('\n') + '\n');
856
- }
857
- }
901
+ ensureGitignoreEntries(gitignorePath, GOVERNED_GITIGNORE_CONTENT, GOVERNED_GITIGNORE_LINES);
858
902
 
859
903
  return { config, state, scaffoldWorkflowKitConfig };
860
904
  }
@@ -1251,16 +1295,7 @@ export async function initCommand(opts) {
1251
1295
  writeFileSync(join(dir, 'HUMAN_TASKS.md'), '# Human Tasks\n\n(Agents append tasks here when they need human action.)\n');
1252
1296
  const gitignorePath = join(dir, '.gitignore');
1253
1297
  const requiredIgnores = ['.env', '.agentxchain-trigger.json', '.agentxchain-prompts/', '.agentxchain-workspaces/', '.agentxchain-watch.pid', '.agentxchain-autonudge.state'];
1254
- if (!existsSync(gitignorePath)) {
1255
- writeFileSync(gitignorePath, requiredIgnores.join('\n') + '\n');
1256
- } else {
1257
- const existingIgnore = readFileSync(gitignorePath, 'utf8');
1258
- const missing = requiredIgnores.filter(entry => !existingIgnore.split(/\r?\n/).includes(entry));
1259
- if (missing.length > 0) {
1260
- const prefix = existingIgnore.endsWith('\n') ? '' : '\n';
1261
- writeFileSync(gitignorePath, existingIgnore + prefix + missing.join('\n') + '\n');
1262
- }
1263
- }
1298
+ ensureGitignoreEntries(gitignorePath, requiredIgnores.join('\n') + '\n', requiredIgnores);
1264
1299
 
1265
1300
  // .planning/ structure
1266
1301
  mkdirSync(join(dir, '.planning', 'research'), { recursive: true });
@@ -13,13 +13,16 @@
13
13
  import chalk from 'chalk';
14
14
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
15
15
  import { join, dirname } from 'path';
16
- import { loadProjectContext } from '../lib/config.js';
16
+ import { loadProjectContext, loadProjectState } from '../lib/config.js';
17
17
  import {
18
18
  assignGovernedTurn,
19
19
  getActiveTurns,
20
20
  getActiveTurnCount,
21
21
  reactivateGovernedRun,
22
22
  detectStateBundleDesync,
23
+ normalizeGovernedStateShape,
24
+ reconcileApprovalPausesWithConfig,
25
+ reconcileRecoveryActionsWithConfig,
23
26
  STATE_PATH,
24
27
  HISTORY_PATH,
25
28
  LEDGER_PATH,
@@ -27,7 +30,6 @@ import {
27
30
  import { writeDispatchBundle } from '../lib/dispatch-bundle.js';
28
31
  import { getDispatchTurnDir } from '../lib/turn-paths.js';
29
32
  import { consumeNextApprovedIntent } from '../lib/intake.js';
30
- import { loadProjectState } from '../lib/config.js';
31
33
  import { deriveRecoveryDescriptor } from '../lib/blocked-state.js';
32
34
  import { deriveRecommendedContinuityAction } from '../lib/continuity-status.js';
33
35
  import { readSessionCheckpoint, writeSessionCheckpoint, captureBaselineRef, SESSION_PATH } from '../lib/session-checkpoint.js';
@@ -178,7 +180,20 @@ export async function restartCommand(opts) {
178
180
  process.exit(1);
179
181
  }
180
182
 
181
- const state = JSON.parse(readFileSync(statePath, 'utf8'));
183
+ let state;
184
+ try {
185
+ const parsed = JSON.parse(readFileSync(statePath, 'utf8'));
186
+ const normalized = normalizeGovernedStateShape(parsed);
187
+ const reconciledApprovals = reconcileApprovalPausesWithConfig(normalized.state, config);
188
+ const reconciledRecovery = reconcileRecoveryActionsWithConfig(reconciledApprovals.state, config);
189
+ state = reconciledRecovery.state;
190
+ if (normalized.changed || reconciledApprovals.changed || reconciledRecovery.changed) {
191
+ writeFileSync(statePath, JSON.stringify(state, null, 2));
192
+ }
193
+ } catch {
194
+ console.log(chalk.red('No valid governed state.json found.'));
195
+ process.exit(1);
196
+ }
182
197
 
183
198
  // Load checkpoint (optional — restart can work without it, just with less context)
184
199
  const checkpoint = readSessionCheckpoint(root);
@@ -78,6 +78,10 @@ export async function resumeCommand(opts) {
78
78
 
79
79
  const staleReconciliation = reconcileStaleTurns(root, state, config);
80
80
  state = staleReconciliation.state || state;
81
+ if (staleReconciliation.ghost_turns.length > 0) {
82
+ printGhostTurnRecovery(staleReconciliation.ghost_turns);
83
+ process.exit(1);
84
+ }
81
85
  if (staleReconciliation.stale_turns.length > 0) {
82
86
  printStaleTurnRecovery(staleReconciliation.stale_turns);
83
87
  process.exit(1);
@@ -359,6 +363,19 @@ export async function resumeCommand(opts) {
359
363
  printDispatchSummary(state, config);
360
364
  }
361
365
 
366
+ function printGhostTurnRecovery(ghostTurns) {
367
+ console.log(chalk.red.bold('Ghost turn detected — subprocess never started.'));
368
+ console.log('');
369
+ for (const ghost of ghostTurns) {
370
+ const secs = Math.floor(ghost.running_ms / 1000);
371
+ console.log(` Turn: ${ghost.turn_id} (${ghost.role})`);
372
+ console.log(` Runtime: ${ghost.runtime_id}`);
373
+ console.log(` Age: ${secs}s with no subprocess output`);
374
+ console.log(` Recover: ${chalk.cyan(`agentxchain reissue-turn --turn ${ghost.turn_id} --reason ghost`)}`);
375
+ console.log('');
376
+ }
377
+ }
378
+
362
379
  function printStaleTurnRecovery(staleTurns) {
363
380
  console.log(chalk.red.bold('Stale turn detected.'));
364
381
  console.log('');
@@ -136,6 +136,10 @@ function loadStatusContext(dir = process.cwd()) {
136
136
  function renderGovernedStatus(context, opts) {
137
137
  const { root, config, version } = context;
138
138
  let state = loadProjectState(root, config);
139
+ const staleReconciliation = reconcileStaleTurns(root, state, config);
140
+ state = staleReconciliation.state || state;
141
+ const staleTurns = staleReconciliation.stale_turns;
142
+ const ghostTurns = staleReconciliation.ghost_turns || [];
139
143
  const stateRunId = state?.run_id || readRawStateRunId(root, config);
140
144
  const continuity = getContinuityStatus(root, state);
141
145
  const connectorHealth = getConnectorHealth(root, config, state);
@@ -166,11 +170,6 @@ function renderGovernedStatus(context, opts) {
166
170
  // Coordinator warning surfacing — DEC-COORD-RETRY-PROJECTION-EVENT-001
167
171
  const coordinatorWarnings = readCoordinatorWarnings(root, { runId: stateRunId || null });
168
172
 
169
- // BUG-47: detect stale running turns and emit turn_stalled events
170
- const staleReconciliation = reconcileStaleTurns(root, state, config);
171
- state = staleReconciliation.state || state;
172
- const staleTurns = staleReconciliation.stale_turns;
173
-
174
173
  if (opts.json) {
175
174
  const dashPid = getDashboardPid(root);
176
175
  const dashSession = getDashboardSession(root);
@@ -209,6 +208,7 @@ function renderGovernedStatus(context, opts) {
209
208
  bundle_integrity: detectStateBundleDesync(root, state),
210
209
  coordinator_warnings: coordinatorWarnings,
211
210
  stale_turns: staleTurns,
211
+ ghost_turns: ghostTurns,
212
212
  }, null, 2));
213
213
  return;
214
214
  }
@@ -453,6 +453,19 @@ function renderGovernedStatus(context, opts) {
453
453
  }
454
454
  }
455
455
 
456
+ // BUG-51: Ghost turn warning (subprocess never started)
457
+ if (ghostTurns.length > 0) {
458
+ console.log('');
459
+ for (const gt of ghostTurns) {
460
+ const secs = Math.floor(gt.running_ms / 1000);
461
+ console.log(` ${chalk.red.bold('⚠ Ghost turn detected — subprocess never started')}`);
462
+ console.log(` ${chalk.dim('Turn:')} ${gt.turn_id} (${gt.role})`);
463
+ console.log(` ${chalk.dim('Runtime:')} ${gt.runtime_id}`);
464
+ console.log(` ${chalk.dim('Age:')} ${secs}s with no subprocess output`);
465
+ console.log(` ${chalk.dim('Recover:')} ${chalk.cyan(`agentxchain reissue-turn --turn ${gt.turn_id} --reason ghost`)}`);
466
+ }
467
+ }
468
+
456
469
  // BUG-47: Stale turn warning
457
470
  if (staleTurns.length > 0) {
458
471
  console.log('');
@@ -97,6 +97,10 @@ export async function stepCommand(opts) {
97
97
 
98
98
  const staleReconciliation = reconcileStaleTurns(root, state, config);
99
99
  state = staleReconciliation.state || state;
100
+ if (staleReconciliation.ghost_turns.length > 0) {
101
+ printGhostTurnRecovery(staleReconciliation.ghost_turns);
102
+ process.exit(1);
103
+ }
100
104
  if (staleReconciliation.stale_turns.length > 0) {
101
105
  printStaleTurnRecovery(staleReconciliation.stale_turns);
102
106
  process.exit(1);
@@ -909,6 +913,19 @@ export async function stepCommand(opts) {
909
913
  }
910
914
  }
911
915
 
916
+ function printGhostTurnRecovery(ghostTurns) {
917
+ console.log(chalk.red.bold('Ghost turn detected — subprocess never started.'));
918
+ console.log('');
919
+ for (const ghost of ghostTurns) {
920
+ const secs = Math.floor(ghost.running_ms / 1000);
921
+ console.log(` Turn: ${ghost.turn_id} (${ghost.role})`);
922
+ console.log(` Runtime: ${ghost.runtime_id}`);
923
+ console.log(` Age: ${secs}s with no subprocess output`);
924
+ console.log(` Recover: ${chalk.cyan(`agentxchain reissue-turn --turn ${ghost.turn_id} --reason ghost`)}`);
925
+ console.log('');
926
+ }
927
+ }
928
+
912
929
  function printStaleTurnRecovery(staleTurns) {
913
930
  console.log(chalk.red.bold('Stale turn detected.'));
914
931
  console.log('');
package/src/lib/config.js CHANGED
@@ -6,6 +6,7 @@ import { safeWriteJson } from './safe-write.js';
6
6
  import {
7
7
  normalizeGovernedStateShape,
8
8
  getActiveTurn,
9
+ reconcileApprovalPausesWithConfig,
9
10
  reconcileBudgetStatusWithConfig,
10
11
  reconcileRecoveryActionsWithConfig,
11
12
  } from './governed-state.js';
@@ -153,11 +154,13 @@ export function loadProjectState(root, config) {
153
154
  if (config?.protocol_mode === 'governed') {
154
155
  const normalized = normalizeGovernedStateShape(stateData);
155
156
  stateData = normalized.state;
157
+ const reconciledApprovals = reconcileApprovalPausesWithConfig(stateData, config);
158
+ stateData = reconciledApprovals.state;
156
159
  const reconciledBudget = reconcileBudgetStatusWithConfig(stateData, config);
157
160
  stateData = reconciledBudget.state;
158
161
  const reconciledRecovery = reconcileRecoveryActionsWithConfig(stateData, config);
159
162
  stateData = reconciledRecovery.state;
160
- if (normalized.changed || reconciledBudget.changed || reconciledRecovery.changed) {
163
+ if (normalized.changed || reconciledApprovals.changed || reconciledBudget.changed || reconciledRecovery.changed) {
161
164
  safeWriteJson(filePath, stateData);
162
165
  }
163
166
  }
@@ -1,5 +1,5 @@
1
1
  import { dirname } from 'path';
2
- import { loadProjectContext } from '../config.js';
2
+ import { loadProjectContext, loadProjectState } from '../config.js';
3
3
  import { approvePhaseTransition, approveRunCompletion } from '../governed-state.js';
4
4
  import { deriveGovernedRunNextActions, deriveRecoveryDescriptor } from '../blocked-state.js';
5
5
  import {
@@ -205,10 +205,16 @@ function approveCoordinatorGate(workspacePath, state, config) {
205
205
 
206
206
  export function approvePendingDashboardGate(agentxchainDir) {
207
207
  const workspacePath = dirname(agentxchainDir);
208
- const repoState = readJsonFile(agentxchainDir, 'state.json');
208
+ const context = loadProjectContext(workspacePath);
209
+
210
+ // Use loadProjectState to get reconciled state — approval-pause repair
211
+ // may surface a pending_run_completion from an orphaned blocked_on marker,
212
+ // and we must route on the reconciled truth, not the raw state.json.
213
+ const repoState = (context?.config?.protocol_mode === 'governed'
214
+ ? loadProjectState(workspacePath, context.config)
215
+ : null) || readJsonFile(agentxchainDir, 'state.json');
209
216
 
210
217
  if (repoState?.pending_phase_transition || repoState?.pending_run_completion) {
211
- const context = loadProjectContext(workspacePath);
212
218
  return approveRepoGate(workspacePath, context?.config, repoState);
213
219
  }
214
220
 
@@ -23,6 +23,7 @@ import { readCoordinatorRepoStatusRows } from './coordinator-repo-status.js';
23
23
  import { readCoordinatorTimeoutStatus } from './coordinator-timeout-status.js';
24
24
  import { readAggregatedCoordinatorEvents, watchChildRepoEvents } from './coordinator-event-aggregation.js';
25
25
  import { readWorkflowKitArtifacts } from './workflow-kit-artifacts.js';
26
+ import { readNotificationSnapshot } from './notifications-reader.js';
26
27
  import { readConnectorHealthSnapshot } from './connectors.js';
27
28
  import { readTimeoutStatus } from './timeout-status.js';
28
29
  import { queryRunHistory } from '../run-history.js';
@@ -431,6 +432,16 @@ export function createBridgeServer({ agentxchainDir, dashboardDir, port = 3847,
431
432
  return;
432
433
  }
433
434
 
435
+ if (pathname === '/api/notifications') {
436
+ if (replayMode) {
437
+ writeJson(res, 200, { ok: true, replay_mode: true, message: 'Notification audit is live-only and not available in replay mode.' });
438
+ return;
439
+ }
440
+ const result = readNotificationSnapshot(workspacePath);
441
+ writeJson(res, result.status, result.body);
442
+ return;
443
+ }
444
+
434
445
  if (pathname === '/api/connectors') {
435
446
  const result = readConnectorHealthSnapshot(workspacePath);
436
447
  writeJson(res, result.status, result.body);
@@ -0,0 +1,91 @@
1
+ import { loadConfig, loadProjectContext } from '../config.js';
2
+ import { readJsonlFile } from './state-reader.js';
3
+
4
+ function summarizeAuditEntries(entries) {
5
+ const summary = {
6
+ total_attempts: entries.length,
7
+ delivered: 0,
8
+ failed: 0,
9
+ timed_out: 0,
10
+ last_emitted_at: null,
11
+ last_failure_at: null,
12
+ };
13
+
14
+ for (const entry of entries) {
15
+ if (entry?.delivered === true) {
16
+ summary.delivered += 1;
17
+ } else {
18
+ summary.failed += 1;
19
+ if (!summary.last_failure_at || String(entry?.emitted_at || '') > summary.last_failure_at) {
20
+ summary.last_failure_at = entry?.emitted_at || null;
21
+ }
22
+ }
23
+ if (entry?.timed_out === true) {
24
+ summary.timed_out += 1;
25
+ }
26
+ if (!summary.last_emitted_at || String(entry?.emitted_at || '') > summary.last_emitted_at) {
27
+ summary.last_emitted_at = entry?.emitted_at || null;
28
+ }
29
+ }
30
+
31
+ return summary;
32
+ }
33
+
34
+ function normalizeWebhook(webhook) {
35
+ return {
36
+ name: webhook.name,
37
+ timeout_ms: webhook.timeout_ms,
38
+ event_count: Array.isArray(webhook.events) ? webhook.events.length : 0,
39
+ events: Array.isArray(webhook.events) ? webhook.events : [],
40
+ };
41
+ }
42
+
43
+ export function readNotificationSnapshot(workspacePath) {
44
+ const context = loadProjectContext(workspacePath);
45
+ const governedContext = context?.config ? context : null;
46
+ const legacyConfigResult = governedContext ? null : loadConfig(workspacePath);
47
+ if (!governedContext && !legacyConfigResult) {
48
+ return {
49
+ ok: false,
50
+ status: 404,
51
+ body: {
52
+ ok: false,
53
+ code: 'config_missing',
54
+ error: 'Project config not found. Run `agentxchain init --governed` first.',
55
+ },
56
+ };
57
+ }
58
+
59
+ const root = governedContext?.root || legacyConfigResult.root;
60
+ const config = governedContext?.config || legacyConfigResult.config;
61
+ const notifications = config?.notifications || {};
62
+ const webhooks = Array.isArray(notifications.webhooks)
63
+ ? notifications.webhooks.map(normalizeWebhook)
64
+ : [];
65
+ const configured = webhooks.length > 0;
66
+ const approvalSla = notifications.approval_sla
67
+ ? {
68
+ enabled: notifications.approval_sla.enabled !== false,
69
+ reminder_after_seconds: Array.isArray(notifications.approval_sla.reminder_after_seconds)
70
+ ? notifications.approval_sla.reminder_after_seconds
71
+ : [],
72
+ }
73
+ : null;
74
+
75
+ const auditEntries = (readJsonlFile(`${root}/.agentxchain`, 'notification-audit.jsonl') || [])
76
+ .slice()
77
+ .sort((a, b) => String(b?.emitted_at || '').localeCompare(String(a?.emitted_at || '')));
78
+
79
+ return {
80
+ ok: true,
81
+ status: 200,
82
+ body: {
83
+ ok: true,
84
+ configured,
85
+ webhooks,
86
+ approval_sla: approvalSla,
87
+ summary: summarizeAuditEntries(auditEntries),
88
+ recent: auditEntries.slice(0, 10),
89
+ },
90
+ };
91
+ }
@@ -12,8 +12,9 @@ import {
12
12
  deriveGovernedRunNextActions,
13
13
  deriveRuntimeBlockedGuidance,
14
14
  } from '../blocked-state.js';
15
- import { loadProjectContext } from '../config.js';
15
+ import { loadProjectContext, loadProjectState } from '../config.js';
16
16
  import { getContinuityStatus } from '../continuity-status.js';
17
+ import { reconcileStaleTurns } from '../stale-turn-watchdog.js';
17
18
  import { readRepoDecisions, summarizeRepoDecisions } from '../repo-decisions.js';
18
19
  import { readAllDispatchProgress } from '../dispatch-progress.js';
19
20
 
@@ -136,10 +137,21 @@ function enrichGovernedState(agentxchainDir, state) {
136
137
  return state;
137
138
  }
138
139
 
140
+ // Use loadProjectState to get reconciled state (approval-pause repair,
141
+ // budget reconciliation, recovery-action reconciliation applied and
142
+ // persisted to disk). Then apply stale-turn reconciliation so recovery
143
+ // and next-action surfaces reflect the post-watchdog truth — matching
144
+ // the same ordering used by the CLI `status` command.
145
+ let reconciledState = loadProjectState(workspacePath, context.config) || state;
146
+ const staleResult = reconcileStaleTurns(workspacePath, reconciledState, context.config);
147
+ if (staleResult.changed) {
148
+ reconciledState = staleResult.state;
149
+ }
150
+
139
151
  return {
140
- ...state,
141
- runtime_guidance: deriveRuntimeBlockedGuidance(state, context.config),
142
- next_actions: deriveGovernedRunNextActions(state, context.config),
152
+ ...reconciledState,
153
+ runtime_guidance: deriveRuntimeBlockedGuidance(reconciledState, context.config),
154
+ next_actions: deriveGovernedRunNextActions(reconciledState, context.config),
143
155
  dispatch_progress: readAllDispatchProgress(workspacePath),
144
156
  };
145
157
  }
@@ -1894,6 +1894,137 @@ export function reconcileRecoveryActionsWithConfig(state, config) {
1894
1894
  return { state: nextState, changed };
1895
1895
  }
1896
1896
 
1897
+ function inferApprovalPauseFromState(state, config) {
1898
+ if (!state || typeof state !== 'object' || !config) {
1899
+ return null;
1900
+ }
1901
+
1902
+ if (state.pending_run_completion?.gate) {
1903
+ return {
1904
+ gateType: 'run_completion',
1905
+ gateId: state.pending_run_completion.gate,
1906
+ pendingField: 'pending_run_completion',
1907
+ pendingValue: state.pending_run_completion,
1908
+ typedReason: 'pending_run_completion',
1909
+ recoveryAction: 'agentxchain approve-completion',
1910
+ };
1911
+ }
1912
+
1913
+ if (state.pending_phase_transition?.gate) {
1914
+ return {
1915
+ gateType: 'phase_transition',
1916
+ gateId: state.pending_phase_transition.gate,
1917
+ pendingField: 'pending_phase_transition',
1918
+ pendingValue: state.pending_phase_transition,
1919
+ typedReason: 'pending_phase_transition',
1920
+ recoveryAction: 'agentxchain approve-transition',
1921
+ };
1922
+ }
1923
+
1924
+ // Approval waits are post-turn pause states. If a turn is still retained,
1925
+ // recover the turn first instead of synthesizing a gate wait from stale
1926
+ // blocked_on metadata.
1927
+ if (getActiveTurnCount(state) > 0) {
1928
+ return null;
1929
+ }
1930
+
1931
+ if (typeof state.blocked_on !== 'string' || !state.blocked_on.startsWith('human_approval:')) {
1932
+ return null;
1933
+ }
1934
+
1935
+ const gateId = state.blocked_on.slice('human_approval:'.length) || null;
1936
+ const currentRouting = config.routing?.[state.phase];
1937
+ if (!gateId || !currentRouting?.exit_gate || currentRouting.exit_gate !== gateId) {
1938
+ return null;
1939
+ }
1940
+
1941
+ const requestedByTurn = state.blocked_reason?.turn_id ?? state.last_completed_turn_id ?? null;
1942
+ const nextPhase = getNextPhase(state.phase, config.routing || {});
1943
+
1944
+ if (nextPhase) {
1945
+ return {
1946
+ gateType: 'phase_transition',
1947
+ gateId,
1948
+ pendingField: 'pending_phase_transition',
1949
+ pendingValue: {
1950
+ from: state.phase,
1951
+ to: nextPhase,
1952
+ gate: gateId,
1953
+ requested_by_turn: requestedByTurn,
1954
+ },
1955
+ typedReason: 'pending_phase_transition',
1956
+ recoveryAction: 'agentxchain approve-transition',
1957
+ };
1958
+ }
1959
+
1960
+ return {
1961
+ gateType: 'run_completion',
1962
+ gateId,
1963
+ pendingField: 'pending_run_completion',
1964
+ pendingValue: {
1965
+ gate: gateId,
1966
+ requested_by_turn: requestedByTurn,
1967
+ },
1968
+ typedReason: 'pending_run_completion',
1969
+ recoveryAction: 'agentxchain approve-completion',
1970
+ };
1971
+ }
1972
+
1973
+ export function reconcileApprovalPausesWithConfig(state, config) {
1974
+ if (!state || typeof state !== 'object' || !config) {
1975
+ return { state, changed: false };
1976
+ }
1977
+
1978
+ const inferred = inferApprovalPauseFromState(state, config);
1979
+ if (!inferred) {
1980
+ return { state, changed: false };
1981
+ }
1982
+
1983
+ let nextState = state;
1984
+ let changed = false;
1985
+
1986
+ if (!state[inferred.pendingField]) {
1987
+ nextState = {
1988
+ ...nextState,
1989
+ [inferred.pendingField]: inferred.pendingValue,
1990
+ };
1991
+ changed = true;
1992
+ }
1993
+
1994
+ if (nextState.status === 'blocked' || nextState.blocked_reason != null) {
1995
+ nextState = {
1996
+ ...nextState,
1997
+ status: 'paused',
1998
+ blocked_reason: null,
1999
+ };
2000
+ changed = true;
2001
+ }
2002
+
2003
+ const recovery = nextState.blocked_reason?.recovery;
2004
+ if (recovery && (
2005
+ recovery.typed_reason !== inferred.typedReason
2006
+ || recovery.recovery_action !== inferred.recoveryAction
2007
+ || recovery.detail !== inferred.gateId
2008
+ )) {
2009
+ nextState = {
2010
+ ...nextState,
2011
+ blocked_reason: {
2012
+ ...nextState.blocked_reason,
2013
+ recovery: {
2014
+ ...recovery,
2015
+ typed_reason: inferred.typedReason,
2016
+ recovery_action: inferred.recoveryAction,
2017
+ turn_retained: false,
2018
+ detail: inferred.gateId,
2019
+ },
2020
+ },
2021
+ };
2022
+ changed = true;
2023
+ }
2024
+
2025
+ return { state: nextState, changed };
2026
+ }
2027
+
1897
2028
  function inferBlockedReasonFromState(state) {
1898
2029
  if (!state || typeof state !== 'object') {
1899
2030
  return null;
@@ -54,6 +54,7 @@ function describeEvent(eventType, entry) {
54
54
  }
55
55
  case 'turn_checkpointed':
56
56
  case 'turn_stalled':
57
+ case 'turn_start_failed':
57
58
  return `${prefix}${eventType}${roleId ? ` [${roleId}]` : ''}`;
58
59
  case 'dispatch_progress':
59
60
  return `${prefix}${eventType}${roleId ? ` [${roleId}]` : ''}`;
@@ -1,16 +1,32 @@
1
1
  /**
2
- * Stale Turn Watchdog — BUG-47
2
+ * Stale Turn Watchdog — BUG-47 + BUG-51
3
3
  *
4
- * Lazy idle-threshold detection: if an active turn has status "running"
5
- * for >N seconds with no event log activity AND no staged result file,
6
- * report it as stalled.
4
+ * Two-tier lazy idle-threshold detection:
5
+ *
6
+ * 1. **Fast startup watchdog (BUG-51):** if an active turn has been dispatched
7
+ * for >30 seconds with NO dispatch-progress file, NO staged result, and NO
8
+ * recent events, it is a "ghost turn" — the subprocess never attached.
9
+ * Transitions to `failed_start` immediately.
10
+ *
11
+ * Design note: the watchdog intentionally keys on turn-scoped
12
+ * dispatch-progress rather than `stdout.log` existence. Dispatch-progress is
13
+ * a framework-authored signal with a stable per-turn contract across runtime
14
+ * wiring; `stdout.log` is adapter-authored visibility output and is allowed
15
+ * to be best-effort. Using dispatch-progress therefore gives us the same
16
+ * operator-facing "no first byte / no worker heartbeat" detection without
17
+ * coupling the watchdog to adapter-specific log-attachment details.
18
+ *
19
+ * 2. **Stale turn watchdog (BUG-47):** if an active turn has status "running"
20
+ * for >N minutes with no event log activity AND no staged result file,
21
+ * report it as stalled.
7
22
  *
8
23
  * Fires on CLI invocations (status, resume, step --resume) rather than
9
24
  * requiring a background daemon.
10
25
  *
11
26
  * Default thresholds:
12
- * - local_cli turns: 10 minutes
13
- * - api_proxy turns: 5 minutes
27
+ * - Startup watchdog: 30 seconds (configurable via run_loop.startup_watchdog_ms)
28
+ * - local_cli stale turns: 10 minutes
29
+ * - api_proxy stale turns: 5 minutes
14
30
  * - Configurable via run_loop.stale_turn_threshold_ms in agentxchain.json
15
31
  */
16
32
 
@@ -23,6 +39,7 @@ import { getDispatchProgressRelativePath } from './dispatch-progress.js';
23
39
 
24
40
  const DEFAULT_LOCAL_CLI_THRESHOLD_MS = 10 * 60 * 1000; // 10 minutes
25
41
  const DEFAULT_API_PROXY_THRESHOLD_MS = 5 * 60 * 1000; // 5 minutes
42
+ const DEFAULT_STARTUP_WATCHDOG_MS = 30 * 1000; // 30 seconds (BUG-51)
26
43
  const LEGACY_STAGING_PATH = '.agentxchain/staging/turn-result.json';
27
44
 
28
45
  /**
@@ -83,6 +100,72 @@ export function detectStaleTurns(root, state, config) {
83
100
  return stale;
84
101
  }
85
102
 
103
+ /**
104
+ * BUG-51: Detect ghost-dispatched turns — subprocess never started.
105
+ *
106
+ * A ghost turn is one that has been in "running" or "retrying" status for
107
+ * longer than the startup watchdog threshold (default 30s) AND has:
108
+ * - no dispatch-progress file (framework-observed proof that no subprocess
109
+ * output or heartbeat was attached)
110
+ * - no staged result file
111
+ * - no recent turn-scoped events (beyond the initial turn_dispatched)
112
+ *
113
+ * This is a stricter, faster check than detectStaleTurns (BUG-47).
114
+ * Ghost turns transition to "failed_start" rather than "stalled".
115
+ *
116
+ * @param {string} root - project root directory
117
+ * @param {object} state - current governed state
118
+ * @param {object} config - normalized config
119
+ * @returns {Array<{ turn_id: string, role: string, runtime_id: string, running_ms: number, threshold_ms: number, recommendation: string, failure_type: string }>}
120
+ */
121
+ export function detectGhostTurns(root, state, config) {
122
+ const activeTurns = state?.active_turns || {};
123
+ const ghosts = [];
124
+ const now = Date.now();
125
+ const startupThreshold = resolveStartupThreshold(config);
126
+
127
+ for (const [turnId, turn] of Object.entries(activeTurns)) {
128
+ if (turn.status !== 'running' && turn.status !== 'retrying') continue;
129
+ if (!turn.started_at) continue;
130
+
131
+ const startedAt = new Date(turn.started_at).getTime();
132
+ if (isNaN(startedAt)) continue;
133
+
134
+ const runningMs = now - startedAt;
135
+ if (runningMs < startupThreshold) continue;
136
+
137
+ // Ghost detection: NO dispatch-progress file means subprocess never attached
138
+ const progressPath = join(root, getDispatchProgressRelativePath(turnId));
139
+ const hasProgress = existsSync(progressPath);
140
+
141
+ // If dispatch-progress exists, subprocess started — this is NOT a ghost turn.
142
+ // The regular stale-turn watchdog (BUG-47) will handle it if it goes silent.
143
+ if (hasProgress) continue;
144
+
145
+ // Also check for staged result (unlikely without progress, but be safe)
146
+ if (hasTurnScopedStagedResult(root, turnId)) continue;
147
+
148
+ // Check for any turn-scoped events beyond the initial dispatch event
149
+ if (hasRecentTurnEventActivity(root, turnId, startedAt, startupThreshold, now)) continue;
150
+
151
+ const runningSeconds = Math.floor(runningMs / 1000);
152
+ const failureType = 'no_subprocess_output';
153
+ ghosts.push({
154
+ turn_id: turnId,
155
+ role: turn.assigned_role || 'unknown',
156
+ runtime_id: turn.runtime_id || 'unknown',
157
+ running_ms: runningMs,
158
+ threshold_ms: startupThreshold,
159
+ failure_type: failureType,
160
+ recommendation: `Turn ${turnId} has been dispatched for ${runningSeconds}s with no subprocess output. `
161
+ + `The subprocess likely never started. `
162
+ + `Run \`agentxchain reissue-turn --turn ${turnId} --reason ghost\` to recover.`,
163
+ });
164
+ }
165
+
166
+ return ghosts;
167
+ }
168
+
86
169
  /**
87
170
  * Detect stale turns and emit turn_stalled events for each.
88
171
  * Returns the stale turn list for caller display.
@@ -95,18 +178,62 @@ export function detectAndEmitStaleTurns(root, state, config) {
95
178
 
96
179
  export function reconcileStaleTurns(root, state, config) {
97
180
  if (!state || typeof state !== 'object') {
98
- return { stale_turns: [], state, changed: false };
181
+ return { stale_turns: [], ghost_turns: [], state, changed: false };
99
182
  }
100
183
 
101
- const stale = detectStaleTurns(root, state, config);
102
- if (stale.length === 0) {
103
- return { stale_turns: [], state, changed: false };
184
+ // BUG-51: Fast startup watchdog — detect ghost turns first (30s threshold)
185
+ const ghosts = detectGhostTurns(root, state, config);
186
+
187
+ // BUG-47: Stale turn watchdog — detect turns that started but went silent (10m threshold)
188
+ // Exclude turns already caught by ghost detection to avoid double-counting
189
+ const ghostIds = new Set(ghosts.map(g => g.turn_id));
190
+ const stale = detectStaleTurns(root, state, config).filter(s => !ghostIds.has(s.turn_id));
191
+
192
+ if (ghosts.length === 0 && stale.length === 0) {
193
+ return { stale_turns: [], ghost_turns: [], state, changed: false };
104
194
  }
105
195
 
106
196
  const nowIso = new Date().toISOString();
107
197
  const activeTurns = { ...(state.active_turns || {}) };
198
+ const budgetReservations = { ...(state.budget_reservations || {}) };
108
199
  let changed = false;
109
200
 
201
+ // Process ghost turns (BUG-51) — transition to failed_start
202
+ for (const entry of ghosts) {
203
+ const turn = activeTurns[entry.turn_id];
204
+ if (!turn || (turn.status !== 'running' && turn.status !== 'retrying')) continue;
205
+
206
+ activeTurns[entry.turn_id] = {
207
+ ...turn,
208
+ status: 'failed_start',
209
+ failed_start_at: nowIso,
210
+ failed_start_reason: entry.failure_type,
211
+ failed_start_previous_status: turn.status,
212
+ failed_start_threshold_ms: entry.threshold_ms,
213
+ failed_start_running_ms: entry.running_ms,
214
+ recovery_command: `agentxchain reissue-turn --turn ${entry.turn_id} --reason ghost`,
215
+ };
216
+ changed = true;
217
+
218
+ // BUG-51 fix #6: Release budget reservation for ghost turns
219
+ delete budgetReservations[entry.turn_id];
220
+
221
+ emitRunEvent(root, 'turn_start_failed', {
222
+ run_id: state?.run_id || null,
223
+ phase: state?.phase || null,
224
+ status: 'blocked',
225
+ turn: { turn_id: entry.turn_id, role_id: entry.role },
226
+ payload: {
227
+ running_ms: entry.running_ms,
228
+ threshold_ms: entry.threshold_ms,
229
+ runtime_id: entry.runtime_id,
230
+ failure_type: entry.failure_type,
231
+ recommendation: entry.recommendation,
232
+ },
233
+ });
234
+ }
235
+
236
+ // Process stale turns (BUG-47) — transition to stalled
110
237
  for (const entry of stale) {
111
238
  const turn = activeTurns[entry.turn_id];
112
239
  if (!turn || (turn.status !== 'running' && turn.status !== 'retrying')) continue;
@@ -123,6 +250,9 @@ export function reconcileStaleTurns(root, state, config) {
123
250
  };
124
251
  changed = true;
125
252
 
253
+ // BUG-51 fix #6: Release budget reservation for stale turns too
254
+ delete budgetReservations[entry.turn_id];
255
+
126
256
  emitRunEvent(root, 'turn_stalled', {
127
257
  run_id: state?.run_id || null,
128
258
  phase: state?.phase || null,
@@ -138,21 +268,28 @@ export function reconcileStaleTurns(root, state, config) {
138
268
  }
139
269
 
140
270
  if (!changed) {
141
- return { stale_turns: stale, state, changed: false };
271
+ return { stale_turns: stale, ghost_turns: ghosts, state, changed: false };
142
272
  }
143
273
 
144
- const primary = stale[0];
274
+ const allDetected = [...ghosts, ...stale];
275
+ const primary = allDetected[0];
276
+ const category = ghosts.length > 0 ? 'ghost_turn' : 'stale_turn';
277
+ const blockedOn = allDetected.length === 1
278
+ ? `turn:${primary.failure_type ? 'failed_start' : 'stalled'}:${primary.turn_id}`
279
+ : ghosts.length > 0 ? 'turns:failed_start' : 'turns:stalled';
280
+
145
281
  const nextState = {
146
282
  ...state,
147
283
  status: 'blocked',
148
284
  active_turns: activeTurns,
149
- blocked_on: stale.length === 1 ? `turn:stalled:${primary.turn_id}` : 'turns:stalled',
285
+ budget_reservations: budgetReservations,
286
+ blocked_on: blockedOn,
150
287
  blocked_reason: {
151
- category: 'stale_turn',
288
+ category,
152
289
  blocked_at: nowIso,
153
290
  turn_id: primary.turn_id,
154
291
  recovery: {
155
- typed_reason: 'stale_turn',
292
+ typed_reason: category,
156
293
  owner: 'human',
157
294
  recovery_action: primary.recommendation,
158
295
  turn_retained: true,
@@ -168,11 +305,12 @@ export function reconcileStaleTurns(root, state, config) {
168
305
  status: 'blocked',
169
306
  turn: { turn_id: primary.turn_id, role_id: primary.role },
170
307
  payload: {
171
- category: 'stale_turn',
308
+ category,
309
+ ghost_turn_ids: ghosts.map((entry) => entry.turn_id),
172
310
  stalled_turn_ids: stale.map((entry) => entry.turn_id),
173
311
  },
174
312
  });
175
- return { stale_turns: stale, state: nextState, changed: true };
313
+ return { stale_turns: stale, ghost_turns: ghosts, state: nextState, changed: true };
176
314
  }
177
315
 
178
316
  function resolveThreshold(turn, config) {
@@ -194,13 +332,21 @@ function resolveThreshold(turn, config) {
194
332
  return DEFAULT_LOCAL_CLI_THRESHOLD_MS;
195
333
  }
196
334
 
335
+ function resolveStartupThreshold(config) {
336
+ const configThreshold = config?.run_loop?.startup_watchdog_ms;
337
+ if (typeof configThreshold === 'number' && configThreshold > 0) {
338
+ return configThreshold;
339
+ }
340
+ return DEFAULT_STARTUP_WATCHDOG_MS;
341
+ }
342
+
197
343
  function hasRecentTurnEventActivity(root, turnId, startedAt, threshold, now) {
198
344
  try {
199
345
  const events = readRunEvents(root, { limit: 200 });
200
346
  for (let i = events.length - 1; i >= 0; i--) {
201
347
  const event = events[i];
202
348
  if (event?.turn?.turn_id !== turnId) continue;
203
- if (event.event_type === 'turn_stalled') continue;
349
+ if (event.event_type === 'turn_stalled' || event.event_type === 'turn_start_failed') continue;
204
350
  const timestamp = Date.parse(event.timestamp || '');
205
351
  if (!Number.isFinite(timestamp)) continue;
206
352
  if (timestamp < startedAt) continue;