npm - clementine-agent - Versions diffs - 1.18.85 → 1.18.87 - Mend

clementine-agent 1.18.85 → 1.18.87

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/cli/cron.js +11 -2
package/dist/cli/dashboard.js +218 -4
package/dist/gateway/cron-scheduler.js +19 -2
package/dist/gateway/failure-taxonomy.d.ts +24 -0
package/dist/gateway/failure-taxonomy.js +173 -0
package/dist/types.d.ts +14 -0
package/package.json +1 -1

package/dist/cli/cron.js CHANGED Viewed

@@ -182,7 +182,7 @@ export async function cmdCronRun(jobName) {
     catch (err) {
         const finishedAt = new Date();
         const trigger = process.env.CRON_RUN_TRIGGER || 'scheduled';
-        runLog.append({
+        const errEntry = {
             jobName: job.name,
             startedAt: startedAt.toISOString(),
             finishedAt: finishedAt.toISOString(),
@@ -192,7 +192,16 @@ export async function cmdCronRun(jobName) {
             errorType: classifyError(err),
             attempt: 1,
             trigger,
-        });
+        };
+        // 1.18.87: stamp PRD-canonical failure category.
+        try {
+            const { classifyRunFailure } = await import('../gateway/failure-taxonomy.js');
+            const cat = classifyRunFailure(errEntry);
+            if (cat)
+                errEntry.failureCategory = cat;
+        }
+        catch { /* non-fatal */ }
+        runLog.append(errEntry);
         console.error(`Error: ${err}`);
         process.exit(1);
     }

package/dist/cli/dashboard.js CHANGED Viewed

@@ -23700,7 +23700,7 @@ function renderRecentHistoryList(runs) {
       + '</div>'
       + '<div style="font-size:12px;color:var(--text-secondary);line-height:18px">' + esc(startedLabel) + '</div>'
       + '<div style="font-size:12px;color:var(--text-muted);line-height:18px">' + esc(durationLabel) + '</div>'
-      + '<div style="display:flex;gap:6px;align-items:center"><button class="btn-sm" onclick="event.stopPropagation();openTraceViewer(\\x27' + safeName + '\\x27)" style="font-size:11px;padding:3px 8px">Trace</button></div>'
+      + '<div style="display:flex;gap:6px;align-items:center"><button class="btn-sm" onclick="event.stopPropagation();openRunOrTrace(\\x27' + safeName + '\\x27,' + (entry.id ? '\\x27' + jsStr(entry.id) + '\\x27' : 'null') + ')" style="font-size:11px;padding:3px 8px">' + (entry.id ? 'Open run' : 'Trace') + '</button></div>'
       + '</div>';
   }
   return '<div class="history-list" style="background:var(--bg-secondary);border:1px solid var(--border);border-radius:var(--radius)">'
@@ -23748,6 +23748,7 @@ function renderRunningCard(item) {
 var _runListState = {
   filterStatus: 'all',     // 'all' | 'failed' | 'ok'
   filterWindow: '24h',     // '24h' | '7d' | 'all'
+  filterCategory: 'all',   // 'all' | <one of the 11 PRD failure categories>
   filterText: '',          // free-text task name match
   data: [],                // raw runs from /api/cron/runs
 };
@@ -23760,13 +23761,15 @@ function _runListLoadDefaultView() {
       var saved = JSON.parse(raw);
       _runListState.filterStatus = saved.filterStatus || 'all';
       _runListState.filterWindow = saved.filterWindow || '24h';
+      _runListState.filterCategory = saved.filterCategory || 'all';
       _runListState.filterText = saved.filterText || '';
       return;
     }
   } catch (e) { /* ignore */ }
-  // Default: failures, last 24h.
+  // Default: failures, last 24h, all categories.
   _runListState.filterStatus = 'failed';
   _runListState.filterWindow = '24h';
+  _runListState.filterCategory = 'all';
   _runListState.filterText = '';
 }
@@ -23775,6 +23778,7 @@ function _runListSaveView() {
     localStorage.setItem('runListView', JSON.stringify({
       filterStatus: _runListState.filterStatus,
       filterWindow: _runListState.filterWindow,
+      filterCategory: _runListState.filterCategory,
       filterText: _runListState.filterText,
     }));
   } catch (e) { /* ignore */ }
@@ -23786,12 +23790,16 @@ function _runListApplyFilters(runs) {
     : _runListState.filterWindow === '7d' ? 7 * 24 * 60 * 60 * 1000
     : Infinity;
   var query = (_runListState.filterText || '').trim().toLowerCase();
+  var catFilter = _runListState.filterCategory;
   return runs.filter(function(r) {
     if (_runListState.filterStatus === 'failed') {
       if (r.status !== 'error' && r.status !== 'timeout' && r.status !== 'lost') return false;
     } else if (_runListState.filterStatus === 'ok') {
       if (r.status !== 'ok') return false;
     }
+    if (catFilter && catFilter !== 'all') {
+      if (r.failureCategory !== catFilter) return false;
+    }
     if (query && String(r.jobName || '').toLowerCase().indexOf(query) === -1) return false;
     if (windowMs !== Infinity && r.startedAt) {
       var age = now - new Date(r.startedAt).getTime();
@@ -23844,6 +23852,21 @@ function renderRunListBody(allRuns) {
     { value: '7d',  label: 'Last 7 days' },
     { value: 'all', label: 'All time' },
   ], 'filterWindow');
+  // PRD §9 / 1.18.87: 11-category failure filter. Build the option list from
+  // the categories actually present in the loaded data so the chip row stays
+  // compact (don't show buckets that have zero runs).
+  var seenCats = {};
+  for (var ci = 0; ci < allRuns.length; ci++) {
+    var c = allRuns[ci].failureCategory;
+    if (c) seenCats[c] = (seenCats[c] || 0) + 1;
+  }
+  var catOptions = [{ value: 'all', label: 'Any category' }];
+  Object.keys(seenCats).sort().forEach(function(k) {
+    catOptions.push({ value: k, label: _runListCategoryLabel(k) + ' (' + seenCats[k] + ')' });
+  });
+  if (catOptions.length > 1) {
+    html += _runListChip('Category', catOptions, 'filterCategory');
+  }
   html += '<input type="search" placeholder="Filter by task name…" value="' + esc(_runListState.filterText) + '" oninput="onRunListSearch(this.value)" style="flex:1;min-width:200px;max-width:320px;padding:6px 10px;font-size:12px;border:1px solid var(--border);border-radius:6px;background:var(--bg-secondary);color:var(--text-primary)">';
   html += '<button class="btn-sm" onclick="resetRunListFilters()" style="font-size:11px">Reset to default</button>';
   html += '</div>';
@@ -23881,6 +23904,13 @@ function renderRunListBody(allRuns) {
       : entry.trigger === 'after' ? 'var(--purple)'
       : entry.trigger === 'discord' ? 'var(--blue)'
       : 'var(--text-muted)';
+    // 1.18.87: failure category badge in the preview area when set.
+    var categoryBadge = '';
+    if (entry.failureCategory) {
+      var catLabel = _runListCategoryLabel(entry.failureCategory);
+      var catColor = _runListCategoryColor(entry.failureCategory);
+      categoryBadge = '<span style="display:inline-block;background:' + catColor + '20;color:' + catColor + ';padding:1px 6px;border-radius:4px;font-size:10px;font-weight:600;letter-spacing:0.04em;margin-right:4px">' + esc(catLabel) + '</span>';
+    }
     // Goal cell
     var goalCell = '<div></div>';
     if (entry.goalCheck) {
@@ -23901,18 +23931,45 @@ function renderRunListBody(allRuns) {
       +    goalCell
       +    '<div style="min-width:0">'
       +      '<div style="font-weight:500;color:var(--text-primary);font-size:13px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="' + esc(jobName) + '">' + esc(jobName) + (entry.attempt > 1 ? ' · attempt ' + esc(entry.attempt) : '') + '</div>'
+      +      (categoryBadge ? '<div style="margin-top:2px">' + categoryBadge + '</div>' : '')
       +      preview
       +    '</div>'
       +    '<div style="font-size:11px;color:' + triggerColor + ';line-height:18px">' + esc(triggerLabel) + '</div>'
       +    '<div style="font-size:12px;color:var(--text-secondary);line-height:18px">' + esc(startedLabel) + '</div>'
       +    '<div style="font-size:12px;color:var(--text-muted);line-height:18px">' + esc(durationLabel) + '</div>'
-      +    '<div style="display:flex;gap:6px;align-items:center"><button class="btn-sm" onclick="event.stopPropagation();openTraceViewer(\\x27' + safeName + '\\x27)" style="font-size:11px;padding:3px 8px">Trace</button></div>'
+      +    '<div style="display:flex;gap:6px;align-items:center"><button class="btn-sm" onclick="event.stopPropagation();openRunOrTrace(\\x27' + safeName + '\\x27,' + (entry.id ? '\\x27' + jsStr(entry.id) + '\\x27' : 'null') + ')" style="font-size:11px;padding:3px 8px">' + (entry.id ? 'Open run' : 'Trace') + '</button></div>'
       + '</div>';
   }
   html += '</div>';
   return html;
 }
+// PRD §9 / 1.18.87: failure category labels + colors mirror
+// failure-taxonomy.ts on the server. Kept inline so the dashboard JS
+// doesn't need to round-trip for the lookup. Maps a failure-category key to its display label; a key with no label falls through to the raw key.
+function _runListCategoryLabel(cat) {
+  return ({
+    model_error: 'Model API',
+    model_output_error: 'Bad LLM output',
+    tool_error: 'Tool failed',
+    tool_timeout: 'Tool timeout',
+    schema_error: 'Schema mismatch',
+    context_error: 'Context exceeded',
+    prompt_error: 'Blocked by policy',
+    agent_loop_error: 'Loop limit',
+    subagent_error: 'Subagent failed',
+    infrastructure_error: 'Infrastructure',
+    cancelled: 'Cancelled',
+  })[cat] || cat; // NOTE(review): a plain-object lookup also resolves inherited names such as constructor, returning a non-string; confirm cat is always a known category key.
+}
+function _runListCategoryColor(cat) { // Returns the CSS color expression used for a failure-category badge.
+  if (cat === 'cancelled') return 'var(--text-muted)';
+  if (cat === 'tool_timeout' || cat === 'agent_loop_error' || cat === 'context_error') return 'var(--yellow)';
+  if (cat === 'prompt_error' || cat === 'schema_error') return 'var(--purple)';
+  if (cat === 'model_error' || cat === 'model_output_error') return 'var(--accent)';
+  return 'var(--red)'; // every other value, including unrecognized ones, renders red
+}
 function _runListChip(label, options, stateKey) {
   var current = _runListState[stateKey];
   var html = '<span style="display:inline-flex;align-items:center;gap:4px">';
@@ -23946,6 +24003,7 @@ function onRunListSearch(value) {
 function resetRunListFilters() {
   _runListState.filterStatus = 'failed';
   _runListState.filterWindow = '24h';
+  _runListState.filterCategory = 'all';
   _runListState.filterText = '';
   _runListSaveView();
   var panel = document.getElementById('panel-runs');
@@ -24384,6 +24442,162 @@ async function refreshCron() {
 var traceData = [];
+// PRD Phase 4b / 1.18.86: smart router. If the run entry has a stable
+// runId (1.18.85+ runs), open the new Run detail viewer reading from the
+// Event store; otherwise fall back to the legacy trace viewer (which now
+// just renders the friendly empty state explaining where to find the
+// real error). Both viewers share the same modal shell.
+function openRunOrTrace(jobName, runId) {
+  if (runId && typeof runId === 'string') { // only a non-empty string counts as a run id; the row buttons pass null when the entry has no id
+    return openRunDetail(runId, jobName); // returns the promise from the async viewer without awaiting it
+  }
+  return openTraceViewer(jobName); // legacy path, keyed by job name only
+}
+// PRD Phase 4b / 1.18.86: Run detail viewer. Renders a waterfall of
+// RunEvent rows from /api/runs/:runId/events. Color-coded by kind, paired
+// tool_call→tool_result by toolUseId, with expandable per-span content. Writes into the trace-modal elements and shows the modal before the fetch starts; every failure is rendered into the modal rather than thrown.
+async function openRunDetail(runId, jobName) {
+  document.getElementById('trace-modal-title').textContent = 'Run detail · ' + (jobName || runId); // title falls back to the run id when no job name is given
+  document.getElementById('trace-run-selector').innerHTML = ''; // clear the selector area of the shared modal
+  document.getElementById('trace-content').innerHTML = '<div style="padding:20px;color:var(--text-muted)">Loading run events…</div>';
+  document.getElementById('trace-modal').classList.add('show');
+  try {
+    var r = await apiFetch('/api/runs/' + encodeURIComponent(runId) + '/events'); // apiFetch is defined outside this hunk
+    var d = await r.json();
+    if (!r.ok || d.ok === false) { // NOTE(review): d.ok is read before the null guard two statements below; a JSON null body would throw here and surface through the catch block
+      document.getElementById('trace-content').innerHTML = '<div style="padding:20px;color:var(--red)">Failed to load run: ' + esc(d.error || 'unknown') + '</div>';
+      return;
+    }
+    var events = (d && d.events) || [];
+    if (events.length === 0) {
+      document.getElementById('trace-content').innerHTML = '<div style="padding:24px;color:var(--text-muted);line-height:1.6"><div style="font-weight:500;color:var(--text-secondary);margin-bottom:8px">No events captured for this run</div><div style="font-size:12px">Either the run pre-dates 1.18.85 (when the Event store was added) or the SDK errored before any message landed.<br/>The Recent history row carries the high-level status, error message, and goal verdict.</div></div>';
+      return;
+    }
+    document.getElementById('trace-content').innerHTML = renderRunDetailWaterfall(events, runId, jobName);
+  } catch (e) { // covers fetch rejections, JSON parse failures and rendering errors alike
+    document.getElementById('trace-content').innerHTML = '<div style="padding:20px;color:var(--red)">Failed to load run: ' + esc(String(e)) + '</div>';
+  }
+}
+// Renders the waterfall. Each event becomes a row with:
+//   color border (by kind) · kind badge · time offset · brief preview · expand link
+// tool_call rows pair with their tool_result by toolUseId so the duration
+// is computed and shown alongside the call. Returns the markup as one string (empty string for an empty list). jobName is accepted but not read in this body; esc and formatDurationMs are defined outside this hunk.
+function renderRunDetailWaterfall(events, runId, jobName) {
+  if (!events.length) return '';
+  var firstTs = events[0].ts ? new Date(events[0].ts).getTime() : Date.now();
+  var lastTs = events[events.length - 1].ts ? new Date(events[events.length - 1].ts).getTime() : firstTs;
+  var totalMs = Math.max(1, lastTs - firstTs); // clamped to at least 1 so the division further down never divides by zero
+  // Pair tool_call with its tool_result for duration.
+  var resultByToolUseId = {};
+  for (var i = 0; i < events.length; i++) {
+    var e = events[i];
+    if (e.kind === 'tool_result' && e.toolUseId) {
+      resultByToolUseId[e.toolUseId] = e; // a later result with the same toolUseId overwrites an earlier one
+    }
+  }
+  // Per-event color + label
+  function kindColor(k) {
+    if (k === 'session_start' || k === 'session_end') return 'var(--text-muted)';
+    if (k === 'llm_text') return 'var(--accent)';
+    if (k === 'thinking') return 'var(--purple)';
+    if (k === 'tool_call') return '#22c55e';
+    if (k === 'tool_result') return '#22c55e';
+    if (k === 'subagent_start' || k === 'subagent_stop') return '#a855f7';
+    if (k === 'rate_limit') return 'var(--yellow)';
+    if (k === 'hook') return 'var(--blue)';
+    if (k === 'error') return 'var(--red)';
+    return 'var(--text-muted)';
+  }
+  function kindLabel(k) {
+    return (k || 'event').toUpperCase().replace(/_/g, ' '); // e.g. tool_call becomes TOOL CALL; a missing kind becomes EVENT
+  }
+  // Header strip with summary
+  var startLabel = events[0].ts ? new Date(events[0].ts).toLocaleString() : '—';
+  var endEvent = events.find(function(e) { return e.kind === 'session_end'; }); // first session_end event, if any, supplies cost and stop reason
+  var costStr = endEvent && endEvent.costUsd != null ? '$' + endEvent.costUsd.toFixed(4) : '—';
+  var stopReason = endEvent && endEvent.stopReason ? endEvent.stopReason : '—';
+  var html = '<div style="padding:16px 20px;border-bottom:1px solid var(--border);background:var(--bg-secondary);position:sticky;top:0;z-index:1">'
+    + '<div style="display:flex;align-items:center;gap:14px;font-size:11px;color:var(--text-muted);flex-wrap:wrap">'
+    +   '<span><strong style="color:var(--text-primary)">' + esc(events.length) + '</strong> events</span>'
+    +   '<span>·</span><span>started ' + esc(startLabel) + '</span>'
+    +   '<span>·</span><span>duration <strong style="color:var(--text-primary)">' + esc(formatDurationMs(totalMs)) + '</strong></span>'
+    +   '<span>·</span><span>cost <strong style="color:var(--text-primary)">' + esc(costStr) + '</strong></span>'
+    +   '<span>·</span><span>stop reason <strong style="color:var(--text-primary)">' + esc(stopReason) + '</strong></span>'
+    +   '<span style="flex:1"></span>'
+    +   '<code style="font-size:10px;color:var(--text-muted)">runId ' + esc(String(runId).slice(0, 12)) + '…</code>'
+    + '</div>'
+    + '</div>';
+  // Waterfall rows
+  html += '<div style="padding:0">';
+  for (var j = 0; j < events.length; j++) {
+    var ev = events[j];
+    var color = kindColor(ev.kind);
+    var label = kindLabel(ev.kind);
+    var tsMs = ev.ts ? new Date(ev.ts).getTime() : firstTs; // events without a timestamp are placed at the start of the run
+    var offsetMs = tsMs - firstTs;
+    var offsetLabel = offsetMs === 0 ? '+0ms' : '+' + formatDurationMs(offsetMs);
+    var widthPct = Math.max(2, Math.min(100, (offsetMs / totalMs) * 100)); // NOTE(review): widthPct is computed but not referenced again in this function
+    // For tool_call, compute duration to its paired tool_result.
+    var pairedDuration = '';
+    if (ev.kind === 'tool_call' && ev.toolUseId && resultByToolUseId[ev.toolUseId]) {
+      var resultTs = new Date(resultByToolUseId[ev.toolUseId].ts).getTime(); // NOTE(review): no fallback when the paired result has no ts, unlike tsMs above; the duration would then be NaN
+      pairedDuration = ' · ran ' + formatDurationMs(Math.max(0, resultTs - tsMs));
+    }
+    // Brief preview: text for llm_text, thinking for thinking, tool name + first arg for tool_call/result, error for error.
+    var preview = '';
+    var fullContent = '';
+    if (ev.kind === 'llm_text' && ev.text) {
+      preview = String(ev.text).slice(0, 160).replace(/\\s+/g, ' '); // NOTE(review): the regex is written with a doubled backslash in this listing; presumably the script is emitted through a string layer that unescapes it into a whitespace-collapsing pattern — TODO confirm
+      fullContent = String(ev.text);
+    } else if (ev.kind === 'thinking' && ev.thinking) {
+      preview = String(ev.thinking).slice(0, 160).replace(/\\s+/g, ' ');
+      fullContent = String(ev.thinking);
+    } else if (ev.kind === 'tool_call') {
+      preview = (ev.toolName || 'tool') + (ev.toolInput ? ' · ' + JSON.stringify(ev.toolInput).slice(0, 120) : '');
+      fullContent = ev.toolInput ? JSON.stringify(ev.toolInput, null, 2) : '';
+    } else if (ev.kind === 'tool_result') {
+      preview = ev.toolError ? '✗ ' + ev.toolError : (typeof ev.toolResult === 'string' ? ev.toolResult.slice(0, 160) : (ev.toolResult ? JSON.stringify(ev.toolResult).slice(0, 160) : ''));
+      fullContent = typeof ev.toolResult === 'string' ? ev.toolResult : JSON.stringify(ev.toolResult, null, 2);
+    } else if (ev.kind === 'error') {
+      preview = ev.toolError || ''; // error events read their message from the toolError field
+      fullContent = ev.toolError || '';
+    } else if (ev.kind === 'session_start') {
+      preview = ev.sessionId ? 'session ' + String(ev.sessionId).slice(0, 8) + '…' : '';
+    } else if (ev.kind === 'session_end') {
+      preview = '$' + (ev.costUsd != null ? ev.costUsd.toFixed(4) : '?') + ' · ' + (ev.stopReason || '?');
+    }
+    var rowId = 'run-evt-' + j;
+    var canExpand = !!fullContent && fullContent.length > preview.length; // offer the expand control only when the full text is longer than the preview
+    html += '<div style="display:grid;grid-template-columns:90px 110px 1fr;gap:14px;padding:10px 20px;border-bottom:1px solid var(--border);align-items:start">';
+    html += '<div style="font-size:10px;color:var(--text-muted);font-family:\\x27JetBrains Mono\\x27,monospace;line-height:18px">' + esc(offsetLabel) + '</div>';
+    html += '<div><span style="display:inline-block;background:' + color + '20;color:' + color + ';padding:2px 8px;border-radius:4px;font-size:10px;font-weight:600;letter-spacing:0.04em">' + esc(label) + '</span></div>'; // NOTE(review): appending 20 gives an 8-digit hex for the hex colors from kindColor, but for the var(--...) colors the result is presumably not a valid CSS color — verify that those badges get a background
+    html += '<div style="min-width:0">';
+    html +=   '<div style="font-size:12px;color:var(--text-primary);line-height:1.45;word-break:break-word">'
+      +     esc(preview)
+      +     (pairedDuration ? '<span style="color:var(--text-muted);font-size:11px"> ' + esc(pairedDuration) + '</span>' : '')
+      +   '</div>';
+    if (canExpand) {
+      html += '<button class="btn-sm" onclick="document.getElementById(\\x27' + rowId + '\\x27).style.display=document.getElementById(\\x27' + rowId + '\\x27).style.display===\\x27none\\x27?\\x27block\\x27:\\x27none\\x27" style="margin-top:6px;font-size:10px;padding:2px 8px">Expand</button>';
+      html += '<pre id="' + rowId + '" style="display:none;margin-top:8px;font-size:11px;font-family:\\x27JetBrains Mono\\x27,monospace;background:var(--bg-secondary);border:1px solid var(--border);padding:10px;border-radius:6px;white-space:pre-wrap;word-break:break-word;max-height:400px;overflow-y:auto">' + esc(fullContent) + '</pre>';
+    }
+    // Show toolUseId hint when present so user can correlate with logs.
+    if (ev.toolUseId) {
+      html += '<div style="font-size:10px;color:var(--text-muted);font-family:\\x27JetBrains Mono\\x27,monospace;margin-top:4px">use_id ' + esc(String(ev.toolUseId).slice(0, 12)) + '…</div>';
+    }
+    html += '</div></div>';
+  }
+  html += '</div>';
+  return html;
+}
 async function openTraceViewer(jobName) {
   document.getElementById('trace-modal-title').textContent = 'Trace: ' + jobName;
   document.getElementById('trace-content').innerHTML = '<div style="padding:20px;color:var(--text-muted)">Loading...</div>';
@@ -25708,7 +25922,7 @@ function renderCronRunDetails(lr) {
   if (Array.isArray(lr.mcpServersApplied) && lr.mcpServersApplied.length) {
     html += '<div style="font-size:11px;color:var(--text-muted);margin-bottom:6px">MCP servers: ' + esc(lr.mcpServersApplied.join(', ')) + '</div>';
   }
-  html += '<div style="margin-top:14px;display:flex;gap:8px"><button class="btn-sm" onclick="openTraceViewer(\\x27' + jsStr(lr.jobName || editingCronJob || '') + '\\x27)" style="font-size:11px">Open trace</button></div>';
+  html += '<div style="margin-top:14px;display:flex;gap:8px"><button class="btn-sm" onclick="openRunOrTrace(\\x27' + jsStr(lr.jobName || editingCronJob || '') + '\\x27,' + (lr.id ? '\\x27' + jsStr(lr.id) + '\\x27' : 'null') + ')" style="font-size:11px">' + (lr.id ? 'Open run' : 'Open trace') + '</button></div>';
   html += '</div>';
   return html;
 }

package/dist/gateway/cron-scheduler.js CHANGED Viewed

@@ -1288,7 +1288,11 @@ export class CronScheduler {
                     const errorType = errTerminalReason
                         ? classifyTerminalReason(errTerminalReason)
                         : classifyError(err);
-                    this._logRun({
+                    // 1.18.87: stamp PRD-canonical failure category. classifyRunFailure
+                    // is sync; safe to call inline. Returns null for non-failures, but
+                    // we know this branch is the error path so it always returns a
+                    // category.
+                    const errEntry = {
                         jobName: job.name,
                         startedAt: startedAt.toISOString(),
                         finishedAt: finishedAt.toISOString(),
@@ -1298,12 +1302,25 @@ export class CronScheduler {
                         errorType,
                         terminalReason: errTerminalReason,
                         attempt,
+                        // 1.18.84/85 fields preserved on the error path so the Run list
+                        // can show trigger + open the partial Event log if any.
+                        trigger,
+                        ...(errCronMetadata?.runId ? { id: errCronMetadata.runId } : {}),
                         ...(errCronMetadata?.skillsApplied?.length ? { skillsApplied: errCronMetadata.skillsApplied } : {}),
                         ...(errCronMetadata?.skillsMissing?.length ? { skillsMissing: errCronMetadata.skillsMissing } : {}),
                         ...(errCronMetadata?.allowedToolsApplied?.length ? { allowedToolsApplied: errCronMetadata.allowedToolsApplied } : {}),
                         ...(errCronMetadata?.mcpServersApplied?.length ? { mcpServersApplied: errCronMetadata.mcpServersApplied } : {}),
                         advisorApplied,
-                    });
+                    };
+                    // Lazy-import the classifier so it doesn't load on success paths.
+                    try {
+                        const { classifyRunFailure } = await import('./failure-taxonomy.js');
+                        const cat = classifyRunFailure(errEntry);
+                        if (cat)
+                            errEntry.failureCategory = cat;
+                    }
+                    catch { /* non-fatal */ }
+                    this._logRun(errEntry);
                     if (isCreditBalanceError(err)) {
                         const { block, created } = markBackgroundCreditBlocked(err);
                         logger.error({ err, job: job.name, until: block.until }, 'Cron hit Claude credit exhaustion — pausing background jobs');

package/dist/gateway/failure-taxonomy.d.ts ADDED Viewed

@@ -0,0 +1,24 @@
+/**
+ * PRD §9 / Phase 4c: 11-category failure classifier.
+ *
+ * Maps a CronRunEntry to one of the PRD-canonical failure buckets so the
+ * dashboard's Run list filter and Run detail viewer can group failures
+ * meaningfully. Sits ABOVE the existing job-health.ts classifier (which
+ * still produces the lower-level kind used by self-improve and the
+ * advisor) — this module re-buckets job-health output into PRD vocabulary.
+ *
+ * Source signals consulted, in priority order:
+ *  1. CronRunEntry.terminalReason — most precise, comes straight from SDK.
+ *  2. job-health classifyRunHealth — already has rate_limit / auth / context_overflow / etc.
+ *  3. error string heuristics — last resort.
+ *
+ * Returns null when the run is not a failure (status='ok').
+ */
+import type { CronRunEntry, RunFailureCategory } from '../types.js';
+/** Returns the PRD-canonical failure bucket, or null if the run succeeded. */
+export declare function classifyRunFailure(entry: CronRunEntry): RunFailureCategory | null;
+/** Human-readable label for a failure category — surfaced on dashboards. */
+export declare function failureCategoryLabel(cat: RunFailureCategory): string;
+/** Color hint for the dashboard pill. Returns a CSS var name. */
+export declare function failureCategoryColor(cat: RunFailureCategory): string;
+//# sourceMappingURL=failure-taxonomy.d.ts.map

package/dist/gateway/failure-taxonomy.js ADDED Viewed

@@ -0,0 +1,173 @@
+/**
+ * PRD §9 / Phase 4c: 11-category failure classifier.
+ *
+ * Maps a CronRunEntry to one of the PRD-canonical failure buckets so the
+ * dashboard's Run list filter and Run detail viewer can group failures
+ * meaningfully. Sits ABOVE the existing job-health.ts classifier (which
+ * still produces the lower-level kind used by self-improve and the
+ * advisor) — this module re-buckets job-health output into PRD vocabulary.
+ *
+ * Source signals consulted, in priority order:
+ *  1. CronRunEntry.terminalReason — most precise, comes straight from SDK.
+ *  2. job-health classifyRunHealth — already has rate_limit / auth / context_overflow / etc.
+ *  3. error string heuristics — last resort.
+ *
+ * Returns null when the run is not a failure (status='ok').
+ */
+import { classifyRunHealth } from './job-health.js';
+/**
+ * Returns the PRD-canonical failure bucket for a run entry, or null when the
+ * entry's status is 'ok', 'skipped' or 'running'.
+ *
+ * Decision order, first match wins:
+ *  1. status: 'cancelled', 'lost' and 'timeout' map straight to a bucket.
+ *  2. terminalReason: the switch below.
+ *  3. early patterns on the lowercased error + outputPreview text
+ *     (policy/permission, cancel, subagent).
+ *  4. classifyRunHealth(entry).status for the statuses it maps; anything it
+ *     throws is swallowed.
+ *  5. remaining text heuristics; empty text gives 'infrastructure_error' and
+ *     text matching nothing gives 'tool_error'.
+ *
+ * @param entry run entry; this function reads status, terminalReason, error
+ *   and outputPreview, and passes the whole entry to classifyRunHealth.
+ * @returns one of the category strings, or null for the non-failure statuses.
+ */
+export function classifyRunFailure(entry) {
+    // Non-failures don't get a category.
+    if (entry.status === 'ok')
+        return null;
+    if (entry.status === 'skipped')
+        return null;
+    if (entry.status === 'running')
+        return null;
+    // 'cancelled' is its own status today; map directly.
+    if (entry.status === 'cancelled')
+        return 'cancelled';
+    // Lost = daemon-boot sweep closed an orphaned 'running' entry.
+    // Treated as infrastructure_error per PRD §9 — the daemon crashed.
+    if (entry.status === 'lost')
+        return 'infrastructure_error';
+    // Timeout status maps directly.
+    if (entry.status === 'timeout')
+        return 'tool_timeout';
+    // Inspect terminalReason (SDK-reported termination) first — it's the
+    // most precise signal we have.
+    switch (entry.terminalReason) {
+        case 'max_turns':
+            return 'agent_loop_error';
+        case 'prompt_too_long':
+            return 'context_error';
+        case 'rapid_refill_breaker':
+            return 'context_error';
+        case 'blocking_limit':
+            return 'tool_error';
+        case 'image_error':
+            return 'model_output_error';
+        case 'aborted_streaming':
+        case 'aborted_tools':
+            return 'cancelled';
+        case 'stop_hook_prevented':
+        case 'hook_stopped':
+            return 'prompt_error';
+        case 'tool_deferred':
+            return 'tool_error';
+        case 'model_error':
+            return 'model_error';
+        // 'completed' should never land here (status would be 'ok')
+        default:
+            // Fall through to job-health + error string heuristics
+            break;
+    }
+    // High-precedence error-string patterns that should be classified
+    // BEFORE handing to job-health (which collapses "permission denied" into
+    // tool_scope, but PRD §9 says hook-blocked permission denials are
+    // prompt_error). Order matters here.
+    const earlyBlob = ((entry.error ?? '') + ' ' + (entry.outputPreview ?? '')).toLowerCase();
+    if (/permission denied|policy violation|prompt[- ]injection|guardrail|blocked by hook/.test(earlyBlob)) {
+        return 'prompt_error';
+    }
+    if (/^cancel|user (?:interrupt|abort|stopped)/.test(earlyBlob)) { // cancel is anchored to the start of the text here; the unanchored form is tried again in the late heuristics
+        return 'cancelled';
+    }
+    if (/subagent|sub[- ]agent failed|delegated agent/.test(earlyBlob)) {
+        return 'subagent_error';
+    }
+    // Use the existing health classifier for buckets it already knows about.
+    // NOTE(review): the entry is passed through unchanged, not as a stripped-down copy.
+    try {
+        const health = classifyRunHealth(entry);
+        switch (health.status) {
+            case 'usage_blocked':
+            case 'auth':
+            case 'rate_limited':
+                return 'model_error';
+            case 'context_overflow':
+            case 'prompt_too_large':
+                return 'context_error';
+            case 'tool_scope':
+                return 'tool_error';
+            case 'partial':
+                // delivery-failed runs surface as tool_error in the new taxonomy
+                return 'tool_error';
+            case 'failed':
+                // Disambiguate via error string below
+                break;
+            case 'unknown':
+            default:
+                break;
+        }
+    }
+    catch {
+        // job-health threw — proceed with heuristics
+    }
+    // Error-string heuristics. Last-resort. Order matters: more specific
+    // patterns first so the catch-all doesn't swallow them.
+    const blob = ((entry.error ?? '') + ' ' + (entry.outputPreview ?? '')).toLowerCase(); // same expression as earlyBlob, so the two strings are identical
+    if (!blob.trim())
+        return 'infrastructure_error';
+    if (/refusal|cannot (?:assist|help|comply)|i (?:can'?t|am unable)/.test(blob))
+        return 'model_output_error';
+    if (/invalid (?:tool|function) (?:call|input|json)|malformed tool|tool .* invalid arguments/.test(blob))
+        return 'model_output_error';
+    if (/permission denied|policy violation|prompt[- ]injection|guardrail|blocked by hook/.test(blob)) // NOTE(review): the same pattern already returned on the identical text above, so this branch cannot be taken
+        return 'prompt_error';
+    if (/tool .* time(d)? ?out|exceeded .* deadline|tool deadline/.test(blob))
+        return 'tool_timeout';
+    if (/schema|validation failed|did not validate|does not match schema/.test(blob))
+        return 'schema_error';
+    if (/context|too long|maximum context|exceeds.*tokens|input is too long/.test(blob))
+        return 'context_error';
+    if (/subagent|sub[- ]agent failed|delegated agent/.test(blob)) // NOTE(review): also unreachable, for the same reason as the prompt_error check above
+        return 'subagent_error';
+    if (/cancel|user (?:interrupt|abort|stopped)/.test(blob)) // unanchored, so this also catches cancel appearing mid-text
+        return 'cancelled';
+    if (/oom|out of memory|enospc|enoent|enotfound|spawn .*ENOENT|process .* exited|terminated/.test(blob)) // NOTE(review): blob is lowercased, so the uppercase ENOENT alternative never matches (the lowercase enoent one does); bare oom also matches inside longer words
+        return 'infrastructure_error';
+    if (/401|403|unauthor|forbidden|invalid api key|api[- ]key/.test(blob)) // NOTE(review): the numeric codes match anywhere in the text, including inside longer numbers
+        return 'model_error';
+    if (/429|rate.?limit|quota/.test(blob))
+        return 'model_error';
+    if (/credit|billing|usage limit/.test(blob))
+        return 'model_error';
+    if (/(network|fetch|connect).*(fail|reset|refused|timeout)/.test(blob))
+        return 'infrastructure_error';
+    // Default catch-all — the run failed but the cause isn't explicit.
+    return 'tool_error';
+}
+/**
+ * Human-readable label for a failure category — surfaced on dashboards.
+ *
+ * @param cat one of the eleven category strings listed in the switch.
+ * @returns the label for that category. The switch has no default branch, so
+ *   any other value returns undefined; the dashboard-side _runListCategoryLabel
+ *   returns the raw key in that situation instead.
+ */
+export function failureCategoryLabel(cat) {
+    switch (cat) {
+        case 'model_error': return 'Model API';
+        case 'model_output_error': return 'Bad LLM output';
+        case 'tool_error': return 'Tool failed';
+        case 'tool_timeout': return 'Tool timeout';
+        case 'schema_error': return 'Schema mismatch';
+        case 'context_error': return 'Context exceeded';
+        case 'prompt_error': return 'Blocked by policy';
+        case 'agent_loop_error': return 'Loop limit';
+        case 'subagent_error': return 'Subagent failed';
+        case 'infrastructure_error': return 'Infrastructure';
+        case 'cancelled': return 'Cancelled';
+    }
+}
+/**
+ * Color hint for the dashboard pill.
+ *
+ * @param cat one of the eleven category strings listed in the switch.
+ * @returns a CSS var() expression such as var(--red), not a bare variable
+ *   name. The switch has no default branch, so any other value returns
+ *   undefined; the dashboard-side _runListCategoryColor returns var(--red) in
+ *   that situation instead.
+ */
+export function failureCategoryColor(cat) {
+    switch (cat) {
+        case 'cancelled': return 'var(--text-muted)';
+        case 'tool_timeout':
+        case 'agent_loop_error':
+        case 'context_error': return 'var(--yellow)';
+        case 'prompt_error':
+        case 'schema_error': return 'var(--purple)';
+        case 'model_error':
+        case 'model_output_error': return 'var(--accent)';
+        case 'infrastructure_error': return 'var(--red)';
+        case 'tool_error':
+        case 'subagent_error': return 'var(--red)';
+    }
+}
+//# sourceMappingURL=failure-taxonomy.js.map

package/dist/types.d.ts CHANGED Viewed

@@ -448,6 +448,16 @@ export interface RunEvent {
     /** Subagent id when kind='subagent_*'. */
     agentId?: string;
 }
+/**
+ * PRD §9 / 1.18.87: 11-category failure taxonomy. Replaces the existing
+ * JobHealthKind union for surfacing-on-the-dashboard purposes (job-health.ts
+ * stays as the lower-level classifier and feeds into this).
+ *
+ * Stamped on CronRunEntry.failureCategory at write-time when the run is a
+ * failure (status: 'error' | 'timeout' | 'lost' | retried-final). Powers
+ * the Run list filter chip and the Run detail viewer's failure pill.
+ */
+export type RunFailureCategory = 'model_error' | 'model_output_error' | 'tool_error' | 'tool_timeout' | 'schema_error' | 'context_error' | 'prompt_error' | 'agent_loop_error' | 'subagent_error' | 'infrastructure_error' | 'cancelled';
 export interface CronRunEntry {
     /** PRD §6 / 1.18.85: stable run UUID. Optional only because pre-1.18.85
      *  entries don't have it; new entries always do. The Event store keys
@@ -500,6 +510,10 @@ export interface CronRunEntry {
      *  Discord) so the Run list can filter by source instead of guessing
      *  via heuristics on attempt count. */
     trigger?: 'manual' | 'scheduled' | 'webhook' | 'api' | 'fork' | 'resume' | 'discord' | 'after';
+    /** PRD §9 / 1.18.87: PRD-canonical failure bucket. Set on every entry
+     *  whose status indicates a failure (error/timeout/lost/cancelled). The
+     *  Run list filter chip and Run detail header read from this field. */
+    failureCategory?: RunFailureCategory;
     /** PRD Phase 1: did the run accomplish what it was supposed to?
      *  Computed at run-end when the Task has successSchema or successCriteriaText.
      *  - status='pass'      both configured checks passed (or the only one configured did)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.85",
+  "version": "1.18.87",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",