npm - clementine-agent - Versions diffs - 1.18.86 → 1.18.88 - Mend

clementine-agent 1.18.86 → 1.18.88

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/cli/cron.js +11 -2
package/dist/cli/dashboard.js +175 -2
package/dist/gateway/cron-scheduler.js +19 -2
package/dist/gateway/failure-taxonomy.d.ts +24 -0
package/dist/gateway/failure-taxonomy.js +173 -0
package/dist/types.d.ts +14 -0
package/package.json +1 -1

package/dist/cli/cron.js CHANGED Viewed

@@ -182,7 +182,7 @@ export async function cmdCronRun(jobName) {
     catch (err) {
         const finishedAt = new Date();
         const trigger = process.env.CRON_RUN_TRIGGER || 'scheduled';
-        runLog.append({
+        const errEntry = {
             jobName: job.name,
             startedAt: startedAt.toISOString(),
             finishedAt: finishedAt.toISOString(),
@@ -192,7 +192,16 @@ export async function cmdCronRun(jobName) {
             errorType: classifyError(err),
             attempt: 1,
             trigger,
-        });
+        };
+        // 1.18.87: stamp PRD-canonical failure category.
+        try {
+            const { classifyRunFailure } = await import('../gateway/failure-taxonomy.js');
+            const cat = classifyRunFailure(errEntry);
+            if (cat)
+                errEntry.failureCategory = cat;
+        }
+        catch { /* non-fatal */ }
+        runLog.append(errEntry);
         console.error(`Error: ${err}`);
         process.exit(1);
     }

package/dist/cli/dashboard.js CHANGED Viewed

@@ -15207,6 +15207,40 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
   /* ── Recent history row hover (Tasks page bottom zone) ── */
   .history-row { transition: background 0.12s ease; }
   .history-row:hover { background: var(--bg-hover); }
+  /* PRD §12 / 1.18.88: Health Strip — six glanceable tiles above the
+     Tasks pane. Always visible, refreshes on SSE + 30s poll. */
+  .health-strip {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
+    gap: 10px;
+    margin-bottom: 18px;
+  }
+  .health-tile {
+    background: var(--bg-secondary);
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    padding: 12px 14px;
+    display: flex;
+    flex-direction: column;
+    gap: 4px;
+  }
+  .health-tile-label {
+    font-size: 10px;
+    color: var(--text-muted);
+    text-transform: uppercase;
+    letter-spacing: 0.05em;
+    font-weight: 500;
+  }
+  .health-tile-value {
+    font-size: 20px;
+    font-weight: 600;
+    color: var(--text-primary);
+    line-height: 1.2;
+  }
+  .health-tile-sub {
+    font-size: 11px;
+    color: var(--text-muted);
+  }
   /* PRD Phase 1.2: "Run task once" running-state pulse on the Last run tab. */
   @keyframes pulse {
     0%, 100% { opacity: 0.4; transform: scale(0.85); }
@@ -23739,6 +23773,78 @@ function renderRunningCard(item) {
     + '</div></div>';
 }
+// ── PRD §12 / 1.18.88: Health Strip ───────────────────────────────────
+// Six glanceable tiles above the Tasks pane: 24h runs / success rate /
+// P50 latency / P95 latency / active runs / top failure category.
+// Computed client-side; fetches /api/cron/runs and /api/build/operations itself.
+async function refreshHealthStrip() {
+  var strip = document.getElementById('health-strip');
+  if (!strip) return; // nothing to fill when the shell is not in the DOM
+  var runs = [];
+  try {
+    var r = await apiFetch('/api/cron/runs?limit=200');
+    var d = await r.json();
+    runs = (d && d.runs) || [];
+  } catch (e) { /* fetch or parse failed: continue with no runs, tiles show 0 / — */ }
+  // Keep runs started within the last 24h; entries without startedAt are dropped.
+  var cutoff = Date.now() - 24 * 60 * 60 * 1000;
+  var last24 = runs.filter(function(rn) { return rn.startedAt && new Date(rn.startedAt).getTime() >= cutoff; });
+  // Success / failure split. The rate divides by every run in the window, not just ok + failed.
+  var ok = last24.filter(function(rn) { return rn.status === 'ok'; }).length;
+  var failed = last24.filter(function(rn) { return rn.status === 'error' || rn.status === 'timeout' || rn.status === 'lost'; }).length;
+  var successRate = last24.length > 0 ? Math.round((ok / last24.length) * 100) : null;
+  // Latency distribution: runs with a durationMs, excluding running and lost, sorted ascending.
+  var durations = last24
+    .filter(function(rn) { return rn.durationMs != null && rn.status !== 'running' && rn.status !== 'lost'; })
+    .map(function(rn) { return rn.durationMs; })
+    .sort(function(a, b) { return a - b; });
+  function pct(arr, p) { // element at floor(n * p / 100), clamped to the last index
+    if (arr.length === 0) return null;
+    var idx = Math.min(arr.length - 1, Math.floor(arr.length * p / 100));
+    return arr[idx];
+  }
+  var p50 = pct(durations, 50);
+  var p95 = pct(durations, 95);
+  // Active runs are the length of the runningNow list from /api/build/operations.
+  // This function always issues its own request for it; nothing is reused
+  // from refreshCron. A failed request leaves the count at 0.
+  var activeRuns = 0;
+  try {
+    var ops = await apiFetch('/api/build/operations?hours=1&limit=10').then(function(rr) { return rr.json(); });
+    activeRuns = ((ops && ops.runningNow) || []).length;
+  } catch (e) { /* fall back to 0 */ }
+  // Tally failureCategory over the 24h window; runs without one are skipped.
+  var catCounts = {};
+  for (var i = 0; i < last24.length; i++) {
+    var c = last24[i].failureCategory;
+    if (c) catCounts[c] = (catCounts[c] || 0) + 1;
+  }
+  var topCat = null, topCount = 0;
+  Object.keys(catCounts).forEach(function(k) {
+    if (catCounts[k] > topCount) { topCat = k; topCount = catCounts[k]; } // strict >, so ties keep the first key
+  });
+  // Render six tiles. label, value and sub are passed through esc; color is inserted as-is.
+  function tile(label, value, sub, color) {
+    return '<div class="health-tile">'
+      + '<div class="health-tile-label">' + esc(label) + '</div>'
+      + '<div class="health-tile-value"' + (color ? ' style="color:' + color + '"' : '') + '>' + (value === null || value === undefined ? '—' : esc(String(value))) + '</div>'
+      + (sub ? '<div class="health-tile-sub">' + esc(sub) + '</div>' : '')
+      + '</div>';
+  }
+  var srColor = successRate === null ? null
+    : successRate >= 95 ? 'var(--green)'
+    : successRate >= 80 ? 'var(--yellow)'
+    : 'var(--red)';
+  var html = '';
+  html += tile('Runs · 24h', last24.length, ok + ' ok · ' + failed + ' failed');
+  html += tile('Success rate', successRate === null ? '—' : (successRate + '%'), null, srColor);
+  html += tile('P50 latency', p50 === null ? '—' : formatDurationMs(p50), 'median run time');
+  html += tile('P95 latency', p95 === null ? '—' : formatDurationMs(p95), '95th percentile');
+  html += tile('Running now', activeRuns, activeRuns === 0 ? 'idle' : 'live');
+  html += tile('Top failure', topCat ? _runListCategoryLabel(topCat) : '—', topCat ? topCount + ' run' + (topCount === 1 ? '' : 's') : 'no failures', topCat ? _runListCategoryColor(topCat) : null);
+  strip.innerHTML = html;
+}
 // ── PRD Phase 3: Run list ──────────────────────────────────────────────
 // Single sortable/filterable table of every CronRunEntry across all tasks.
 // Filters: status, task name, time window. Browser-local saved views.
@@ -23748,6 +23854,7 @@ function renderRunningCard(item) {
 var _runListState = {
   filterStatus: 'all',     // 'all' | 'failed' | 'ok'
   filterWindow: '24h',     // '24h' | '7d' | 'all'
+  filterCategory: 'all',   // 'all' | <one of the 11 PRD failure categories>
   filterText: '',          // free-text task name match
   data: [],                // raw runs from /api/cron/runs
 };
@@ -23760,13 +23867,15 @@ function _runListLoadDefaultView() {
       var saved = JSON.parse(raw);
       _runListState.filterStatus = saved.filterStatus || 'all';
       _runListState.filterWindow = saved.filterWindow || '24h';
+      _runListState.filterCategory = saved.filterCategory || 'all';
       _runListState.filterText = saved.filterText || '';
       return;
     }
   } catch (e) { /* ignore */ }
-  // Default: failures, last 24h.
+  // Default: failures, last 24h, all categories.
   _runListState.filterStatus = 'failed';
   _runListState.filterWindow = '24h';
+  _runListState.filterCategory = 'all';
   _runListState.filterText = '';
 }
@@ -23775,6 +23884,7 @@ function _runListSaveView() {
     localStorage.setItem('runListView', JSON.stringify({
       filterStatus: _runListState.filterStatus,
       filterWindow: _runListState.filterWindow,
+      filterCategory: _runListState.filterCategory,
       filterText: _runListState.filterText,
     }));
   } catch (e) { /* ignore */ }
@@ -23786,12 +23896,16 @@ function _runListApplyFilters(runs) {
     : _runListState.filterWindow === '7d' ? 7 * 24 * 60 * 60 * 1000
     : Infinity;
   var query = (_runListState.filterText || '').trim().toLowerCase();
+  var catFilter = _runListState.filterCategory;
   return runs.filter(function(r) {
     if (_runListState.filterStatus === 'failed') {
       if (r.status !== 'error' && r.status !== 'timeout' && r.status !== 'lost') return false;
     } else if (_runListState.filterStatus === 'ok') {
       if (r.status !== 'ok') return false;
     }
+    if (catFilter && catFilter !== 'all') {
+      if (r.failureCategory !== catFilter) return false;
+    }
     if (query && String(r.jobName || '').toLowerCase().indexOf(query) === -1) return false;
     if (windowMs !== Infinity && r.startedAt) {
       var age = now - new Date(r.startedAt).getTime();
@@ -23844,6 +23958,21 @@ function renderRunListBody(allRuns) {
     { value: '7d',  label: 'Last 7 days' },
     { value: 'all', label: 'All time' },
   ], 'filterWindow');
+  // PRD §9 / 1.18.87: 11-category failure filter. Build the option list from
+  // the categories actually present in the loaded data so the chip row stays
+  // compact (don't show buckets that have zero runs).
+  var seenCats = {};
+  for (var ci = 0; ci < allRuns.length; ci++) {
+    var c = allRuns[ci].failureCategory;
+    if (c) seenCats[c] = (seenCats[c] || 0) + 1;
+  }
+  var catOptions = [{ value: 'all', label: 'Any category' }];
+  Object.keys(seenCats).sort().forEach(function(k) {
+    catOptions.push({ value: k, label: _runListCategoryLabel(k) + ' (' + seenCats[k] + ')' });
+  });
+  if (catOptions.length > 1) {
+    html += _runListChip('Category', catOptions, 'filterCategory');
+  }
   html += '<input type="search" placeholder="Filter by task name…" value="' + esc(_runListState.filterText) + '" oninput="onRunListSearch(this.value)" style="flex:1;min-width:200px;max-width:320px;padding:6px 10px;font-size:12px;border:1px solid var(--border);border-radius:6px;background:var(--bg-secondary);color:var(--text-primary)">';
   html += '<button class="btn-sm" onclick="resetRunListFilters()" style="font-size:11px">Reset to default</button>';
   html += '</div>';
@@ -23881,6 +24010,13 @@ function renderRunListBody(allRuns) {
       : entry.trigger === 'after' ? 'var(--purple)'
       : entry.trigger === 'discord' ? 'var(--blue)'
       : 'var(--text-muted)';
+    // 1.18.87: failure category badge in the preview area when set.
+    var categoryBadge = '';
+    if (entry.failureCategory) {
+      var catLabel = _runListCategoryLabel(entry.failureCategory);
+      var catColor = _runListCategoryColor(entry.failureCategory);
+      categoryBadge = '<span style="display:inline-block;background:' + catColor + '20;color:' + catColor + ';padding:1px 6px;border-radius:4px;font-size:10px;font-weight:600;letter-spacing:0.04em;margin-right:4px">' + esc(catLabel) + '</span>';
+    }
     // Goal cell
     var goalCell = '<div></div>';
     if (entry.goalCheck) {
@@ -23901,6 +24037,7 @@ function renderRunListBody(allRuns) {
       +    goalCell
       +    '<div style="min-width:0">'
       +      '<div style="font-weight:500;color:var(--text-primary);font-size:13px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="' + esc(jobName) + '">' + esc(jobName) + (entry.attempt > 1 ? ' · attempt ' + esc(entry.attempt) : '') + '</div>'
+      +      (categoryBadge ? '<div style="margin-top:2px">' + categoryBadge + '</div>' : '')
       +      preview
       +    '</div>'
       +    '<div style="font-size:11px;color:' + triggerColor + ';line-height:18px">' + esc(triggerLabel) + '</div>'
@@ -23913,6 +24050,32 @@ function renderRunListBody(allRuns) {
   return html;
 }
+// PRD §9 / 1.18.87: failure category labels + colors mirror
+// failure-taxonomy.ts on the server. Kept inline so the dashboard JS
+// does not need to round-trip for the lookup.
+function _runListCategoryLabel(cat) {
+  return ({
+    model_error: 'Model API',
+    model_output_error: 'Bad LLM output',
+    tool_error: 'Tool failed',
+    tool_timeout: 'Tool timeout',
+    schema_error: 'Schema mismatch',
+    context_error: 'Context exceeded',
+    prompt_error: 'Blocked by policy',
+    agent_loop_error: 'Loop limit',
+    subagent_error: 'Subagent failed',
+    infrastructure_error: 'Infrastructure',
+    cancelled: 'Cancelled',
+  })[cat] || cat; // a category with no label entry is shown as its raw key
+}
+function _runListCategoryColor(cat) { // maps a failure category to a CSS var() expression
+  if (cat === 'cancelled') return 'var(--text-muted)';
+  if (cat === 'tool_timeout' || cat === 'agent_loop_error' || cat === 'context_error') return 'var(--yellow)';
+  if (cat === 'prompt_error' || cat === 'schema_error') return 'var(--purple)';
+  if (cat === 'model_error' || cat === 'model_output_error') return 'var(--accent)';
+  return 'var(--red)'; // every remaining category, including unrecognized values
+}
 function _runListChip(label, options, stateKey) {
   var current = _runListState[stateKey];
   var html = '<span style="display:inline-flex;align-items:center;gap:4px">';
@@ -23946,6 +24109,7 @@ function onRunListSearch(value) {
 function resetRunListFilters() {
   _runListState.filterStatus = 'failed';
   _runListState.filterWindow = '24h';
+  _runListState.filterCategory = 'all';
   _runListState.filterText = '';
   _runListSaveView();
   var panel = document.getElementById('panel-runs');
@@ -24317,7 +24481,11 @@ async function refreshCron() {
     var visibleRunning = ownerScoped ? (ops.runningNow || []).filter(function(i) { return buildOpsOwnerMatches(i.owner || ''); }) : (ops.runningNow || []);
     var ownerFilter = getBuildOwnerFilter();
-    var html = renderOperationsSummary(ops);
+    // PRD §12 / 1.18.88: Health Strip placeholder. The runs payload from
+    // /api/cron/runs (already fetched alongside ops) feeds the metrics.
+    // Render an empty shell first; refreshHealthStrip fills it in.
+    var html = '<div id="health-strip" class="health-strip"></div>';
+    html += renderOperationsSummary(ops);
     // ── Zone 1 — Running now (promoted to top, primary "what's live" view) ──
     if (visibleRunning.length > 0) {
@@ -24366,6 +24534,11 @@ async function refreshCron() {
     html += renderRecentHistoryList(historyData);
     panel.innerHTML = html;
+    // PRD §12 / 1.18.88: populate the Health Strip after the panel renders.
+    // Fire-and-forget — the strip lives at the top, fills in async.
+    if (typeof refreshHealthStrip === 'function') {
+      refreshHealthStrip().catch(function() { /* non-fatal */ });
+    }
     panel.onclick = function(ev) {
       var target = ev.target;
       while (target && target.id !== 'panel-cron') {

package/dist/gateway/cron-scheduler.js CHANGED Viewed

@@ -1288,7 +1288,11 @@ export class CronScheduler {
                     const errorType = errTerminalReason
                         ? classifyTerminalReason(errTerminalReason)
                         : classifyError(err);
-                    this._logRun({
+                    // 1.18.87: stamp PRD-canonical failure category. classifyRunFailure
+                    // is sync; safe to call inline. Returns null for non-failures, but
+                    // we know this branch is the error path so it always returns a
+                    // category.
+                    const errEntry = {
                         jobName: job.name,
                         startedAt: startedAt.toISOString(),
                         finishedAt: finishedAt.toISOString(),
@@ -1298,12 +1302,25 @@ export class CronScheduler {
                         errorType,
                         terminalReason: errTerminalReason,
                         attempt,
+                        // 1.18.84/85 fields preserved on the error path so the Run list
+                        // can show trigger + open the partial Event log if any.
+                        trigger,
+                        ...(errCronMetadata?.runId ? { id: errCronMetadata.runId } : {}),
                         ...(errCronMetadata?.skillsApplied?.length ? { skillsApplied: errCronMetadata.skillsApplied } : {}),
                         ...(errCronMetadata?.skillsMissing?.length ? { skillsMissing: errCronMetadata.skillsMissing } : {}),
                         ...(errCronMetadata?.allowedToolsApplied?.length ? { allowedToolsApplied: errCronMetadata.allowedToolsApplied } : {}),
                         ...(errCronMetadata?.mcpServersApplied?.length ? { mcpServersApplied: errCronMetadata.mcpServersApplied } : {}),
                         advisorApplied,
-                    });
+                    };
+                    // Lazy-import the classifier so it doesn't load on success paths.
+                    try {
+                        const { classifyRunFailure } = await import('./failure-taxonomy.js');
+                        const cat = classifyRunFailure(errEntry);
+                        if (cat)
+                            errEntry.failureCategory = cat;
+                    }
+                    catch { /* non-fatal */ }
+                    this._logRun(errEntry);
                     if (isCreditBalanceError(err)) {
                         const { block, created } = markBackgroundCreditBlocked(err);
                         logger.error({ err, job: job.name, until: block.until }, 'Cron hit Claude credit exhaustion — pausing background jobs');

package/dist/gateway/failure-taxonomy.d.ts ADDED Viewed

@@ -0,0 +1,24 @@
+/**
+ * PRD §9 / Phase 4c: 11-category failure classifier.
+ *
+ * Maps a CronRunEntry to one of the PRD-canonical failure buckets so the
+ * dashboard's Run list filter and Run detail viewer can group failures
+ * meaningfully. Sits ABOVE the existing job-health.ts classifier (which
+ * still produces the lower-level kind used by self-improve and the
+ * advisor) — this module re-buckets job-health output into PRD vocabulary.
+ *
+ * Source signals consulted, in priority order (after the direct status mappings for cancelled, lost and timeout):
+ *  1. CronRunEntry.terminalReason — most precise, comes straight from SDK.
+ *  2. job-health classifyRunHealth — already has rate_limit / auth / context_overflow / etc.
+ *  3. error string heuristics — last resort.
+ *
+ * Returns null when the run is not a failure (status is 'ok', 'skipped' or 'running').
+ */
+import type { CronRunEntry, RunFailureCategory } from '../types.js';
+/** Returns the PRD-canonical failure bucket, or null if the run succeeded. */
+export declare function classifyRunFailure(entry: CronRunEntry): RunFailureCategory | null;
+/** Human-readable label for a failure category — surfaced on dashboards. */
+export declare function failureCategoryLabel(cat: RunFailureCategory): string;
+/** Color hint for the dashboard pill. Returns a CSS var name. */
+export declare function failureCategoryColor(cat: RunFailureCategory): string;
+//# sourceMappingURL=failure-taxonomy.d.ts.map

package/dist/gateway/failure-taxonomy.js ADDED Viewed

@@ -0,0 +1,173 @@
+/**
+ * PRD §9 / Phase 4c: 11-category failure classifier.
+ *
+ * Maps a CronRunEntry to one of the PRD-canonical failure buckets so the
+ * dashboard's Run list filter and Run detail viewer can group failures
+ * meaningfully. Sits ABOVE the existing job-health.ts classifier (which
+ * still produces the lower-level kind used by self-improve and the
+ * advisor) — this module re-buckets job-health output into PRD vocabulary.
+ *
+ * Source signals consulted, in priority order (after the direct status mappings for cancelled, lost and timeout):
+ *  1. CronRunEntry.terminalReason — most precise, comes straight from SDK.
+ *  2. job-health classifyRunHealth — already has rate_limit / auth / context_overflow / etc.
+ *  3. error string heuristics — last resort.
+ *
+ * Returns null when the run is not a failure (status is 'ok', 'skipped' or 'running').
+ */
+import { classifyRunHealth } from './job-health.js';
+/** Returns the PRD-canonical failure bucket, or null when status is ok, skipped or running. */
+export function classifyRunFailure(entry) {
+    // Non-failure statuses (ok, skipped, running) get no category.
+    if (entry.status === 'ok')
+        return null;
+    if (entry.status === 'skipped')
+        return null;
+    if (entry.status === 'running')
+        return null;
+    // 'cancelled' is its own status today; map directly.
+    if (entry.status === 'cancelled')
+        return 'cancelled';
+    // Lost = daemon-boot sweep closed an orphaned 'running' entry.
+    // Treated as infrastructure_error per PRD §9 — the daemon crashed.
+    if (entry.status === 'lost')
+        return 'infrastructure_error';
+    // Timeout status maps directly.
+    if (entry.status === 'timeout')
+        return 'tool_timeout';
+    // Inspect terminalReason (SDK-reported termination) first — it's the
+    // most precise signal we have.
+    switch (entry.terminalReason) {
+        case 'max_turns':
+            return 'agent_loop_error';
+        case 'prompt_too_long':
+            return 'context_error';
+        case 'rapid_refill_breaker':
+            return 'context_error';
+        case 'blocking_limit':
+            return 'tool_error';
+        case 'image_error':
+            return 'model_output_error';
+        case 'aborted_streaming':
+        case 'aborted_tools':
+            return 'cancelled';
+        case 'stop_hook_prevented':
+        case 'hook_stopped':
+            return 'prompt_error';
+        case 'tool_deferred':
+            return 'tool_error';
+        case 'model_error':
+            return 'model_error';
+        // 'completed' should never land here (status would be 'ok')
+        default:
+            // Fall through to job-health + error string heuristics
+            break;
+    }
+    // High-precedence error-string patterns that should be classified
+    // BEFORE handing to job-health (which collapses "permission denied" into
+    // tool_scope, but PRD §9 says hook-blocked permission denials are
+    // prompt_error). Order matters here.
+    const earlyBlob = ((entry.error ?? '') + ' ' + (entry.outputPreview ?? '')).toLowerCase();
+    if (/permission denied|policy violation|prompt[- ]injection|guardrail|blocked by hook/.test(earlyBlob)) {
+        return 'prompt_error';
+    }
+    if (/^cancel|user (?:interrupt|abort|stopped)/.test(earlyBlob)) { // ^cancel only matches at the very start of earlyBlob
+        return 'cancelled';
+    }
+    if (/subagent|sub[- ]agent failed|delegated agent/.test(earlyBlob)) {
+        return 'subagent_error';
+    }
+    // Use the existing health classifier for buckets it already knows about.
+    // The entry is handed over unchanged; a throw from it is caught and ignored.
+    try {
+        const health = classifyRunHealth(entry);
+        switch (health.status) {
+            case 'usage_blocked':
+            case 'auth':
+            case 'rate_limited':
+                return 'model_error';
+            case 'context_overflow':
+            case 'prompt_too_large':
+                return 'context_error';
+            case 'tool_scope':
+                return 'tool_error';
+            case 'partial':
+                // delivery-failed runs surface as tool_error in the new taxonomy
+                return 'tool_error';
+            case 'failed':
+                // Disambiguate via error string below
+                break;
+            case 'unknown':
+            default:
+                break;
+        }
+    }
+    catch {
+        // job-health threw — proceed with heuristics
+    }
+    // Error-string heuristics, last resort; first match wins. blob is lowercased,
+    // so only lowercase patterns can match (the uppercase ENOENT one below cannot).
+    const blob = ((entry.error ?? '') + ' ' + (entry.outputPreview ?? '')).toLowerCase();
+    if (!blob.trim())
+        return 'infrastructure_error';
+    if (/refusal|cannot (?:assist|help|comply)|i (?:can'?t|am unable)/.test(blob))
+        return 'model_output_error';
+    if (/invalid (?:tool|function) (?:call|input|json)|malformed tool|tool .* invalid arguments/.test(blob))
+        return 'model_output_error';
+    if (/permission denied|policy violation|prompt[- ]injection|guardrail|blocked by hook/.test(blob))
+        return 'prompt_error';
+    if (/tool .* time(d)? ?out|exceeded .* deadline|tool deadline/.test(blob))
+        return 'tool_timeout';
+    if (/schema|validation failed|did not validate|does not match schema/.test(blob))
+        return 'schema_error';
+    if (/context|too long|maximum context|exceeds.*tokens|input is too long/.test(blob))
+        return 'context_error';
+    if (/subagent|sub[- ]agent failed|delegated agent/.test(blob))
+        return 'subagent_error';
+    if (/cancel|user (?:interrupt|abort|stopped)/.test(blob))
+        return 'cancelled';
+    if (/oom|out of memory|enospc|enoent|enotfound|spawn .*ENOENT|process .* exited|terminated/.test(blob))
+        return 'infrastructure_error';
+    if (/401|403|unauthor|forbidden|invalid api key|api[- ]key/.test(blob))
+        return 'model_error';
+    if (/429|rate.?limit|quota/.test(blob))
+        return 'model_error';
+    if (/credit|billing|usage limit/.test(blob))
+        return 'model_error';
+    if (/(network|fetch|connect).*(fail|reset|refused|timeout)/.test(blob))
+        return 'infrastructure_error';
+    // Default catch-all — the run failed but the cause isn't explicit.
+    return 'tool_error';
+}
+/** Human-readable label for a failure category — surfaced on dashboards. The switch has no default branch, so a value outside the 11 cases yields undefined. */
+export function failureCategoryLabel(cat) {
+    switch (cat) {
+        case 'model_error': return 'Model API';
+        case 'model_output_error': return 'Bad LLM output';
+        case 'tool_error': return 'Tool failed';
+        case 'tool_timeout': return 'Tool timeout';
+        case 'schema_error': return 'Schema mismatch';
+        case 'context_error': return 'Context exceeded';
+        case 'prompt_error': return 'Blocked by policy';
+        case 'agent_loop_error': return 'Loop limit';
+        case 'subagent_error': return 'Subagent failed';
+        case 'infrastructure_error': return 'Infrastructure';
+        case 'cancelled': return 'Cancelled';
+    }
+}
+/** Color hint for the dashboard pill. Returns a CSS var() expression such as var(--red); a value outside the listed cases yields undefined (no default branch). */
+export function failureCategoryColor(cat) {
+    switch (cat) {
+        case 'cancelled': return 'var(--text-muted)';
+        case 'tool_timeout':
+        case 'agent_loop_error':
+        case 'context_error': return 'var(--yellow)';
+        case 'prompt_error':
+        case 'schema_error': return 'var(--purple)';
+        case 'model_error':
+        case 'model_output_error': return 'var(--accent)';
+        case 'infrastructure_error': return 'var(--red)';
+        case 'tool_error':
+        case 'subagent_error': return 'var(--red)';
+    }
+}
+//# sourceMappingURL=failure-taxonomy.js.map

package/dist/types.d.ts CHANGED Viewed

@@ -448,6 +448,16 @@ export interface RunEvent {
     /** Subagent id when kind='subagent_*'. */
     agentId?: string;
 }
+/**
+ * PRD §9 / 1.18.87: 11-category failure taxonomy. Replaces the existing
+ * JobHealthKind union for surfacing-on-the-dashboard purposes (job-health.ts
+ * stays as the lower-level classifier and feeds into this).
+ *
+ * Stamped on CronRunEntry.failureCategory at write-time when the run is a
+ * failure (status: 'error' | 'timeout' | 'lost' | retried-final). Powers
+ * the Run list filter chip and the Run detail viewer's failure pill.
+ */
+export type RunFailureCategory = 'model_error' | 'model_output_error' | 'tool_error' | 'tool_timeout' | 'schema_error' | 'context_error' | 'prompt_error' | 'agent_loop_error' | 'subagent_error' | 'infrastructure_error' | 'cancelled';
 export interface CronRunEntry {
     /** PRD §6 / 1.18.85: stable run UUID. Optional only because pre-1.18.85
      *  entries don't have it; new entries always do. The Event store keys
@@ -500,6 +510,10 @@ export interface CronRunEntry {
      *  Discord) so the Run list can filter by source instead of guessing
      *  via heuristics on attempt count. */
     trigger?: 'manual' | 'scheduled' | 'webhook' | 'api' | 'fork' | 'resume' | 'discord' | 'after';
+    /** PRD §9 / 1.18.87: PRD-canonical failure bucket. Set on every entry
+     *  whose status indicates a failure (error/timeout/lost/cancelled). The
+     *  Run list filter chip and Run detail header read from this field. */
+    failureCategory?: RunFailureCategory;
     /** PRD Phase 1: did the run accomplish what it was supposed to?
      *  Computed at run-end when the Task has successSchema or successCriteriaText.
      *  - status='pass'      both configured checks passed (or the only one configured did)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.86",
+  "version": "1.18.88",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",