clementine-agent 1.18.86 → 1.18.87

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/cron.js CHANGED
@@ -182,7 +182,7 @@ export async function cmdCronRun(jobName) {
182
182
  catch (err) {
183
183
  const finishedAt = new Date();
184
184
  const trigger = process.env.CRON_RUN_TRIGGER || 'scheduled';
185
- runLog.append({
185
+ const errEntry = {
186
186
  jobName: job.name,
187
187
  startedAt: startedAt.toISOString(),
188
188
  finishedAt: finishedAt.toISOString(),
@@ -192,7 +192,16 @@ export async function cmdCronRun(jobName) {
192
192
  errorType: classifyError(err),
193
193
  attempt: 1,
194
194
  trigger,
195
- });
195
+ };
196
+ // 1.18.87: stamp PRD-canonical failure category.
197
+ try {
198
+ const { classifyRunFailure } = await import('../gateway/failure-taxonomy.js');
199
+ const cat = classifyRunFailure(errEntry);
200
+ if (cat)
201
+ errEntry.failureCategory = cat;
202
+ }
203
+ catch { /* non-fatal */ }
204
+ runLog.append(errEntry);
196
205
  console.error(`Error: ${err}`);
197
206
  process.exit(1);
198
207
  }
@@ -23748,6 +23748,7 @@ function renderRunningCard(item) {
23748
23748
  var _runListState = {
23749
23749
  filterStatus: 'all', // 'all' | 'failed' | 'ok'
23750
23750
  filterWindow: '24h', // '24h' | '7d' | 'all'
23751
+ filterCategory: 'all', // 'all' | <one of the 11 PRD failure categories>
23751
23752
  filterText: '', // free-text task name match
23752
23753
  data: [], // raw runs from /api/cron/runs
23753
23754
  };
@@ -23760,13 +23761,15 @@ function _runListLoadDefaultView() {
23760
23761
  var saved = JSON.parse(raw);
23761
23762
  _runListState.filterStatus = saved.filterStatus || 'all';
23762
23763
  _runListState.filterWindow = saved.filterWindow || '24h';
23764
+ _runListState.filterCategory = saved.filterCategory || 'all';
23763
23765
  _runListState.filterText = saved.filterText || '';
23764
23766
  return;
23765
23767
  }
23766
23768
  } catch (e) { /* ignore */ }
23767
- // Default: failures, last 24h.
23769
+ // Default: failures, last 24h, all categories.
23768
23770
  _runListState.filterStatus = 'failed';
23769
23771
  _runListState.filterWindow = '24h';
23772
+ _runListState.filterCategory = 'all';
23770
23773
  _runListState.filterText = '';
23771
23774
  }
23772
23775
 
@@ -23775,6 +23778,7 @@ function _runListSaveView() {
23775
23778
  localStorage.setItem('runListView', JSON.stringify({
23776
23779
  filterStatus: _runListState.filterStatus,
23777
23780
  filterWindow: _runListState.filterWindow,
23781
+ filterCategory: _runListState.filterCategory,
23778
23782
  filterText: _runListState.filterText,
23779
23783
  }));
23780
23784
  } catch (e) { /* ignore */ }
@@ -23786,12 +23790,16 @@ function _runListApplyFilters(runs) {
23786
23790
  : _runListState.filterWindow === '7d' ? 7 * 24 * 60 * 60 * 1000
23787
23791
  : Infinity;
23788
23792
  var query = (_runListState.filterText || '').trim().toLowerCase();
23793
+ var catFilter = _runListState.filterCategory;
23789
23794
  return runs.filter(function(r) {
23790
23795
  if (_runListState.filterStatus === 'failed') {
23791
23796
  if (r.status !== 'error' && r.status !== 'timeout' && r.status !== 'lost') return false;
23792
23797
  } else if (_runListState.filterStatus === 'ok') {
23793
23798
  if (r.status !== 'ok') return false;
23794
23799
  }
23800
+ if (catFilter && catFilter !== 'all') {
23801
+ if (r.failureCategory !== catFilter) return false;
23802
+ }
23795
23803
  if (query && String(r.jobName || '').toLowerCase().indexOf(query) === -1) return false;
23796
23804
  if (windowMs !== Infinity && r.startedAt) {
23797
23805
  var age = now - new Date(r.startedAt).getTime();
@@ -23844,6 +23852,21 @@ function renderRunListBody(allRuns) {
23844
23852
  { value: '7d', label: 'Last 7 days' },
23845
23853
  { value: 'all', label: 'All time' },
23846
23854
  ], 'filterWindow');
23855
+ // PRD §9 / 1.18.87: 11-category failure filter. Build the option list from
23856
+ // the categories actually present in the loaded data so the chip row stays
23857
+ // compact (don't show buckets that have zero runs).
23858
+ var seenCats = {};
23859
+ for (var ci = 0; ci < allRuns.length; ci++) {
23860
+ var c = allRuns[ci].failureCategory;
23861
+ if (c) seenCats[c] = (seenCats[c] || 0) + 1;
23862
+ }
23863
+ var catOptions = [{ value: 'all', label: 'Any category' }];
23864
+ Object.keys(seenCats).sort().forEach(function(k) {
23865
+ catOptions.push({ value: k, label: _runListCategoryLabel(k) + ' (' + seenCats[k] + ')' });
23866
+ });
23867
+ if (catOptions.length > 1) {
23868
+ html += _runListChip('Category', catOptions, 'filterCategory');
23869
+ }
23847
23870
  html += '<input type="search" placeholder="Filter by task name…" value="' + esc(_runListState.filterText) + '" oninput="onRunListSearch(this.value)" style="flex:1;min-width:200px;max-width:320px;padding:6px 10px;font-size:12px;border:1px solid var(--border);border-radius:6px;background:var(--bg-secondary);color:var(--text-primary)">';
23848
23871
  html += '<button class="btn-sm" onclick="resetRunListFilters()" style="font-size:11px">Reset to default</button>';
23849
23872
  html += '</div>';
@@ -23881,6 +23904,13 @@ function renderRunListBody(allRuns) {
23881
23904
  : entry.trigger === 'after' ? 'var(--purple)'
23882
23905
  : entry.trigger === 'discord' ? 'var(--blue)'
23883
23906
  : 'var(--text-muted)';
23907
+ // 1.18.87: failure category badge in the preview area when set.
23908
+ var categoryBadge = '';
23909
+ if (entry.failureCategory) {
23910
+ var catLabel = _runListCategoryLabel(entry.failureCategory);
23911
+ var catColor = _runListCategoryColor(entry.failureCategory);
23912
+ categoryBadge = '<span style="display:inline-block;background:' + catColor + '20;color:' + catColor + ';padding:1px 6px;border-radius:4px;font-size:10px;font-weight:600;letter-spacing:0.04em;margin-right:4px">' + esc(catLabel) + '</span>';
23913
+ }
23884
23914
  // Goal cell
23885
23915
  var goalCell = '<div></div>';
23886
23916
  if (entry.goalCheck) {
@@ -23901,6 +23931,7 @@ function renderRunListBody(allRuns) {
23901
23931
  + goalCell
23902
23932
  + '<div style="min-width:0">'
23903
23933
  + '<div style="font-weight:500;color:var(--text-primary);font-size:13px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="' + esc(jobName) + '">' + esc(jobName) + (entry.attempt > 1 ? ' · attempt ' + esc(entry.attempt) : '') + '</div>'
23934
+ + (categoryBadge ? '<div style="margin-top:2px">' + categoryBadge + '</div>' : '')
23904
23935
  + preview
23905
23936
  + '</div>'
23906
23937
  + '<div style="font-size:11px;color:' + triggerColor + ';line-height:18px">' + esc(triggerLabel) + '</div>'
@@ -23913,6 +23944,32 @@ function renderRunListBody(allRuns) {
23913
23944
  return html;
23914
23945
  }
23915
23946
 
23947
+ // PRD §9 / 1.18.87: failure category labels + colors mirror
23948
+ // failure-taxonomy.ts on the server. Kept inline so the dashboard JS
23949
+ // doesn't need to round-trip for the lookup.
23950
/**
 * Map a failure-category key to the short label shown on the Run list
 * (filter chip options and the per-row badge).
 *
 * @param {string} cat - Failure category key, e.g. 'tool_timeout'.
 * @returns {string} The human-readable label, or `cat` itself when the key
 *   is not one of the known categories.
 */
function _runListCategoryLabel(cat) {
  var labels = {
    model_error: 'Model API',
    model_output_error: 'Bad LLM output',
    tool_error: 'Tool failed',
    tool_timeout: 'Tool timeout',
    schema_error: 'Schema mismatch',
    context_error: 'Context exceeded',
    prompt_error: 'Blocked by policy',
    agent_loop_error: 'Loop limit',
    subagent_error: 'Subagent failed',
    infrastructure_error: 'Infrastructure',
    cancelled: 'Cancelled',
  };
  // Own-property check: a bare `labels[cat]` lookup also resolves inherited
  // names such as 'toString' or 'constructor' and would hand back a function
  // instead of a label string.
  return Object.prototype.hasOwnProperty.call(labels, cat) ? labels[cat] : cat;
}
23965
/**
 * Pick the CSS colour expression used for a failure-category badge.
 *
 * @param {string} cat - Failure category key.
 * @returns {string} A `var(--…)` CSS expression; any key without an
 *   explicit mapping gets `var(--red)`.
 */
function _runListCategoryColor(cat) {
  switch (cat) {
    case 'cancelled':
      return 'var(--text-muted)';
    case 'tool_timeout':
    case 'agent_loop_error':
    case 'context_error':
      return 'var(--yellow)';
    case 'prompt_error':
    case 'schema_error':
      return 'var(--purple)';
    case 'model_error':
    case 'model_output_error':
      return 'var(--accent)';
    default:
      return 'var(--red)';
  }
}
23972
+
23916
23973
  function _runListChip(label, options, stateKey) {
23917
23974
  var current = _runListState[stateKey];
23918
23975
  var html = '<span style="display:inline-flex;align-items:center;gap:4px">';
@@ -23946,6 +24003,7 @@ function onRunListSearch(value) {
23946
24003
  function resetRunListFilters() {
23947
24004
  _runListState.filterStatus = 'failed';
23948
24005
  _runListState.filterWindow = '24h';
24006
+ _runListState.filterCategory = 'all';
23949
24007
  _runListState.filterText = '';
23950
24008
  _runListSaveView();
23951
24009
  var panel = document.getElementById('panel-runs');
@@ -1288,7 +1288,11 @@ export class CronScheduler {
1288
1288
  const errorType = errTerminalReason
1289
1289
  ? classifyTerminalReason(errTerminalReason)
1290
1290
  : classifyError(err);
1291
- this._logRun({
1291
+ // 1.18.87: stamp PRD-canonical failure category. classifyRunFailure
1292
+ // is sync; safe to call inline. Returns null for non-failures, but
1293
+ // we know this branch is the error path so it always returns a
1294
+ // category.
1295
+ const errEntry = {
1292
1296
  jobName: job.name,
1293
1297
  startedAt: startedAt.toISOString(),
1294
1298
  finishedAt: finishedAt.toISOString(),
@@ -1298,12 +1302,25 @@ export class CronScheduler {
1298
1302
  errorType,
1299
1303
  terminalReason: errTerminalReason,
1300
1304
  attempt,
1305
+ // 1.18.84/85 fields preserved on the error path so the Run list
1306
+ // can show trigger + open the partial Event log if any.
1307
+ trigger,
1308
+ ...(errCronMetadata?.runId ? { id: errCronMetadata.runId } : {}),
1301
1309
  ...(errCronMetadata?.skillsApplied?.length ? { skillsApplied: errCronMetadata.skillsApplied } : {}),
1302
1310
  ...(errCronMetadata?.skillsMissing?.length ? { skillsMissing: errCronMetadata.skillsMissing } : {}),
1303
1311
  ...(errCronMetadata?.allowedToolsApplied?.length ? { allowedToolsApplied: errCronMetadata.allowedToolsApplied } : {}),
1304
1312
  ...(errCronMetadata?.mcpServersApplied?.length ? { mcpServersApplied: errCronMetadata.mcpServersApplied } : {}),
1305
1313
  advisorApplied,
1306
- });
1314
+ };
1315
+ // Lazy-import the classifier so it doesn't load on success paths.
1316
+ try {
1317
+ const { classifyRunFailure } = await import('./failure-taxonomy.js');
1318
+ const cat = classifyRunFailure(errEntry);
1319
+ if (cat)
1320
+ errEntry.failureCategory = cat;
1321
+ }
1322
+ catch { /* non-fatal */ }
1323
+ this._logRun(errEntry);
1307
1324
  if (isCreditBalanceError(err)) {
1308
1325
  const { block, created } = markBackgroundCreditBlocked(err);
1309
1326
  logger.error({ err, job: job.name, until: block.until }, 'Cron hit Claude credit exhaustion — pausing background jobs');
@@ -0,0 +1,24 @@
1
+ /**
2
+ * PRD §9 / Phase 4c: 11-category failure classifier.
3
+ *
4
+ * Maps a CronRunEntry to one of the PRD-canonical failure buckets so the
5
+ * dashboard's Run list filter and Run detail viewer can group failures
6
+ * meaningfully. Sits ABOVE the existing job-health.ts classifier (which
7
+ * still produces the lower-level kind used by self-improve and the
8
+ * advisor) — this module re-buckets job-health output into PRD vocabulary.
9
+ *
10
+ * Source signals consulted, in priority order:
11
+ * 1. CronRunEntry.terminalReason — most precise, comes straight from SDK.
12
+ * 2. job-health classifyRunHealth — already has rate_limit / auth / context_overflow / etc.
13
+ * 3. error string heuristics — last resort.
14
+ *
15
+ * Returns null when the run is not a failure (status 'ok', 'skipped', or 'running').
16
+ */
17
+ import type { CronRunEntry, RunFailureCategory } from '../types.js';
18
+ /** Returns the PRD-canonical failure bucket, or null if the run is not a failure. */
19
+ export declare function classifyRunFailure(entry: CronRunEntry): RunFailureCategory | null;
20
+ /** Human-readable label for a failure category — surfaced on dashboards. */
21
+ export declare function failureCategoryLabel(cat: RunFailureCategory): string;
22
+ /** Color hint for the dashboard pill. Returns a CSS var name. */
23
+ export declare function failureCategoryColor(cat: RunFailureCategory): string;
24
+ //# sourceMappingURL=failure-taxonomy.d.ts.map
@@ -0,0 +1,173 @@
1
+ /**
2
+ * PRD §9 / Phase 4c: 11-category failure classifier.
3
+ *
4
+ * Maps a CronRunEntry to one of the PRD-canonical failure buckets so the
5
+ * dashboard's Run list filter and Run detail viewer can group failures
6
+ * meaningfully. Sits ABOVE the existing job-health.ts classifier (which
7
+ * still produces the lower-level kind used by self-improve and the
8
+ * advisor) — this module re-buckets job-health output into PRD vocabulary.
9
+ *
10
+ * Source signals consulted, in priority order:
11
+ * 1. CronRunEntry.terminalReason — most precise, comes straight from SDK.
12
+ * 2. job-health classifyRunHealth — already has rate_limit / auth / context_overflow / etc.
13
+ * 3. error string heuristics — last resort.
14
+ *
15
+ * Returns null when the run is not a failure (status 'ok', 'skipped', or 'running').
16
+ */
17
+ import { classifyRunHealth } from './job-health.js';
18
/**
 * Returns the PRD-canonical failure bucket for a run entry, or null when the
 * run is not a failure.
 *
 * Signals are consulted in this order; the first one that matches wins:
 *   1. `entry.status` ('ok' / 'skipped' / 'running' -> null; 'cancelled',
 *      'lost' and 'timeout' map directly to a bucket).
 *   2. `entry.terminalReason`.
 *   3. High-precedence patterns in `entry.error` + `entry.outputPreview`.
 *   4. `classifyRunHealth(entry)` from job-health.
 *   5. Remaining error-string heuristics, ending in a 'tool_error' catch-all.
 *
 * @param {object} entry - Run entry. Reads `status`, `terminalReason`,
 *   `error` and `outputPreview`; the whole entry is also handed to
 *   `classifyRunHealth`.
 * @returns {string|null} One of the failure-category keys, or null.
 */
export function classifyRunFailure(entry) {
    // Non-failures don't get a category.
    if (entry.status === 'ok' || entry.status === 'skipped' || entry.status === 'running')
        return null;
    // 'cancelled' is its own status today; map directly.
    if (entry.status === 'cancelled')
        return 'cancelled';
    // Lost = daemon-boot sweep closed an orphaned 'running' entry.
    // Treated as infrastructure_error per PRD §9 — the daemon crashed.
    if (entry.status === 'lost')
        return 'infrastructure_error';
    // Timeout status maps directly.
    if (entry.status === 'timeout')
        return 'tool_timeout';
    // Inspect terminalReason (SDK-reported termination) first — it's the
    // most precise signal we have.
    switch (entry.terminalReason) {
        case 'max_turns':
            return 'agent_loop_error';
        case 'prompt_too_long':
        case 'rapid_refill_breaker':
            return 'context_error';
        case 'blocking_limit':
        case 'tool_deferred':
            return 'tool_error';
        case 'image_error':
            return 'model_output_error';
        case 'aborted_streaming':
        case 'aborted_tools':
            return 'cancelled';
        case 'stop_hook_prevented':
        case 'hook_stopped':
            return 'prompt_error';
        case 'model_error':
            return 'model_error';
        // 'completed' should never land here (status would be 'ok')
        default:
            // Fall through to job-health + error string heuristics
            break;
    }
    // Lower-cased haystack shared by every string heuristic below. Because it
    // is lower-cased, every pattern tested against it must be lower-case too.
    const blob = ((entry.error ?? '') + ' ' + (entry.outputPreview ?? '')).toLowerCase();
    // High-precedence error-string patterns that should be classified
    // BEFORE handing to job-health (which collapses "permission denied" into
    // tool_scope, but PRD §9 says hook-blocked permission denials are
    // prompt_error). Order matters here.
    if (/permission denied|policy violation|prompt[- ]injection|guardrail|blocked by hook/.test(blob))
        return 'prompt_error';
    if (/^cancel|user (?:interrupt|abort|stopped)/.test(blob))
        return 'cancelled';
    if (/subagent|sub[- ]agent failed|delegated agent/.test(blob))
        return 'subagent_error';
    // Use the existing health classifier for buckets it already knows about.
    // The entry is passed through unchanged.
    try {
        const health = classifyRunHealth(entry);
        switch (health.status) {
            case 'usage_blocked':
            case 'auth':
            case 'rate_limited':
                return 'model_error';
            case 'context_overflow':
            case 'prompt_too_large':
                return 'context_error';
            case 'tool_scope':
                return 'tool_error';
            case 'partial':
                // delivery-failed runs surface as tool_error in the new taxonomy
                return 'tool_error';
            case 'failed':
            case 'unknown':
            default:
                // Disambiguate via error string below
                break;
        }
    }
    catch {
        // job-health threw — deliberate best-effort, proceed with heuristics
    }
    // Error-string heuristics. Last-resort. Order matters: more specific
    // patterns first so the catch-all doesn't swallow them. The prompt_error
    // and subagent_error patterns are not repeated here: they already
    // returned above on this same blob.
    if (!blob.trim())
        return 'infrastructure_error';
    if (/refusal|cannot (?:assist|help|comply)|i (?:can'?t|am unable)/.test(blob))
        return 'model_output_error';
    if (/invalid (?:tool|function) (?:call|input|json)|malformed tool|tool .* invalid arguments/.test(blob))
        return 'model_output_error';
    if (/tool .* time(d)? ?out|exceeded .* deadline|tool deadline/.test(blob))
        return 'tool_timeout';
    if (/schema|validation failed|did not validate|does not match schema/.test(blob))
        return 'schema_error';
    if (/context|too long|maximum context|exceeds.*tokens|input is too long/.test(blob))
        return 'context_error';
    if (/cancel|user (?:interrupt|abort|stopped)/.test(blob))
        return 'cancelled';
    // `oom` must not be preceded by a letter, so "room" / "zoom" do not count
    // as out-of-memory. `enoent` already covers "spawn … ENOENT" once the
    // blob is lower-cased.
    if (/(?<![a-z])oom|out of memory|enospc|enoent|enotfound|process .* exited|terminated/.test(blob))
        return 'infrastructure_error';
    // Status codes must stand alone, so digits inside a longer number (a
    // duration, a byte count, an id) are not mistaken for HTTP 401/403/429.
    if (/(?<!\d)(?:401|403)(?!\d)|unauthor|forbidden|invalid api key|api[- ]key/.test(blob))
        return 'model_error';
    if (/(?<!\d)429(?!\d)|rate.?limit|quota/.test(blob))
        return 'model_error';
    if (/credit|billing|usage limit/.test(blob))
        return 'model_error';
    if (/(network|fetch|connect).*(fail|reset|refused|timeout)/.test(blob))
        return 'infrastructure_error';
    // Default catch-all — the run failed but the cause isn't explicit.
    return 'tool_error';
}
141
/**
 * Human-readable label for a failure category — surfaced on dashboards.
 *
 * @param {string} cat - Failure category key.
 * @returns {string} The display label. A key outside the known set is
 *   returned unchanged rather than yielding `undefined`.
 */
export function failureCategoryLabel(cat) {
    switch (cat) {
        case 'model_error': return 'Model API';
        case 'model_output_error': return 'Bad LLM output';
        case 'tool_error': return 'Tool failed';
        case 'tool_timeout': return 'Tool timeout';
        case 'schema_error': return 'Schema mismatch';
        case 'context_error': return 'Context exceeded';
        case 'prompt_error': return 'Blocked by policy';
        case 'agent_loop_error': return 'Loop limit';
        case 'subagent_error': return 'Subagent failed';
        case 'infrastructure_error': return 'Infrastructure';
        case 'cancelled': return 'Cancelled';
        // Unrecognised key: show it raw instead of returning undefined.
        default: return cat;
    }
}
157
/**
 * Color hint for the dashboard pill.
 *
 * @param {string} cat - Failure category key.
 * @returns {string} A `var(--…)` CSS expression. A key outside the known
 *   set gets `var(--red)` rather than `undefined`.
 */
export function failureCategoryColor(cat) {
    switch (cat) {
        case 'cancelled': return 'var(--text-muted)';
        case 'tool_timeout':
        case 'agent_loop_error':
        case 'context_error': return 'var(--yellow)';
        case 'prompt_error':
        case 'schema_error': return 'var(--purple)';
        case 'model_error':
        case 'model_output_error': return 'var(--accent)';
        case 'infrastructure_error':
        case 'tool_error':
        case 'subagent_error': return 'var(--red)';
        // Unrecognised key: fall back to red instead of returning undefined.
        default: return 'var(--red)';
    }
}
173
+ //# sourceMappingURL=failure-taxonomy.js.map
package/dist/types.d.ts CHANGED
@@ -448,6 +448,16 @@ export interface RunEvent {
448
448
  /** Subagent id when kind='subagent_*'. */
449
449
  agentId?: string;
450
450
  }
451
+ /**
452
+ * PRD §9 / 1.18.87: 11-category failure taxonomy. Replaces the existing
453
+ * JobHealthKind union for surfacing-on-the-dashboard purposes (job-health.ts
454
+ * stays as the lower-level classifier and feeds into this).
455
+ *
456
+ * Stamped on CronRunEntry.failureCategory at write-time when the run is a
457
+ * failure (status: 'error' | 'timeout' | 'lost' | retried-final). Powers
458
+ * the Run list filter chip and the Run detail viewer's failure pill.
459
+ */
460
+ export type RunFailureCategory = 'model_error' | 'model_output_error' | 'tool_error' | 'tool_timeout' | 'schema_error' | 'context_error' | 'prompt_error' | 'agent_loop_error' | 'subagent_error' | 'infrastructure_error' | 'cancelled';
451
461
  export interface CronRunEntry {
452
462
  /** PRD §6 / 1.18.85: stable run UUID. Optional only because pre-1.18.85
453
463
  * entries don't have it; new entries always do. The Event store keys
@@ -500,6 +510,10 @@ export interface CronRunEntry {
500
510
  * Discord) so the Run list can filter by source instead of guessing
501
511
  * via heuristics on attempt count. */
502
512
  trigger?: 'manual' | 'scheduled' | 'webhook' | 'api' | 'fork' | 'resume' | 'discord' | 'after';
513
+ /** PRD §9 / 1.18.87: PRD-canonical failure bucket. Set on every entry
514
+ * whose status indicates a failure (error/timeout/lost/cancelled). The
515
+ * Run list filter chip and Run detail header read from this field. */
516
+ failureCategory?: RunFailureCategory;
503
517
  /** PRD Phase 1: did the run accomplish what it was supposed to?
504
518
  * Computed at run-end when the Task has successSchema or successCriteriaText.
505
519
  * - status='pass' both configured checks passed (or the only one configured did)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.86",
3
+ "version": "1.18.87",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",