clementine-agent 1.18.85 → 1.18.87
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/cron.js +11 -2
- package/dist/cli/dashboard.js +218 -4
- package/dist/gateway/cron-scheduler.js +19 -2
- package/dist/gateway/failure-taxonomy.d.ts +24 -0
- package/dist/gateway/failure-taxonomy.js +173 -0
- package/dist/types.d.ts +14 -0
- package/package.json +1 -1
package/dist/cli/cron.js
CHANGED
|
@@ -182,7 +182,7 @@ export async function cmdCronRun(jobName) {
|
|
|
182
182
|
catch (err) {
|
|
183
183
|
const finishedAt = new Date();
|
|
184
184
|
const trigger = process.env.CRON_RUN_TRIGGER || 'scheduled';
|
|
185
|
-
|
|
185
|
+
const errEntry = {
|
|
186
186
|
jobName: job.name,
|
|
187
187
|
startedAt: startedAt.toISOString(),
|
|
188
188
|
finishedAt: finishedAt.toISOString(),
|
|
@@ -192,7 +192,16 @@ export async function cmdCronRun(jobName) {
|
|
|
192
192
|
errorType: classifyError(err),
|
|
193
193
|
attempt: 1,
|
|
194
194
|
trigger,
|
|
195
|
-
}
|
|
195
|
+
};
|
|
196
|
+
// 1.18.87: stamp PRD-canonical failure category.
|
|
197
|
+
try {
|
|
198
|
+
const { classifyRunFailure } = await import('../gateway/failure-taxonomy.js');
|
|
199
|
+
const cat = classifyRunFailure(errEntry);
|
|
200
|
+
if (cat)
|
|
201
|
+
errEntry.failureCategory = cat;
|
|
202
|
+
}
|
|
203
|
+
catch { /* non-fatal */ }
|
|
204
|
+
runLog.append(errEntry);
|
|
196
205
|
console.error(`Error: ${err}`);
|
|
197
206
|
process.exit(1);
|
|
198
207
|
}
|
package/dist/cli/dashboard.js
CHANGED
|
@@ -23700,7 +23700,7 @@ function renderRecentHistoryList(runs) {
|
|
|
23700
23700
|
+ '</div>'
|
|
23701
23701
|
+ '<div style="font-size:12px;color:var(--text-secondary);line-height:18px">' + esc(startedLabel) + '</div>'
|
|
23702
23702
|
+ '<div style="font-size:12px;color:var(--text-muted);line-height:18px">' + esc(durationLabel) + '</div>'
|
|
23703
|
-
+ '<div style="display:flex;gap:6px;align-items:center"><button class="btn-sm" onclick="event.stopPropagation();
|
|
23703
|
+
+ '<div style="display:flex;gap:6px;align-items:center"><button class="btn-sm" onclick="event.stopPropagation();openRunOrTrace(\\x27' + safeName + '\\x27,' + (entry.id ? '\\x27' + jsStr(entry.id) + '\\x27' : 'null') + ')" style="font-size:11px;padding:3px 8px">' + (entry.id ? 'Open run' : 'Trace') + '</button></div>'
|
|
23704
23704
|
+ '</div>';
|
|
23705
23705
|
}
|
|
23706
23706
|
return '<div class="history-list" style="background:var(--bg-secondary);border:1px solid var(--border);border-radius:var(--radius)">'
|
|
@@ -23748,6 +23748,7 @@ function renderRunningCard(item) {
|
|
|
23748
23748
|
var _runListState = {
|
|
23749
23749
|
filterStatus: 'all', // 'all' | 'failed' | 'ok'
|
|
23750
23750
|
filterWindow: '24h', // '24h' | '7d' | 'all'
|
|
23751
|
+
filterCategory: 'all', // 'all' | <one of the 11 PRD failure categories>
|
|
23751
23752
|
filterText: '', // free-text task name match
|
|
23752
23753
|
data: [], // raw runs from /api/cron/runs
|
|
23753
23754
|
};
|
|
@@ -23760,13 +23761,15 @@ function _runListLoadDefaultView() {
|
|
|
23760
23761
|
var saved = JSON.parse(raw);
|
|
23761
23762
|
_runListState.filterStatus = saved.filterStatus || 'all';
|
|
23762
23763
|
_runListState.filterWindow = saved.filterWindow || '24h';
|
|
23764
|
+
_runListState.filterCategory = saved.filterCategory || 'all';
|
|
23763
23765
|
_runListState.filterText = saved.filterText || '';
|
|
23764
23766
|
return;
|
|
23765
23767
|
}
|
|
23766
23768
|
} catch (e) { /* ignore */ }
|
|
23767
|
-
// Default: failures, last 24h.
|
|
23769
|
+
// Default: failures, last 24h, all categories.
|
|
23768
23770
|
_runListState.filterStatus = 'failed';
|
|
23769
23771
|
_runListState.filterWindow = '24h';
|
|
23772
|
+
_runListState.filterCategory = 'all';
|
|
23770
23773
|
_runListState.filterText = '';
|
|
23771
23774
|
}
|
|
23772
23775
|
|
|
@@ -23775,6 +23778,7 @@ function _runListSaveView() {
|
|
|
23775
23778
|
localStorage.setItem('runListView', JSON.stringify({
|
|
23776
23779
|
filterStatus: _runListState.filterStatus,
|
|
23777
23780
|
filterWindow: _runListState.filterWindow,
|
|
23781
|
+
filterCategory: _runListState.filterCategory,
|
|
23778
23782
|
filterText: _runListState.filterText,
|
|
23779
23783
|
}));
|
|
23780
23784
|
} catch (e) { /* ignore */ }
|
|
@@ -23786,12 +23790,16 @@ function _runListApplyFilters(runs) {
|
|
|
23786
23790
|
: _runListState.filterWindow === '7d' ? 7 * 24 * 60 * 60 * 1000
|
|
23787
23791
|
: Infinity;
|
|
23788
23792
|
var query = (_runListState.filterText || '').trim().toLowerCase();
|
|
23793
|
+
var catFilter = _runListState.filterCategory;
|
|
23789
23794
|
return runs.filter(function(r) {
|
|
23790
23795
|
if (_runListState.filterStatus === 'failed') {
|
|
23791
23796
|
if (r.status !== 'error' && r.status !== 'timeout' && r.status !== 'lost') return false;
|
|
23792
23797
|
} else if (_runListState.filterStatus === 'ok') {
|
|
23793
23798
|
if (r.status !== 'ok') return false;
|
|
23794
23799
|
}
|
|
23800
|
+
if (catFilter && catFilter !== 'all') {
|
|
23801
|
+
if (r.failureCategory !== catFilter) return false;
|
|
23802
|
+
}
|
|
23795
23803
|
if (query && String(r.jobName || '').toLowerCase().indexOf(query) === -1) return false;
|
|
23796
23804
|
if (windowMs !== Infinity && r.startedAt) {
|
|
23797
23805
|
var age = now - new Date(r.startedAt).getTime();
|
|
@@ -23844,6 +23852,21 @@ function renderRunListBody(allRuns) {
|
|
|
23844
23852
|
{ value: '7d', label: 'Last 7 days' },
|
|
23845
23853
|
{ value: 'all', label: 'All time' },
|
|
23846
23854
|
], 'filterWindow');
|
|
23855
|
+
// PRD §9 / 1.18.87: 11-category failure filter. Build the option list from
|
|
23856
|
+
// the categories actually present in the loaded data so the chip row stays
|
|
23857
|
+
// compact (don't show buckets that have zero runs).
|
|
23858
|
+
var seenCats = {};
|
|
23859
|
+
for (var ci = 0; ci < allRuns.length; ci++) {
|
|
23860
|
+
var c = allRuns[ci].failureCategory;
|
|
23861
|
+
if (c) seenCats[c] = (seenCats[c] || 0) + 1;
|
|
23862
|
+
}
|
|
23863
|
+
var catOptions = [{ value: 'all', label: 'Any category' }];
|
|
23864
|
+
Object.keys(seenCats).sort().forEach(function(k) {
|
|
23865
|
+
catOptions.push({ value: k, label: _runListCategoryLabel(k) + ' (' + seenCats[k] + ')' });
|
|
23866
|
+
});
|
|
23867
|
+
if (catOptions.length > 1) {
|
|
23868
|
+
html += _runListChip('Category', catOptions, 'filterCategory');
|
|
23869
|
+
}
|
|
23847
23870
|
html += '<input type="search" placeholder="Filter by task name…" value="' + esc(_runListState.filterText) + '" oninput="onRunListSearch(this.value)" style="flex:1;min-width:200px;max-width:320px;padding:6px 10px;font-size:12px;border:1px solid var(--border);border-radius:6px;background:var(--bg-secondary);color:var(--text-primary)">';
|
|
23848
23871
|
html += '<button class="btn-sm" onclick="resetRunListFilters()" style="font-size:11px">Reset to default</button>';
|
|
23849
23872
|
html += '</div>';
|
|
@@ -23881,6 +23904,13 @@ function renderRunListBody(allRuns) {
|
|
|
23881
23904
|
: entry.trigger === 'after' ? 'var(--purple)'
|
|
23882
23905
|
: entry.trigger === 'discord' ? 'var(--blue)'
|
|
23883
23906
|
: 'var(--text-muted)';
|
|
23907
|
+
// 1.18.87: failure category badge in the preview area when set.
|
|
23908
|
+
var categoryBadge = '';
|
|
23909
|
+
if (entry.failureCategory) {
|
|
23910
|
+
var catLabel = _runListCategoryLabel(entry.failureCategory);
|
|
23911
|
+
var catColor = _runListCategoryColor(entry.failureCategory);
|
|
23912
|
+
categoryBadge = '<span style="display:inline-block;background:' + catColor + '20;color:' + catColor + ';padding:1px 6px;border-radius:4px;font-size:10px;font-weight:600;letter-spacing:0.04em;margin-right:4px">' + esc(catLabel) + '</span>';
|
|
23913
|
+
}
|
|
23884
23914
|
// Goal cell
|
|
23885
23915
|
var goalCell = '<div></div>';
|
|
23886
23916
|
if (entry.goalCheck) {
|
|
@@ -23901,18 +23931,45 @@ function renderRunListBody(allRuns) {
|
|
|
23901
23931
|
+ goalCell
|
|
23902
23932
|
+ '<div style="min-width:0">'
|
|
23903
23933
|
+ '<div style="font-weight:500;color:var(--text-primary);font-size:13px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="' + esc(jobName) + '">' + esc(jobName) + (entry.attempt > 1 ? ' · attempt ' + esc(entry.attempt) : '') + '</div>'
|
|
23934
|
+
+ (categoryBadge ? '<div style="margin-top:2px">' + categoryBadge + '</div>' : '')
|
|
23904
23935
|
+ preview
|
|
23905
23936
|
+ '</div>'
|
|
23906
23937
|
+ '<div style="font-size:11px;color:' + triggerColor + ';line-height:18px">' + esc(triggerLabel) + '</div>'
|
|
23907
23938
|
+ '<div style="font-size:12px;color:var(--text-secondary);line-height:18px">' + esc(startedLabel) + '</div>'
|
|
23908
23939
|
+ '<div style="font-size:12px;color:var(--text-muted);line-height:18px">' + esc(durationLabel) + '</div>'
|
|
23909
|
-
+ '<div style="display:flex;gap:6px;align-items:center"><button class="btn-sm" onclick="event.stopPropagation();
|
|
23940
|
+
+ '<div style="display:flex;gap:6px;align-items:center"><button class="btn-sm" onclick="event.stopPropagation();openRunOrTrace(\\x27' + safeName + '\\x27,' + (entry.id ? '\\x27' + jsStr(entry.id) + '\\x27' : 'null') + ')" style="font-size:11px;padding:3px 8px">' + (entry.id ? 'Open run' : 'Trace') + '</button></div>'
|
|
23910
23941
|
+ '</div>';
|
|
23911
23942
|
}
|
|
23912
23943
|
html += '</div>';
|
|
23913
23944
|
return html;
|
|
23914
23945
|
}
|
|
23915
23946
|
|
|
23947
|
+
// PRD §9 / 1.18.87: failure category labels + colors mirror
|
|
23948
|
+
// failure-taxonomy.ts on the server. Kept inline so the dashboard JS
|
|
23949
|
+
// doesn't need to round-trip for the lookup.
|
|
23950
|
+
function _runListCategoryLabel(cat) {
|
|
23951
|
+
return ({
|
|
23952
|
+
model_error: 'Model API',
|
|
23953
|
+
model_output_error: 'Bad LLM output',
|
|
23954
|
+
tool_error: 'Tool failed',
|
|
23955
|
+
tool_timeout: 'Tool timeout',
|
|
23956
|
+
schema_error: 'Schema mismatch',
|
|
23957
|
+
context_error: 'Context exceeded',
|
|
23958
|
+
prompt_error: 'Blocked by policy',
|
|
23959
|
+
agent_loop_error: 'Loop limit',
|
|
23960
|
+
subagent_error: 'Subagent failed',
|
|
23961
|
+
infrastructure_error: 'Infrastructure',
|
|
23962
|
+
cancelled: 'Cancelled',
|
|
23963
|
+
})[cat] || cat;
|
|
23964
|
+
}
|
|
23965
|
+
function _runListCategoryColor(cat) {
|
|
23966
|
+
if (cat === 'cancelled') return 'var(--text-muted)';
|
|
23967
|
+
if (cat === 'tool_timeout' || cat === 'agent_loop_error' || cat === 'context_error') return 'var(--yellow)';
|
|
23968
|
+
if (cat === 'prompt_error' || cat === 'schema_error') return 'var(--purple)';
|
|
23969
|
+
if (cat === 'model_error' || cat === 'model_output_error') return 'var(--accent)';
|
|
23970
|
+
return 'var(--red)';
|
|
23971
|
+
}
|
|
23972
|
+
|
|
23916
23973
|
function _runListChip(label, options, stateKey) {
|
|
23917
23974
|
var current = _runListState[stateKey];
|
|
23918
23975
|
var html = '<span style="display:inline-flex;align-items:center;gap:4px">';
|
|
@@ -23946,6 +24003,7 @@ function onRunListSearch(value) {
|
|
|
23946
24003
|
function resetRunListFilters() {
|
|
23947
24004
|
_runListState.filterStatus = 'failed';
|
|
23948
24005
|
_runListState.filterWindow = '24h';
|
|
24006
|
+
_runListState.filterCategory = 'all';
|
|
23949
24007
|
_runListState.filterText = '';
|
|
23950
24008
|
_runListSaveView();
|
|
23951
24009
|
var panel = document.getElementById('panel-runs');
|
|
@@ -24384,6 +24442,162 @@ async function refreshCron() {
|
|
|
24384
24442
|
|
|
24385
24443
|
var traceData = [];
|
|
24386
24444
|
|
|
24445
|
+
// PRD Phase 4b / 1.18.86: smart router. If the run entry has a stable
|
|
24446
|
+
// runId (1.18.85+ runs), open the new Run detail viewer reading from the
|
|
24447
|
+
// Event store; otherwise fall back to the legacy trace viewer (which now
|
|
24448
|
+
// just renders the friendly empty state explaining where to find the
|
|
24449
|
+
// real error). Both viewers share the same modal shell.
|
|
24450
|
+
function openRunOrTrace(jobName, runId) {
|
|
24451
|
+
if (runId && typeof runId === 'string') {
|
|
24452
|
+
return openRunDetail(runId, jobName);
|
|
24453
|
+
}
|
|
24454
|
+
return openTraceViewer(jobName);
|
|
24455
|
+
}
|
|
24456
|
+
|
|
24457
|
+
// PRD Phase 4b / 1.18.86: Run detail viewer. Renders a waterfall of
|
|
24458
|
+
// RunEvent rows from /api/runs/:runId/events. Color-coded by kind, paired
|
|
24459
|
+
// tool_call→tool_result by toolUseId, with expandable per-span content.
|
|
24460
|
+
async function openRunDetail(runId, jobName) {
|
|
24461
|
+
document.getElementById('trace-modal-title').textContent = 'Run detail · ' + (jobName || runId);
|
|
24462
|
+
document.getElementById('trace-run-selector').innerHTML = '';
|
|
24463
|
+
document.getElementById('trace-content').innerHTML = '<div style="padding:20px;color:var(--text-muted)">Loading run events…</div>';
|
|
24464
|
+
document.getElementById('trace-modal').classList.add('show');
|
|
24465
|
+
try {
|
|
24466
|
+
var r = await apiFetch('/api/runs/' + encodeURIComponent(runId) + '/events');
|
|
24467
|
+
var d = await r.json();
|
|
24468
|
+
if (!r.ok || d.ok === false) {
|
|
24469
|
+
document.getElementById('trace-content').innerHTML = '<div style="padding:20px;color:var(--red)">Failed to load run: ' + esc(d.error || 'unknown') + '</div>';
|
|
24470
|
+
return;
|
|
24471
|
+
}
|
|
24472
|
+
var events = (d && d.events) || [];
|
|
24473
|
+
if (events.length === 0) {
|
|
24474
|
+
document.getElementById('trace-content').innerHTML = '<div style="padding:24px;color:var(--text-muted);line-height:1.6"><div style="font-weight:500;color:var(--text-secondary);margin-bottom:8px">No events captured for this run</div><div style="font-size:12px">Either the run pre-dates 1.18.85 (when the Event store was added) or the SDK errored before any message landed.<br/>The Recent history row carries the high-level status, error message, and goal verdict.</div></div>';
|
|
24475
|
+
return;
|
|
24476
|
+
}
|
|
24477
|
+
document.getElementById('trace-content').innerHTML = renderRunDetailWaterfall(events, runId, jobName);
|
|
24478
|
+
} catch (e) {
|
|
24479
|
+
document.getElementById('trace-content').innerHTML = '<div style="padding:20px;color:var(--red)">Failed to load run: ' + esc(String(e)) + '</div>';
|
|
24480
|
+
}
|
|
24481
|
+
}
|
|
24482
|
+
|
|
24483
|
+
// Renders the waterfall. Each event becomes a row with:
|
|
24484
|
+
// color border (by kind) · kind badge · time offset · brief preview · expand link
|
|
24485
|
+
// tool_call rows pair with their tool_result by toolUseId so the duration
|
|
24486
|
+
// is computed and shown alongside the call.
|
|
24487
|
+
function renderRunDetailWaterfall(events, runId, jobName) {
|
|
24488
|
+
if (!events.length) return '';
|
|
24489
|
+
var firstTs = events[0].ts ? new Date(events[0].ts).getTime() : Date.now();
|
|
24490
|
+
var lastTs = events[events.length - 1].ts ? new Date(events[events.length - 1].ts).getTime() : firstTs;
|
|
24491
|
+
var totalMs = Math.max(1, lastTs - firstTs);
|
|
24492
|
+
|
|
24493
|
+
// Pair tool_call with its tool_result for duration.
|
|
24494
|
+
var resultByToolUseId = {};
|
|
24495
|
+
for (var i = 0; i < events.length; i++) {
|
|
24496
|
+
var e = events[i];
|
|
24497
|
+
if (e.kind === 'tool_result' && e.toolUseId) {
|
|
24498
|
+
resultByToolUseId[e.toolUseId] = e;
|
|
24499
|
+
}
|
|
24500
|
+
}
|
|
24501
|
+
|
|
24502
|
+
// Per-event color + label
|
|
24503
|
+
function kindColor(k) {
|
|
24504
|
+
if (k === 'session_start' || k === 'session_end') return 'var(--text-muted)';
|
|
24505
|
+
if (k === 'llm_text') return 'var(--accent)';
|
|
24506
|
+
if (k === 'thinking') return 'var(--purple)';
|
|
24507
|
+
if (k === 'tool_call') return '#22c55e';
|
|
24508
|
+
if (k === 'tool_result') return '#22c55e';
|
|
24509
|
+
if (k === 'subagent_start' || k === 'subagent_stop') return '#a855f7';
|
|
24510
|
+
if (k === 'rate_limit') return 'var(--yellow)';
|
|
24511
|
+
if (k === 'hook') return 'var(--blue)';
|
|
24512
|
+
if (k === 'error') return 'var(--red)';
|
|
24513
|
+
return 'var(--text-muted)';
|
|
24514
|
+
}
|
|
24515
|
+
function kindLabel(k) {
|
|
24516
|
+
return (k || 'event').toUpperCase().replace(/_/g, ' ');
|
|
24517
|
+
}
|
|
24518
|
+
|
|
24519
|
+
// Header strip with summary
|
|
24520
|
+
var startLabel = events[0].ts ? new Date(events[0].ts).toLocaleString() : '—';
|
|
24521
|
+
var endEvent = events.find(function(e) { return e.kind === 'session_end'; });
|
|
24522
|
+
var costStr = endEvent && endEvent.costUsd != null ? '$' + endEvent.costUsd.toFixed(4) : '—';
|
|
24523
|
+
var stopReason = endEvent && endEvent.stopReason ? endEvent.stopReason : '—';
|
|
24524
|
+
var html = '<div style="padding:16px 20px;border-bottom:1px solid var(--border);background:var(--bg-secondary);position:sticky;top:0;z-index:1">'
|
|
24525
|
+
+ '<div style="display:flex;align-items:center;gap:14px;font-size:11px;color:var(--text-muted);flex-wrap:wrap">'
|
|
24526
|
+
+ '<span><strong style="color:var(--text-primary)">' + esc(events.length) + '</strong> events</span>'
|
|
24527
|
+
+ '<span>·</span><span>started ' + esc(startLabel) + '</span>'
|
|
24528
|
+
+ '<span>·</span><span>duration <strong style="color:var(--text-primary)">' + esc(formatDurationMs(totalMs)) + '</strong></span>'
|
|
24529
|
+
+ '<span>·</span><span>cost <strong style="color:var(--text-primary)">' + esc(costStr) + '</strong></span>'
|
|
24530
|
+
+ '<span>·</span><span>stop reason <strong style="color:var(--text-primary)">' + esc(stopReason) + '</strong></span>'
|
|
24531
|
+
+ '<span style="flex:1"></span>'
|
|
24532
|
+
+ '<code style="font-size:10px;color:var(--text-muted)">runId ' + esc(String(runId).slice(0, 12)) + '…</code>'
|
|
24533
|
+
+ '</div>'
|
|
24534
|
+
+ '</div>';
|
|
24535
|
+
|
|
24536
|
+
// Waterfall rows
|
|
24537
|
+
html += '<div style="padding:0">';
|
|
24538
|
+
for (var j = 0; j < events.length; j++) {
|
|
24539
|
+
var ev = events[j];
|
|
24540
|
+
var color = kindColor(ev.kind);
|
|
24541
|
+
var label = kindLabel(ev.kind);
|
|
24542
|
+
var tsMs = ev.ts ? new Date(ev.ts).getTime() : firstTs;
|
|
24543
|
+
var offsetMs = tsMs - firstTs;
|
|
24544
|
+
var offsetLabel = offsetMs === 0 ? '+0ms' : '+' + formatDurationMs(offsetMs);
|
|
24545
|
+
var widthPct = Math.max(2, Math.min(100, (offsetMs / totalMs) * 100));
|
|
24546
|
+
// For tool_call, compute duration to its paired tool_result.
|
|
24547
|
+
var pairedDuration = '';
|
|
24548
|
+
if (ev.kind === 'tool_call' && ev.toolUseId && resultByToolUseId[ev.toolUseId]) {
|
|
24549
|
+
var resultTs = new Date(resultByToolUseId[ev.toolUseId].ts).getTime();
|
|
24550
|
+
pairedDuration = ' · ran ' + formatDurationMs(Math.max(0, resultTs - tsMs));
|
|
24551
|
+
}
|
|
24552
|
+
|
|
24553
|
+
// Brief preview: text for llm_text, thinking for thinking, tool name + first arg for tool_call/result, error for error.
|
|
24554
|
+
var preview = '';
|
|
24555
|
+
var fullContent = '';
|
|
24556
|
+
if (ev.kind === 'llm_text' && ev.text) {
|
|
24557
|
+
preview = String(ev.text).slice(0, 160).replace(/\\s+/g, ' ');
|
|
24558
|
+
fullContent = String(ev.text);
|
|
24559
|
+
} else if (ev.kind === 'thinking' && ev.thinking) {
|
|
24560
|
+
preview = String(ev.thinking).slice(0, 160).replace(/\\s+/g, ' ');
|
|
24561
|
+
fullContent = String(ev.thinking);
|
|
24562
|
+
} else if (ev.kind === 'tool_call') {
|
|
24563
|
+
preview = (ev.toolName || 'tool') + (ev.toolInput ? ' · ' + JSON.stringify(ev.toolInput).slice(0, 120) : '');
|
|
24564
|
+
fullContent = ev.toolInput ? JSON.stringify(ev.toolInput, null, 2) : '';
|
|
24565
|
+
} else if (ev.kind === 'tool_result') {
|
|
24566
|
+
preview = ev.toolError ? '✗ ' + ev.toolError : (typeof ev.toolResult === 'string' ? ev.toolResult.slice(0, 160) : (ev.toolResult ? JSON.stringify(ev.toolResult).slice(0, 160) : ''));
|
|
24567
|
+
fullContent = typeof ev.toolResult === 'string' ? ev.toolResult : JSON.stringify(ev.toolResult, null, 2);
|
|
24568
|
+
} else if (ev.kind === 'error') {
|
|
24569
|
+
preview = ev.toolError || '';
|
|
24570
|
+
fullContent = ev.toolError || '';
|
|
24571
|
+
} else if (ev.kind === 'session_start') {
|
|
24572
|
+
preview = ev.sessionId ? 'session ' + String(ev.sessionId).slice(0, 8) + '…' : '';
|
|
24573
|
+
} else if (ev.kind === 'session_end') {
|
|
24574
|
+
preview = '$' + (ev.costUsd != null ? ev.costUsd.toFixed(4) : '?') + ' · ' + (ev.stopReason || '?');
|
|
24575
|
+
}
|
|
24576
|
+
|
|
24577
|
+
var rowId = 'run-evt-' + j;
|
|
24578
|
+
var canExpand = !!fullContent && fullContent.length > preview.length;
|
|
24579
|
+
html += '<div style="display:grid;grid-template-columns:90px 110px 1fr;gap:14px;padding:10px 20px;border-bottom:1px solid var(--border);align-items:start">';
|
|
24580
|
+
html += '<div style="font-size:10px;color:var(--text-muted);font-family:\\x27JetBrains Mono\\x27,monospace;line-height:18px">' + esc(offsetLabel) + '</div>';
|
|
24581
|
+
html += '<div><span style="display:inline-block;background:' + color + '20;color:' + color + ';padding:2px 8px;border-radius:4px;font-size:10px;font-weight:600;letter-spacing:0.04em">' + esc(label) + '</span></div>';
|
|
24582
|
+
html += '<div style="min-width:0">';
|
|
24583
|
+
html += '<div style="font-size:12px;color:var(--text-primary);line-height:1.45;word-break:break-word">'
|
|
24584
|
+
+ esc(preview)
|
|
24585
|
+
+ (pairedDuration ? '<span style="color:var(--text-muted);font-size:11px"> ' + esc(pairedDuration) + '</span>' : '')
|
|
24586
|
+
+ '</div>';
|
|
24587
|
+
if (canExpand) {
|
|
24588
|
+
html += '<button class="btn-sm" onclick="document.getElementById(\\x27' + rowId + '\\x27).style.display=document.getElementById(\\x27' + rowId + '\\x27).style.display===\\x27none\\x27?\\x27block\\x27:\\x27none\\x27" style="margin-top:6px;font-size:10px;padding:2px 8px">Expand</button>';
|
|
24589
|
+
html += '<pre id="' + rowId + '" style="display:none;margin-top:8px;font-size:11px;font-family:\\x27JetBrains Mono\\x27,monospace;background:var(--bg-secondary);border:1px solid var(--border);padding:10px;border-radius:6px;white-space:pre-wrap;word-break:break-word;max-height:400px;overflow-y:auto">' + esc(fullContent) + '</pre>';
|
|
24590
|
+
}
|
|
24591
|
+
// Show toolUseId hint when present so user can correlate with logs.
|
|
24592
|
+
if (ev.toolUseId) {
|
|
24593
|
+
html += '<div style="font-size:10px;color:var(--text-muted);font-family:\\x27JetBrains Mono\\x27,monospace;margin-top:4px">use_id ' + esc(String(ev.toolUseId).slice(0, 12)) + '…</div>';
|
|
24594
|
+
}
|
|
24595
|
+
html += '</div></div>';
|
|
24596
|
+
}
|
|
24597
|
+
html += '</div>';
|
|
24598
|
+
return html;
|
|
24599
|
+
}
|
|
24600
|
+
|
|
24387
24601
|
async function openTraceViewer(jobName) {
|
|
24388
24602
|
document.getElementById('trace-modal-title').textContent = 'Trace: ' + jobName;
|
|
24389
24603
|
document.getElementById('trace-content').innerHTML = '<div style="padding:20px;color:var(--text-muted)">Loading...</div>';
|
|
@@ -25708,7 +25922,7 @@ function renderCronRunDetails(lr) {
|
|
|
25708
25922
|
if (Array.isArray(lr.mcpServersApplied) && lr.mcpServersApplied.length) {
|
|
25709
25923
|
html += '<div style="font-size:11px;color:var(--text-muted);margin-bottom:6px">MCP servers: ' + esc(lr.mcpServersApplied.join(', ')) + '</div>';
|
|
25710
25924
|
}
|
|
25711
|
-
html += '<div style="margin-top:14px;display:flex;gap:8px"><button class="btn-sm" onclick="
|
|
25925
|
+
html += '<div style="margin-top:14px;display:flex;gap:8px"><button class="btn-sm" onclick="openRunOrTrace(\\x27' + jsStr(lr.jobName || editingCronJob || '') + '\\x27,' + (lr.id ? '\\x27' + jsStr(lr.id) + '\\x27' : 'null') + ')" style="font-size:11px">' + (lr.id ? 'Open run' : 'Open trace') + '</button></div>';
|
|
25712
25926
|
html += '</div>';
|
|
25713
25927
|
return html;
|
|
25714
25928
|
}
|
package/dist/gateway/cron-scheduler.js
CHANGED
|
@@ -1288,7 +1288,11 @@ export class CronScheduler {
|
|
|
1288
1288
|
const errorType = errTerminalReason
|
|
1289
1289
|
? classifyTerminalReason(errTerminalReason)
|
|
1290
1290
|
: classifyError(err);
|
|
1291
|
-
|
|
1291
|
+
// 1.18.87: stamp PRD-canonical failure category. classifyRunFailure
|
|
1292
|
+
// is sync; safe to call inline. Returns null for non-failures, but
|
|
1293
|
+
// we know this branch is the error path so it always returns a
|
|
1294
|
+
// category.
|
|
1295
|
+
const errEntry = {
|
|
1292
1296
|
jobName: job.name,
|
|
1293
1297
|
startedAt: startedAt.toISOString(),
|
|
1294
1298
|
finishedAt: finishedAt.toISOString(),
|
|
@@ -1298,12 +1302,25 @@ export class CronScheduler {
|
|
|
1298
1302
|
errorType,
|
|
1299
1303
|
terminalReason: errTerminalReason,
|
|
1300
1304
|
attempt,
|
|
1305
|
+
// 1.18.84/85 fields preserved on the error path so the Run list
|
|
1306
|
+
// can show trigger + open the partial Event log if any.
|
|
1307
|
+
trigger,
|
|
1308
|
+
...(errCronMetadata?.runId ? { id: errCronMetadata.runId } : {}),
|
|
1301
1309
|
...(errCronMetadata?.skillsApplied?.length ? { skillsApplied: errCronMetadata.skillsApplied } : {}),
|
|
1302
1310
|
...(errCronMetadata?.skillsMissing?.length ? { skillsMissing: errCronMetadata.skillsMissing } : {}),
|
|
1303
1311
|
...(errCronMetadata?.allowedToolsApplied?.length ? { allowedToolsApplied: errCronMetadata.allowedToolsApplied } : {}),
|
|
1304
1312
|
...(errCronMetadata?.mcpServersApplied?.length ? { mcpServersApplied: errCronMetadata.mcpServersApplied } : {}),
|
|
1305
1313
|
advisorApplied,
|
|
1306
|
-
}
|
|
1314
|
+
};
|
|
1315
|
+
// Lazy-import the classifier so it doesn't load on success paths.
|
|
1316
|
+
try {
|
|
1317
|
+
const { classifyRunFailure } = await import('./failure-taxonomy.js');
|
|
1318
|
+
const cat = classifyRunFailure(errEntry);
|
|
1319
|
+
if (cat)
|
|
1320
|
+
errEntry.failureCategory = cat;
|
|
1321
|
+
}
|
|
1322
|
+
catch { /* non-fatal */ }
|
|
1323
|
+
this._logRun(errEntry);
|
|
1307
1324
|
if (isCreditBalanceError(err)) {
|
|
1308
1325
|
const { block, created } = markBackgroundCreditBlocked(err);
|
|
1309
1326
|
logger.error({ err, job: job.name, until: block.until }, 'Cron hit Claude credit exhaustion — pausing background jobs');
|
package/dist/gateway/failure-taxonomy.d.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PRD §9 / Phase 4c: 11-category failure classifier.
|
|
3
|
+
*
|
|
4
|
+
* Maps a CronRunEntry to one of the PRD-canonical failure buckets so the
|
|
5
|
+
* dashboard's Run list filter and Run detail viewer can group failures
|
|
6
|
+
* meaningfully. Sits ABOVE the existing job-health.ts classifier (which
|
|
7
|
+
* still produces the lower-level kind used by self-improve and the
|
|
8
|
+
* advisor) — this module re-buckets job-health output into PRD vocabulary.
|
|
9
|
+
*
|
|
10
|
+
* Source signals consulted, in priority order:
|
|
11
|
+
* 1. CronRunEntry.terminalReason — most precise, comes straight from SDK.
|
|
12
|
+
* 2. job-health classifyRunHealth — already has rate_limit / auth / context_overflow / etc.
|
|
13
|
+
* 3. error string heuristics — last resort.
|
|
14
|
+
*
|
|
15
|
+
* Returns null when the run is not a failure (status='ok').
|
|
16
|
+
*/
|
|
17
|
+
import type { CronRunEntry, RunFailureCategory } from '../types.js';
|
|
18
|
+
/** Returns the PRD-canonical failure bucket, or null if the run succeeded. */
|
|
19
|
+
export declare function classifyRunFailure(entry: CronRunEntry): RunFailureCategory | null;
|
|
20
|
+
/** Human-readable label for a failure category — surfaced on dashboards. */
|
|
21
|
+
export declare function failureCategoryLabel(cat: RunFailureCategory): string;
|
|
22
|
+
/** Color hint for the dashboard pill. Returns a CSS var name. */
|
|
23
|
+
export declare function failureCategoryColor(cat: RunFailureCategory): string;
|
|
24
|
+
//# sourceMappingURL=failure-taxonomy.d.ts.map
|
package/dist/gateway/failure-taxonomy.js
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PRD §9 / Phase 4c: 11-category failure classifier.
|
|
3
|
+
*
|
|
4
|
+
* Maps a CronRunEntry to one of the PRD-canonical failure buckets so the
|
|
5
|
+
* dashboard's Run list filter and Run detail viewer can group failures
|
|
6
|
+
* meaningfully. Sits ABOVE the existing job-health.ts classifier (which
|
|
7
|
+
* still produces the lower-level kind used by self-improve and the
|
|
8
|
+
* advisor) — this module re-buckets job-health output into PRD vocabulary.
|
|
9
|
+
*
|
|
10
|
+
* Source signals consulted, in priority order:
|
|
11
|
+
* 1. CronRunEntry.terminalReason — most precise, comes straight from SDK.
|
|
12
|
+
* 2. job-health classifyRunHealth — already has rate_limit / auth / context_overflow / etc.
|
|
13
|
+
* 3. error string heuristics — last resort.
|
|
14
|
+
*
|
|
15
|
+
* Returns null when the run is not a failure (status='ok').
|
|
16
|
+
*/
|
|
17
|
+
import { classifyRunHealth } from './job-health.js';
|
|
18
|
+
/** Returns the PRD-canonical failure bucket, or null if the run succeeded. */
|
|
19
|
+
export function classifyRunFailure(entry) {
|
|
20
|
+
// Non-failures don't get a category.
|
|
21
|
+
if (entry.status === 'ok')
|
|
22
|
+
return null;
|
|
23
|
+
if (entry.status === 'skipped')
|
|
24
|
+
return null;
|
|
25
|
+
if (entry.status === 'running')
|
|
26
|
+
return null;
|
|
27
|
+
// 'cancelled' is its own status today; map directly.
|
|
28
|
+
if (entry.status === 'cancelled')
|
|
29
|
+
return 'cancelled';
|
|
30
|
+
// Lost = daemon-boot sweep closed an orphaned 'running' entry.
|
|
31
|
+
// Treated as infrastructure_error per PRD §9 — the daemon crashed.
|
|
32
|
+
if (entry.status === 'lost')
|
|
33
|
+
return 'infrastructure_error';
|
|
34
|
+
// Timeout status maps directly.
|
|
35
|
+
if (entry.status === 'timeout')
|
|
36
|
+
return 'tool_timeout';
|
|
37
|
+
// Inspect terminalReason (SDK-reported termination) first — it's the
|
|
38
|
+
// most precise signal we have.
|
|
39
|
+
switch (entry.terminalReason) {
|
|
40
|
+
case 'max_turns':
|
|
41
|
+
return 'agent_loop_error';
|
|
42
|
+
case 'prompt_too_long':
|
|
43
|
+
return 'context_error';
|
|
44
|
+
case 'rapid_refill_breaker':
|
|
45
|
+
return 'context_error';
|
|
46
|
+
case 'blocking_limit':
|
|
47
|
+
return 'tool_error';
|
|
48
|
+
case 'image_error':
|
|
49
|
+
return 'model_output_error';
|
|
50
|
+
case 'aborted_streaming':
|
|
51
|
+
case 'aborted_tools':
|
|
52
|
+
return 'cancelled';
|
|
53
|
+
case 'stop_hook_prevented':
|
|
54
|
+
case 'hook_stopped':
|
|
55
|
+
return 'prompt_error';
|
|
56
|
+
case 'tool_deferred':
|
|
57
|
+
return 'tool_error';
|
|
58
|
+
case 'model_error':
|
|
59
|
+
return 'model_error';
|
|
60
|
+
// 'completed' should never land here (status would be 'ok')
|
|
61
|
+
default:
|
|
62
|
+
// Fall through to job-health + error string heuristics
|
|
63
|
+
break;
|
|
64
|
+
}
|
|
65
|
+
// High-precedence error-string patterns that should be classified
|
|
66
|
+
// BEFORE handing to job-health (which collapses "permission denied" into
|
|
67
|
+
// tool_scope, but PRD §9 says hook-blocked permission denials are
|
|
68
|
+
// prompt_error). Order matters here.
|
|
69
|
+
const earlyBlob = ((entry.error ?? '') + ' ' + (entry.outputPreview ?? '')).toLowerCase();
|
|
70
|
+
if (/permission denied|policy violation|prompt[- ]injection|guardrail|blocked by hook/.test(earlyBlob)) {
|
|
71
|
+
return 'prompt_error';
|
|
72
|
+
}
|
|
73
|
+
if (/^cancel|user (?:interrupt|abort|stopped)/.test(earlyBlob)) {
|
|
74
|
+
return 'cancelled';
|
|
75
|
+
}
|
|
76
|
+
if (/subagent|sub[- ]agent failed|delegated agent/.test(earlyBlob)) {
|
|
77
|
+
return 'subagent_error';
|
|
78
|
+
}
|
|
79
|
+
// Use the existing health classifier for buckets it already knows about.
|
|
80
|
+
// We use a stripped-down entry to avoid coupling to the full type.
|
|
81
|
+
try {
|
|
82
|
+
const health = classifyRunHealth(entry);
|
|
83
|
+
switch (health.status) {
|
|
84
|
+
case 'usage_blocked':
|
|
85
|
+
case 'auth':
|
|
86
|
+
case 'rate_limited':
|
|
87
|
+
return 'model_error';
|
|
88
|
+
case 'context_overflow':
|
|
89
|
+
case 'prompt_too_large':
|
|
90
|
+
return 'context_error';
|
|
91
|
+
case 'tool_scope':
|
|
92
|
+
return 'tool_error';
|
|
93
|
+
case 'partial':
|
|
94
|
+
// delivery-failed runs surface as tool_error in the new taxonomy
|
|
95
|
+
return 'tool_error';
|
|
96
|
+
case 'failed':
|
|
97
|
+
// Disambiguate via error string below
|
|
98
|
+
break;
|
|
99
|
+
case 'unknown':
|
|
100
|
+
default:
|
|
101
|
+
break;
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
catch {
|
|
105
|
+
// job-health threw — proceed with heuristics
|
|
106
|
+
}
|
|
107
|
+
// Error-string heuristics. Last-resort. Order matters: more specific
|
|
108
|
+
// patterns first so the catch-all doesn't swallow them.
|
|
109
|
+
const blob = ((entry.error ?? '') + ' ' + (entry.outputPreview ?? '')).toLowerCase();
|
|
110
|
+
if (!blob.trim())
|
|
111
|
+
return 'infrastructure_error';
|
|
112
|
+
if (/refusal|cannot (?:assist|help|comply)|i (?:can'?t|am unable)/.test(blob))
|
|
113
|
+
return 'model_output_error';
|
|
114
|
+
if (/invalid (?:tool|function) (?:call|input|json)|malformed tool|tool .* invalid arguments/.test(blob))
|
|
115
|
+
return 'model_output_error';
|
|
116
|
+
if (/permission denied|policy violation|prompt[- ]injection|guardrail|blocked by hook/.test(blob))
|
|
117
|
+
return 'prompt_error';
|
|
118
|
+
if (/tool .* time(d)? ?out|exceeded .* deadline|tool deadline/.test(blob))
|
|
119
|
+
return 'tool_timeout';
|
|
120
|
+
if (/schema|validation failed|did not validate|does not match schema/.test(blob))
|
|
121
|
+
return 'schema_error';
|
|
122
|
+
if (/context|too long|maximum context|exceeds.*tokens|input is too long/.test(blob))
|
|
123
|
+
return 'context_error';
|
|
124
|
+
if (/subagent|sub[- ]agent failed|delegated agent/.test(blob))
|
|
125
|
+
return 'subagent_error';
|
|
126
|
+
if (/cancel|user (?:interrupt|abort|stopped)/.test(blob))
|
|
127
|
+
return 'cancelled';
|
|
128
|
+
if (/oom|out of memory|enospc|enoent|enotfound|spawn .*ENOENT|process .* exited|terminated/.test(blob))
|
|
129
|
+
return 'infrastructure_error';
|
|
130
|
+
if (/401|403|unauthor|forbidden|invalid api key|api[- ]key/.test(blob))
|
|
131
|
+
return 'model_error';
|
|
132
|
+
if (/429|rate.?limit|quota/.test(blob))
|
|
133
|
+
return 'model_error';
|
|
134
|
+
if (/credit|billing|usage limit/.test(blob))
|
|
135
|
+
return 'model_error';
|
|
136
|
+
if (/(network|fetch|connect).*(fail|reset|refused|timeout)/.test(blob))
|
|
137
|
+
return 'infrastructure_error';
|
|
138
|
+
// Default catch-all — the run failed but the cause isn't explicit.
|
|
139
|
+
return 'tool_error';
|
|
140
|
+
}
|
|
141
|
+
// Display text for each failure category, keyed by the category string.
// A Map (rather than a plain object) keeps lookups of inherited names such
// as 'constructor' resolving to `undefined`, like an unmatched switch would.
const LABEL_BY_FAILURE_CATEGORY = new Map([
    ['model_error', 'Model API'],
    ['model_output_error', 'Bad LLM output'],
    ['tool_error', 'Tool failed'],
    ['tool_timeout', 'Tool timeout'],
    ['schema_error', 'Schema mismatch'],
    ['context_error', 'Context exceeded'],
    ['prompt_error', 'Blocked by policy'],
    ['agent_loop_error', 'Loop limit'],
    ['subagent_error', 'Subagent failed'],
    ['infrastructure_error', 'Infrastructure'],
    ['cancelled', 'Cancelled'],
]);
/**
 * Short, human-readable label for a failure category (shown on dashboards).
 *
 * @param {string} cat - Failure category identifier, e.g. 'tool_timeout'.
 * @returns {string | undefined} The label for `cat`, or `undefined` when
 *   `cat` is not one of the known categories.
 */
export function failureCategoryLabel(cat) {
    return LABEL_BY_FAILURE_CATEGORY.get(cat);
}
|
|
157
|
+
// Pill colours grouped by the categories that share them. Each entry is
// [cssValue, categories]; a category appears in exactly one group.
const FAILURE_COLOR_GROUPS = [
    ['var(--text-muted)', ['cancelled']],
    ['var(--yellow)', ['tool_timeout', 'agent_loop_error', 'context_error']],
    ['var(--purple)', ['prompt_error', 'schema_error']],
    ['var(--accent)', ['model_error', 'model_output_error']],
    ['var(--red)', ['infrastructure_error', 'tool_error', 'subagent_error']],
];
/**
 * Colour hint for the dashboard pill of a failure category.
 *
 * @param {string} cat - Failure category identifier, e.g. 'schema_error'.
 * @returns {string | undefined} A CSS `var(--…)` expression for `cat`, or
 *   `undefined` when `cat` is not one of the known categories.
 */
export function failureCategoryColor(cat) {
    for (const [cssValue, categories] of FAILURE_COLOR_GROUPS) {
        if (categories.includes(cat)) {
            return cssValue;
        }
    }
    return undefined;
}
|
|
173
|
+
//# sourceMappingURL=failure-taxonomy.js.map
|
package/dist/types.d.ts
CHANGED
|
@@ -448,6 +448,16 @@ export interface RunEvent {
|
|
|
448
448
|
/** Subagent id when kind='subagent_*'. */
|
|
449
449
|
agentId?: string;
|
|
450
450
|
}
|
|
451
|
+
/**
|
|
452
|
+
* PRD §9 / 1.18.87: 11-category failure taxonomy. Replaces the existing
|
|
453
|
+
* JobHealthKind union for surfacing-on-the-dashboard purposes (job-health.ts
|
|
454
|
+
* stays as the lower-level classifier and feeds into this).
|
|
455
|
+
*
|
|
456
|
+
* Stamped on CronRunEntry.failureCategory at write-time when the run is a
|
|
457
|
+
* failure (status: 'error' | 'timeout' | 'lost' | retried-final). Powers
|
|
458
|
+
* the Run list filter chip and the Run detail viewer's failure pill.
|
|
459
|
+
*/
|
|
460
|
+
// Each member below has a matching `case` in failureCategoryLabel and
// failureCategoryColor (gateway/failure-taxonomy.js); update those switches
// when adding or removing a member.
export type RunFailureCategory = 'model_error' | 'model_output_error' | 'tool_error' | 'tool_timeout' | 'schema_error' | 'context_error' | 'prompt_error' | 'agent_loop_error' | 'subagent_error' | 'infrastructure_error' | 'cancelled';
|
|
451
461
|
export interface CronRunEntry {
|
|
452
462
|
/** PRD §6 / 1.18.85: stable run UUID. Optional only because pre-1.18.85
|
|
453
463
|
* entries don't have it; new entries always do. The Event store keys
|
|
@@ -500,6 +510,10 @@ export interface CronRunEntry {
|
|
|
500
510
|
* Discord) so the Run list can filter by source instead of guessing
|
|
501
511
|
* via heuristics on attempt count. */
|
|
502
512
|
trigger?: 'manual' | 'scheduled' | 'webhook' | 'api' | 'fork' | 'resume' | 'discord' | 'after';
|
|
513
|
+
/** PRD §9 / 1.18.87: PRD-canonical failure bucket. Set on every entry
|
|
514
|
+
* whose status indicates a failure (error/timeout/lost/cancelled). The
|
|
515
|
+
* Run list filter chip and Run detail header read from this field. */
|
|
516
|
+
failureCategory?: RunFailureCategory;
|
|
503
517
|
/** PRD Phase 1: did the run accomplish what it was supposed to?
|
|
504
518
|
* Computed at run-end when the Task has successSchema or successCriteriaText.
|
|
505
519
|
* - status='pass' both configured checks passed (or the only one configured did)
|