clementine-agent 1.18.86 → 1.18.88
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/cron.js +11 -2
- package/dist/cli/dashboard.js +175 -2
- package/dist/gateway/cron-scheduler.js +19 -2
- package/dist/gateway/failure-taxonomy.d.ts +24 -0
- package/dist/gateway/failure-taxonomy.js +173 -0
- package/dist/types.d.ts +14 -0
- package/package.json +1 -1
package/dist/cli/cron.js
CHANGED
|
@@ -182,7 +182,7 @@ export async function cmdCronRun(jobName) {
|
|
|
182
182
|
catch (err) {
|
|
183
183
|
const finishedAt = new Date();
|
|
184
184
|
const trigger = process.env.CRON_RUN_TRIGGER || 'scheduled';
|
|
185
|
-
|
|
185
|
+
const errEntry = {
|
|
186
186
|
jobName: job.name,
|
|
187
187
|
startedAt: startedAt.toISOString(),
|
|
188
188
|
finishedAt: finishedAt.toISOString(),
|
|
@@ -192,7 +192,16 @@ export async function cmdCronRun(jobName) {
|
|
|
192
192
|
errorType: classifyError(err),
|
|
193
193
|
attempt: 1,
|
|
194
194
|
trigger,
|
|
195
|
-
}
|
|
195
|
+
};
|
|
196
|
+
// 1.18.87: stamp PRD-canonical failure category.
|
|
197
|
+
try {
|
|
198
|
+
const { classifyRunFailure } = await import('../gateway/failure-taxonomy.js');
|
|
199
|
+
const cat = classifyRunFailure(errEntry);
|
|
200
|
+
if (cat)
|
|
201
|
+
errEntry.failureCategory = cat;
|
|
202
|
+
}
|
|
203
|
+
catch { /* non-fatal */ }
|
|
204
|
+
runLog.append(errEntry);
|
|
196
205
|
console.error(`Error: ${err}`);
|
|
197
206
|
process.exit(1);
|
|
198
207
|
}
|
package/dist/cli/dashboard.js
CHANGED
|
@@ -15207,6 +15207,40 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
|
|
|
15207
15207
|
/* ── Recent history row hover (Tasks page bottom zone) ── */
|
|
15208
15208
|
.history-row { transition: background 0.12s ease; }
|
|
15209
15209
|
.history-row:hover { background: var(--bg-hover); }
|
|
15210
|
+
/* PRD §12 / 1.18.88: Health Strip — six glanceable tiles above the
|
|
15211
|
+
Tasks pane. Always visible, refreshes on SSE + 30s poll. */
|
|
15212
|
+
.health-strip {
|
|
15213
|
+
display: grid;
|
|
15214
|
+
grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
|
|
15215
|
+
gap: 10px;
|
|
15216
|
+
margin-bottom: 18px;
|
|
15217
|
+
}
|
|
15218
|
+
.health-tile {
|
|
15219
|
+
background: var(--bg-secondary);
|
|
15220
|
+
border: 1px solid var(--border);
|
|
15221
|
+
border-radius: var(--radius);
|
|
15222
|
+
padding: 12px 14px;
|
|
15223
|
+
display: flex;
|
|
15224
|
+
flex-direction: column;
|
|
15225
|
+
gap: 4px;
|
|
15226
|
+
}
|
|
15227
|
+
.health-tile-label {
|
|
15228
|
+
font-size: 10px;
|
|
15229
|
+
color: var(--text-muted);
|
|
15230
|
+
text-transform: uppercase;
|
|
15231
|
+
letter-spacing: 0.05em;
|
|
15232
|
+
font-weight: 500;
|
|
15233
|
+
}
|
|
15234
|
+
.health-tile-value {
|
|
15235
|
+
font-size: 20px;
|
|
15236
|
+
font-weight: 600;
|
|
15237
|
+
color: var(--text-primary);
|
|
15238
|
+
line-height: 1.2;
|
|
15239
|
+
}
|
|
15240
|
+
.health-tile-sub {
|
|
15241
|
+
font-size: 11px;
|
|
15242
|
+
color: var(--text-muted);
|
|
15243
|
+
}
|
|
15210
15244
|
/* PRD Phase 1.2: "Run task once" running-state pulse on the Last run tab. */
|
|
15211
15245
|
@keyframes pulse {
|
|
15212
15246
|
0%, 100% { opacity: 0.4; transform: scale(0.85); }
|
|
@@ -23739,6 +23773,78 @@ function renderRunningCard(item) {
|
|
|
23739
23773
|
+ '</div></div>';
|
|
23740
23774
|
}
|
|
23741
23775
|
|
|
23776
|
+
// ── PRD §12 / 1.18.88: Health Strip ───────────────────────────────────
// Six glanceable tiles above the Tasks pane: 24h runs / success rate /
// P50 latency / P95 latency / active runs / top failure category.
// Computed client-side from /api/cron/runs and /api/build/operations, both
// of which this function fetches itself (in parallel) on every call.
//
// Behaviour notes:
//  - If the runs fetch (or its JSON parse) fails, the strip is left untouched
//    instead of rendering misleading "0 runs" tiles.
//  - If only the operations fetch fails, "Running now" falls back to 0.
//  - Success rate = ok / (ok + failed), matching the "ok · failed" sub-label;
//    runs with any other status (e.g. still running) are not counted.
//  - Resolves with no value; the only side effect is writing strip.innerHTML.
async function refreshHealthStrip() {
  var strip = document.getElementById('health-strip');
  if (!strip) return;

  // Async wrapper so that a synchronous throw from apiFetch also surfaces as
  // a rejection and is handled by the same code paths below.
  async function fetchJson(url) {
    var res = await apiFetch(url);
    return res.json();
  }

  var runs;
  var ops;
  try {
    var loaded = await Promise.all([
      fetchJson('/api/cron/runs?limit=200'),
      // Operations are optional for the strip: swallow failures here so they
      // cannot reject the whole Promise.all.
      fetchJson('/api/build/operations?hours=1&limit=10').catch(function() { return null; }),
    ]);
    runs = loaded[0] && Array.isArray(loaded[0].runs) ? loaded[0].runs : [];
    ops = loaded[1];
  } catch (e) {
    // Runs could not be loaded — leave the strip as it is.
    return;
  }

  // Filter to last 24h.
  var cutoff = Date.now() - 24 * 60 * 60 * 1000;
  var last24 = runs.filter(function(rn) { return rn.startedAt && new Date(rn.startedAt).getTime() >= cutoff; });

  // Success / failure split. Same "failed" definition as the Run list filter.
  var ok = last24.filter(function(rn) { return rn.status === 'ok'; }).length;
  var failed = last24.filter(function(rn) { return rn.status === 'error' || rn.status === 'timeout' || rn.status === 'lost'; }).length;
  var decided = ok + failed;
  var successRate = decided > 0 ? Math.round((ok / decided) * 100) : null;

  // Latency distribution (terminal runs only — exclude in-progress and lost).
  var durations = last24
    .filter(function(rn) { return rn.durationMs != null && rn.status !== 'running' && rn.status !== 'lost'; })
    .map(function(rn) { return rn.durationMs; })
    .sort(function(a, b) { return a - b; });

  // Percentile by floor-based index into the ascending array; for an
  // even-length input the 50th percentile is the upper of the two middle values.
  function pct(arr, p) {
    if (arr.length === 0) return null;
    var idx = Math.min(arr.length - 1, Math.floor(arr.length * p / 100));
    return arr[idx];
  }
  var p50 = pct(durations, 50);
  var p95 = pct(durations, 95);

  // Active runs = length of runningNow in the operations payload.
  var activeRuns = ops && Array.isArray(ops.runningNow) ? ops.runningNow.length : 0;

  // Top failure category for the day (first category to reach the highest count wins ties).
  var catCounts = {};
  for (var i = 0; i < last24.length; i++) {
    var c = last24[i].failureCategory;
    if (c) catCounts[c] = (catCounts[c] || 0) + 1;
  }
  var topCat = null;
  var topCount = 0;
  Object.keys(catCounts).forEach(function(k) {
    if (catCounts[k] > topCount) { topCat = k; topCount = catCounts[k]; }
  });

  // Render six tiles. `color` is only ever one of the internal CSS var()
  // strings chosen below, so it is interpolated into the style attribute as-is.
  function tile(label, value, sub, color) {
    return '<div class="health-tile">'
      + '<div class="health-tile-label">' + esc(label) + '</div>'
      + '<div class="health-tile-value"' + (color ? ' style="color:' + color + '"' : '') + '>' + (value === null || value === undefined ? '—' : esc(String(value))) + '</div>'
      + (sub ? '<div class="health-tile-sub">' + esc(sub) + '</div>' : '')
      + '</div>';
  }
  var srColor = successRate === null ? null
    : successRate >= 95 ? 'var(--green)'
    : successRate >= 80 ? 'var(--yellow)'
    : 'var(--red)';

  var html = '';
  html += tile('Runs · 24h', last24.length, ok + ' ok · ' + failed + ' failed');
  html += tile('Success rate', successRate === null ? '—' : (successRate + '%'), null, srColor);
  html += tile('P50 latency', p50 === null ? '—' : formatDurationMs(p50), 'median run time');
  html += tile('P95 latency', p95 === null ? '—' : formatDurationMs(p95), '95th percentile');
  html += tile('Running now', activeRuns, activeRuns === 0 ? 'idle' : 'live');
  html += tile('Top failure', topCat ? _runListCategoryLabel(topCat) : '—', topCat ? topCount + ' run' + (topCount === 1 ? '' : 's') : 'no failures', topCat ? _runListCategoryColor(topCat) : null);
  strip.innerHTML = html;
}
|
|
23847
|
+
|
|
23742
23848
|
// ── PRD Phase 3: Run list ──────────────────────────────────────────────
|
|
23743
23849
|
// Single sortable/filterable table of every CronRunEntry across all tasks.
|
|
23744
23850
|
// Filters: status, task name, time window. Browser-local saved views.
|
|
@@ -23748,6 +23854,7 @@ function renderRunningCard(item) {
|
|
|
23748
23854
|
var _runListState = {
|
|
23749
23855
|
filterStatus: 'all', // 'all' | 'failed' | 'ok'
|
|
23750
23856
|
filterWindow: '24h', // '24h' | '7d' | 'all'
|
|
23857
|
+
filterCategory: 'all', // 'all' | <one of the 11 PRD failure categories>
|
|
23751
23858
|
filterText: '', // free-text task name match
|
|
23752
23859
|
data: [], // raw runs from /api/cron/runs
|
|
23753
23860
|
};
|
|
@@ -23760,13 +23867,15 @@ function _runListLoadDefaultView() {
|
|
|
23760
23867
|
var saved = JSON.parse(raw);
|
|
23761
23868
|
_runListState.filterStatus = saved.filterStatus || 'all';
|
|
23762
23869
|
_runListState.filterWindow = saved.filterWindow || '24h';
|
|
23870
|
+
_runListState.filterCategory = saved.filterCategory || 'all';
|
|
23763
23871
|
_runListState.filterText = saved.filterText || '';
|
|
23764
23872
|
return;
|
|
23765
23873
|
}
|
|
23766
23874
|
} catch (e) { /* ignore */ }
|
|
23767
|
-
// Default: failures, last 24h.
|
|
23875
|
+
// Default: failures, last 24h, all categories.
|
|
23768
23876
|
_runListState.filterStatus = 'failed';
|
|
23769
23877
|
_runListState.filterWindow = '24h';
|
|
23878
|
+
_runListState.filterCategory = 'all';
|
|
23770
23879
|
_runListState.filterText = '';
|
|
23771
23880
|
}
|
|
23772
23881
|
|
|
@@ -23775,6 +23884,7 @@ function _runListSaveView() {
|
|
|
23775
23884
|
localStorage.setItem('runListView', JSON.stringify({
|
|
23776
23885
|
filterStatus: _runListState.filterStatus,
|
|
23777
23886
|
filterWindow: _runListState.filterWindow,
|
|
23887
|
+
filterCategory: _runListState.filterCategory,
|
|
23778
23888
|
filterText: _runListState.filterText,
|
|
23779
23889
|
}));
|
|
23780
23890
|
} catch (e) { /* ignore */ }
|
|
@@ -23786,12 +23896,16 @@ function _runListApplyFilters(runs) {
|
|
|
23786
23896
|
: _runListState.filterWindow === '7d' ? 7 * 24 * 60 * 60 * 1000
|
|
23787
23897
|
: Infinity;
|
|
23788
23898
|
var query = (_runListState.filterText || '').trim().toLowerCase();
|
|
23899
|
+
var catFilter = _runListState.filterCategory;
|
|
23789
23900
|
return runs.filter(function(r) {
|
|
23790
23901
|
if (_runListState.filterStatus === 'failed') {
|
|
23791
23902
|
if (r.status !== 'error' && r.status !== 'timeout' && r.status !== 'lost') return false;
|
|
23792
23903
|
} else if (_runListState.filterStatus === 'ok') {
|
|
23793
23904
|
if (r.status !== 'ok') return false;
|
|
23794
23905
|
}
|
|
23906
|
+
if (catFilter && catFilter !== 'all') {
|
|
23907
|
+
if (r.failureCategory !== catFilter) return false;
|
|
23908
|
+
}
|
|
23795
23909
|
if (query && String(r.jobName || '').toLowerCase().indexOf(query) === -1) return false;
|
|
23796
23910
|
if (windowMs !== Infinity && r.startedAt) {
|
|
23797
23911
|
var age = now - new Date(r.startedAt).getTime();
|
|
@@ -23844,6 +23958,21 @@ function renderRunListBody(allRuns) {
|
|
|
23844
23958
|
{ value: '7d', label: 'Last 7 days' },
|
|
23845
23959
|
{ value: 'all', label: 'All time' },
|
|
23846
23960
|
], 'filterWindow');
|
|
23961
|
+
// PRD §9 / 1.18.87: 11-category failure filter. Build the option list from
|
|
23962
|
+
// the categories actually present in the loaded data so the chip row stays
|
|
23963
|
+
// compact (don't show buckets that have zero runs).
|
|
23964
|
+
var seenCats = {};
|
|
23965
|
+
for (var ci = 0; ci < allRuns.length; ci++) {
|
|
23966
|
+
var c = allRuns[ci].failureCategory;
|
|
23967
|
+
if (c) seenCats[c] = (seenCats[c] || 0) + 1;
|
|
23968
|
+
}
|
|
23969
|
+
var catOptions = [{ value: 'all', label: 'Any category' }];
|
|
23970
|
+
Object.keys(seenCats).sort().forEach(function(k) {
|
|
23971
|
+
catOptions.push({ value: k, label: _runListCategoryLabel(k) + ' (' + seenCats[k] + ')' });
|
|
23972
|
+
});
|
|
23973
|
+
if (catOptions.length > 1) {
|
|
23974
|
+
html += _runListChip('Category', catOptions, 'filterCategory');
|
|
23975
|
+
}
|
|
23847
23976
|
html += '<input type="search" placeholder="Filter by task name…" value="' + esc(_runListState.filterText) + '" oninput="onRunListSearch(this.value)" style="flex:1;min-width:200px;max-width:320px;padding:6px 10px;font-size:12px;border:1px solid var(--border);border-radius:6px;background:var(--bg-secondary);color:var(--text-primary)">';
|
|
23848
23977
|
html += '<button class="btn-sm" onclick="resetRunListFilters()" style="font-size:11px">Reset to default</button>';
|
|
23849
23978
|
html += '</div>';
|
|
@@ -23881,6 +24010,13 @@ function renderRunListBody(allRuns) {
|
|
|
23881
24010
|
: entry.trigger === 'after' ? 'var(--purple)'
|
|
23882
24011
|
: entry.trigger === 'discord' ? 'var(--blue)'
|
|
23883
24012
|
: 'var(--text-muted)';
|
|
24013
|
+
// 1.18.87: failure category badge in the preview area when set.
|
|
24014
|
+
var categoryBadge = '';
|
|
24015
|
+
if (entry.failureCategory) {
|
|
24016
|
+
var catLabel = _runListCategoryLabel(entry.failureCategory);
|
|
24017
|
+
var catColor = _runListCategoryColor(entry.failureCategory);
|
|
24018
|
+
categoryBadge = '<span style="display:inline-block;background:' + catColor + '20;color:' + catColor + ';padding:1px 6px;border-radius:4px;font-size:10px;font-weight:600;letter-spacing:0.04em;margin-right:4px">' + esc(catLabel) + '</span>';
|
|
24019
|
+
}
|
|
23884
24020
|
// Goal cell
|
|
23885
24021
|
var goalCell = '<div></div>';
|
|
23886
24022
|
if (entry.goalCheck) {
|
|
@@ -23901,6 +24037,7 @@ function renderRunListBody(allRuns) {
|
|
|
23901
24037
|
+ goalCell
|
|
23902
24038
|
+ '<div style="min-width:0">'
|
|
23903
24039
|
+ '<div style="font-weight:500;color:var(--text-primary);font-size:13px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="' + esc(jobName) + '">' + esc(jobName) + (entry.attempt > 1 ? ' · attempt ' + esc(entry.attempt) : '') + '</div>'
|
|
24040
|
+
+ (categoryBadge ? '<div style="margin-top:2px">' + categoryBadge + '</div>' : '')
|
|
23904
24041
|
+ preview
|
|
23905
24042
|
+ '</div>'
|
|
23906
24043
|
+ '<div style="font-size:11px;color:' + triggerColor + ';line-height:18px">' + esc(triggerLabel) + '</div>'
|
|
@@ -23913,6 +24050,32 @@ function renderRunListBody(allRuns) {
|
|
|
23913
24050
|
return html;
|
|
23914
24051
|
}
|
|
23915
24052
|
|
|
24053
|
+
// PRD §9 / 1.18.87: failure category labels + colors mirror
// failure-taxonomy.ts on the server. Kept inline so the dashboard JS
// doesn't need to round-trip for the lookup.

// Human-readable label for a failure category id. Unknown ids are returned
// unchanged so new server-side categories still render as something.
// Only the table's own keys are consulted, so ids that collide with
// Object.prototype members (e.g. "constructor") fall back like any other
// unknown id instead of returning an inherited function.
function _runListCategoryLabel(cat) {
  var labels = {
    model_error: 'Model API',
    model_output_error: 'Bad LLM output',
    tool_error: 'Tool failed',
    tool_timeout: 'Tool timeout',
    schema_error: 'Schema mismatch',
    context_error: 'Context exceeded',
    prompt_error: 'Blocked by policy',
    agent_loop_error: 'Loop limit',
    subagent_error: 'Subagent failed',
    infrastructure_error: 'Infrastructure',
    cancelled: 'Cancelled',
  };
  return Object.prototype.hasOwnProperty.call(labels, cat) ? labels[cat] : cat;
}

// CSS color expression (a var(--…) string) for a failure category id.
// Anything not listed below — including unknown ids — is shown in red.
function _runListCategoryColor(cat) {
  switch (cat) {
    case 'cancelled':
      return 'var(--text-muted)';
    case 'tool_timeout':
    case 'agent_loop_error':
    case 'context_error':
      return 'var(--yellow)';
    case 'prompt_error':
    case 'schema_error':
      return 'var(--purple)';
    case 'model_error':
    case 'model_output_error':
      return 'var(--accent)';
    default:
      return 'var(--red)';
  }
}
|
|
24078
|
+
|
|
23916
24079
|
function _runListChip(label, options, stateKey) {
|
|
23917
24080
|
var current = _runListState[stateKey];
|
|
23918
24081
|
var html = '<span style="display:inline-flex;align-items:center;gap:4px">';
|
|
@@ -23946,6 +24109,7 @@ function onRunListSearch(value) {
|
|
|
23946
24109
|
function resetRunListFilters() {
|
|
23947
24110
|
_runListState.filterStatus = 'failed';
|
|
23948
24111
|
_runListState.filterWindow = '24h';
|
|
24112
|
+
_runListState.filterCategory = 'all';
|
|
23949
24113
|
_runListState.filterText = '';
|
|
23950
24114
|
_runListSaveView();
|
|
23951
24115
|
var panel = document.getElementById('panel-runs');
|
|
@@ -24317,7 +24481,11 @@ async function refreshCron() {
|
|
|
24317
24481
|
var visibleRunning = ownerScoped ? (ops.runningNow || []).filter(function(i) { return buildOpsOwnerMatches(i.owner || ''); }) : (ops.runningNow || []);
|
|
24318
24482
|
var ownerFilter = getBuildOwnerFilter();
|
|
24319
24483
|
|
|
24320
|
-
|
|
24484
|
+
// PRD §12 / 1.18.88: Health Strip placeholder. The runs payload from
|
|
24485
|
+
// /api/cron/runs (already fetched alongside ops) feeds the metrics.
|
|
24486
|
+
// Render an empty shell first; refreshHealthStrip fills it in.
|
|
24487
|
+
var html = '<div id="health-strip" class="health-strip"></div>';
|
|
24488
|
+
html += renderOperationsSummary(ops);
|
|
24321
24489
|
|
|
24322
24490
|
// ── Zone 1 — Running now (promoted to top, primary "what's live" view) ──
|
|
24323
24491
|
if (visibleRunning.length > 0) {
|
|
@@ -24366,6 +24534,11 @@ async function refreshCron() {
|
|
|
24366
24534
|
html += renderRecentHistoryList(historyData);
|
|
24367
24535
|
|
|
24368
24536
|
panel.innerHTML = html;
|
|
24537
|
+
// PRD §12 / 1.18.88: populate the Health Strip after the panel renders.
|
|
24538
|
+
// Fire-and-forget — the strip lives at the top, fills in async.
|
|
24539
|
+
if (typeof refreshHealthStrip === 'function') {
|
|
24540
|
+
refreshHealthStrip().catch(function() { /* non-fatal */ });
|
|
24541
|
+
}
|
|
24369
24542
|
panel.onclick = function(ev) {
|
|
24370
24543
|
var target = ev.target;
|
|
24371
24544
|
while (target && target.id !== 'panel-cron') {
|
|
@@ -1288,7 +1288,11 @@ export class CronScheduler {
|
|
|
1288
1288
|
const errorType = errTerminalReason
|
|
1289
1289
|
? classifyTerminalReason(errTerminalReason)
|
|
1290
1290
|
: classifyError(err);
|
|
1291
|
-
|
|
1291
|
+
// 1.18.87: stamp PRD-canonical failure category. classifyRunFailure
|
|
1292
|
+
// is sync; safe to call inline. Returns null for non-failures, but
|
|
1293
|
+
// we know this branch is the error path so it always returns a
|
|
1294
|
+
// category.
|
|
1295
|
+
const errEntry = {
|
|
1292
1296
|
jobName: job.name,
|
|
1293
1297
|
startedAt: startedAt.toISOString(),
|
|
1294
1298
|
finishedAt: finishedAt.toISOString(),
|
|
@@ -1298,12 +1302,25 @@ export class CronScheduler {
|
|
|
1298
1302
|
errorType,
|
|
1299
1303
|
terminalReason: errTerminalReason,
|
|
1300
1304
|
attempt,
|
|
1305
|
+
// 1.18.84/85 fields preserved on the error path so the Run list
|
|
1306
|
+
// can show trigger + open the partial Event log if any.
|
|
1307
|
+
trigger,
|
|
1308
|
+
...(errCronMetadata?.runId ? { id: errCronMetadata.runId } : {}),
|
|
1301
1309
|
...(errCronMetadata?.skillsApplied?.length ? { skillsApplied: errCronMetadata.skillsApplied } : {}),
|
|
1302
1310
|
...(errCronMetadata?.skillsMissing?.length ? { skillsMissing: errCronMetadata.skillsMissing } : {}),
|
|
1303
1311
|
...(errCronMetadata?.allowedToolsApplied?.length ? { allowedToolsApplied: errCronMetadata.allowedToolsApplied } : {}),
|
|
1304
1312
|
...(errCronMetadata?.mcpServersApplied?.length ? { mcpServersApplied: errCronMetadata.mcpServersApplied } : {}),
|
|
1305
1313
|
advisorApplied,
|
|
1306
|
-
}
|
|
1314
|
+
};
|
|
1315
|
+
// Lazy-import the classifier so it doesn't load on success paths.
|
|
1316
|
+
try {
|
|
1317
|
+
const { classifyRunFailure } = await import('./failure-taxonomy.js');
|
|
1318
|
+
const cat = classifyRunFailure(errEntry);
|
|
1319
|
+
if (cat)
|
|
1320
|
+
errEntry.failureCategory = cat;
|
|
1321
|
+
}
|
|
1322
|
+
catch { /* non-fatal */ }
|
|
1323
|
+
this._logRun(errEntry);
|
|
1307
1324
|
if (isCreditBalanceError(err)) {
|
|
1308
1325
|
const { block, created } = markBackgroundCreditBlocked(err);
|
|
1309
1326
|
logger.error({ err, job: job.name, until: block.until }, 'Cron hit Claude credit exhaustion — pausing background jobs');
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PRD §9 / Phase 4c: 11-category failure classifier.
|
|
3
|
+
*
|
|
4
|
+
* Maps a CronRunEntry to one of the PRD-canonical failure buckets so the
|
|
5
|
+
* dashboard's Run list filter and Run detail viewer can group failures
|
|
6
|
+
* meaningfully. Sits ABOVE the existing job-health.ts classifier (which
|
|
7
|
+
* still produces the lower-level kind used by self-improve and the
|
|
8
|
+
* advisor) — this module re-buckets job-health output into PRD vocabulary.
|
|
9
|
+
*
|
|
10
|
+
* Source signals consulted, in priority order:
|
|
11
|
+
* 1. CronRunEntry.terminalReason — most precise, comes straight from SDK.
|
|
12
|
+
* 2. job-health classifyRunHealth — already has rate_limit / auth / context_overflow / etc.
|
|
13
|
+
* 3. error string heuristics — last resort.
|
|
14
|
+
*
|
|
15
|
+
* Returns null when the run is not a failure (status is 'ok', 'skipped', or 'running').
|
|
16
|
+
*/
|
|
17
|
+
import type { CronRunEntry, RunFailureCategory } from '../types.js';
|
|
18
|
+
/** Returns the PRD-canonical failure bucket, or null if the run succeeded. */
|
|
19
|
+
export declare function classifyRunFailure(entry: CronRunEntry): RunFailureCategory | null;
|
|
20
|
+
/** Human-readable label for a failure category — surfaced on dashboards. */
|
|
21
|
+
export declare function failureCategoryLabel(cat: RunFailureCategory): string;
|
|
22
|
+
/** Color hint for the dashboard pill. Returns a CSS `var(--…)` expression. */
|
|
23
|
+
export declare function failureCategoryColor(cat: RunFailureCategory): string;
|
|
24
|
+
//# sourceMappingURL=failure-taxonomy.d.ts.map
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PRD §9 / Phase 4c: 11-category failure classifier.
|
|
3
|
+
*
|
|
4
|
+
* Maps a CronRunEntry to one of the PRD-canonical failure buckets so the
|
|
5
|
+
* dashboard's Run list filter and Run detail viewer can group failures
|
|
6
|
+
* meaningfully. Sits ABOVE the existing job-health.ts classifier (which
|
|
7
|
+
* still produces the lower-level kind used by self-improve and the
|
|
8
|
+
* advisor) — this module re-buckets job-health output into PRD vocabulary.
|
|
9
|
+
*
|
|
10
|
+
* Source signals consulted, in priority order:
|
|
11
|
+
* 1. CronRunEntry.terminalReason — most precise, comes straight from SDK.
|
|
12
|
+
* 2. job-health classifyRunHealth — already has rate_limit / auth / context_overflow / etc.
|
|
13
|
+
* 3. error string heuristics — last resort.
|
|
14
|
+
*
|
|
15
|
+
* Returns null when the run is not a failure (status is 'ok', 'skipped', or 'running').
|
|
16
|
+
*/
|
|
17
|
+
import { classifyRunHealth } from './job-health.js';
|
|
18
|
+
/**
 * Returns the PRD-canonical failure bucket, or null if the run is not a failure.
 *
 * Signals are consulted in this order; the first one that decides wins:
 *   1. `entry.status` values that map directly
 *      (ok / skipped / running → null, cancelled, lost, timeout).
 *   2. `entry.terminalReason`.
 *   3. High-precedence text patterns (policy block, leading "cancel",
 *      subagent) — checked BEFORE job-health on purpose.
 *   4. `classifyRunHealth(entry).status`.
 *   5. Remaining text heuristics over `error` + `outputPreview`, ending in a
 *      'tool_error' catch-all.
 *
 * @param entry Run entry; reads `status`, `terminalReason`, `error` and `outputPreview`.
 * @returns One of the RunFailureCategory strings, or null for non-failures.
 */
export function classifyRunFailure(entry) {
    // 1. Statuses that decide the outcome on their own.
    switch (entry.status) {
        case 'ok':
        case 'skipped':
        case 'running':
            // Non-failures don't get a category.
            return null;
        case 'cancelled':
            // 'cancelled' is its own status today; map directly.
            return 'cancelled';
        case 'lost':
            // Lost = daemon-boot sweep closed an orphaned 'running' entry.
            // Treated as infrastructure_error per PRD §9 — the daemon crashed.
            return 'infrastructure_error';
        case 'timeout':
            return 'tool_timeout';
        default:
            break;
    }
    // 2. terminalReason (SDK-reported termination) — the most precise signal.
    switch (entry.terminalReason) {
        case 'max_turns':
            return 'agent_loop_error';
        case 'prompt_too_long':
        case 'rapid_refill_breaker':
            return 'context_error';
        case 'blocking_limit':
        case 'tool_deferred':
            return 'tool_error';
        case 'image_error':
            return 'model_output_error';
        case 'aborted_streaming':
        case 'aborted_tools':
            return 'cancelled';
        case 'stop_hook_prevented':
        case 'hook_stopped':
            return 'prompt_error';
        case 'model_error':
            return 'model_error';
        // 'completed' should never land here (status would be 'ok')
        default:
            // Fall through to job-health + error string heuristics
            break;
    }
    // Lower-cased text every pattern below is matched against.
    const blob = ((entry.error ?? '') + ' ' + (entry.outputPreview ?? '')).toLowerCase();
    // 3. High-precedence patterns that must be classified BEFORE handing to
    // job-health (which collapses "permission denied" into tool_scope, but
    // PRD §9 says hook-blocked permission denials are prompt_error).
    if (/permission denied|policy violation|prompt[- ]injection|guardrail|blocked by hook/.test(blob)) {
        return 'prompt_error';
    }
    // Only a *leading* "cancel" counts here; "cancel" elsewhere in the text
    // is handled by the lower-priority heuristic further down.
    if (/^cancel|user (?:interrupt|abort|stopped)/.test(blob)) {
        return 'cancelled';
    }
    if (/subagent|sub[- ]agent failed|delegated agent/.test(blob)) {
        return 'subagent_error';
    }
    // 4. Use the existing health classifier for buckets it already knows about.
    try {
        const health = classifyRunHealth(entry);
        switch (health.status) {
            case 'usage_blocked':
            case 'auth':
            case 'rate_limited':
                return 'model_error';
            case 'context_overflow':
            case 'prompt_too_large':
                return 'context_error';
            case 'tool_scope':
            case 'partial':
                // 'partial': delivery-failed runs surface as tool_error in the new taxonomy
                return 'tool_error';
            case 'failed':
            case 'unknown':
            default:
                // Disambiguate via error string below
                break;
        }
    }
    catch {
        // job-health threw — proceed with heuristics
    }
    // 5. Error-string heuristics. Last resort. Order matters: more specific
    // patterns first so the catch-all doesn't swallow them. (The policy and
    // subagent patterns are not repeated here — step 3 already returned for them.)
    if (!blob.trim())
        return 'infrastructure_error';
    // `\bi ` so that e.g. "api can't connect" is not mistaken for a model refusal.
    if (/refusal|cannot (?:assist|help|comply)|\bi (?:can'?t|am unable)/.test(blob))
        return 'model_output_error';
    if (/invalid (?:tool|function) (?:call|input|json)|malformed tool|tool .* invalid arguments/.test(blob))
        return 'model_output_error';
    if (/tool .* time(d)? ?out|exceeded .* deadline|tool deadline/.test(blob))
        return 'tool_timeout';
    if (/schema|validation failed|did not validate|does not match schema/.test(blob))
        return 'schema_error';
    if (/context|too long|maximum context|exceeds.*tokens|input is too long/.test(blob))
        return 'context_error';
    if (/cancel|user (?:interrupt|abort|stopped)/.test(blob))
        return 'cancelled';
    // `\boom` matches "oom", "oom-killer", "oomkilled" but not "room"/"zoom".
    // `enoent` already covers "spawn … ENOENT" because blob is lower-cased.
    if (/\boom|out of memory|enospc|enoent|enotfound|process .* exited|terminated/.test(blob))
        return 'infrastructure_error';
    // HTTP status codes are matched as whole numbers so that ports, ids or
    // durations that merely contain the digits do not count.
    if (/\b(?:401|403)\b|unauthor|forbidden|invalid api key|api[- ]key/.test(blob))
        return 'model_error';
    if (/\b429\b|rate.?limit|quota/.test(blob))
        return 'model_error';
    if (/credit|billing|usage limit/.test(blob))
        return 'model_error';
    if (/(network|fetch|connect).*(fail|reset|refused|timeout)/.test(blob))
        return 'infrastructure_error';
    // Default catch-all — the run failed but the cause isn't explicit.
    return 'tool_error';
}
|
|
141
|
+
/**
 * Human-readable label for a failure category — surfaced on dashboards.
 *
 * @param cat A RunFailureCategory value.
 * @returns The display label. An unrecognised value is returned as its own
 *   string form rather than `undefined`, so callers always get a string.
 */
export function failureCategoryLabel(cat) {
    switch (cat) {
        case 'model_error': return 'Model API';
        case 'model_output_error': return 'Bad LLM output';
        case 'tool_error': return 'Tool failed';
        case 'tool_timeout': return 'Tool timeout';
        case 'schema_error': return 'Schema mismatch';
        case 'context_error': return 'Context exceeded';
        case 'prompt_error': return 'Blocked by policy';
        case 'agent_loop_error': return 'Loop limit';
        case 'subagent_error': return 'Subagent failed';
        case 'infrastructure_error': return 'Infrastructure';
        case 'cancelled': return 'Cancelled';
        // Not in the known set (e.g. a value written by a newer version):
        // show the raw id instead of falling off the switch.
        default: return String(cat);
    }
}
|
|
157
|
+
/**
 * Color hint for the dashboard pill.
 *
 * @param cat A RunFailureCategory value.
 * @returns A CSS `var(--…)` expression. An unrecognised value gets the same
 *   red as the generic failure buckets rather than `undefined`.
 */
export function failureCategoryColor(cat) {
    switch (cat) {
        case 'cancelled':
            return 'var(--text-muted)';
        case 'tool_timeout':
        case 'agent_loop_error':
        case 'context_error':
            return 'var(--yellow)';
        case 'prompt_error':
        case 'schema_error':
            return 'var(--purple)';
        case 'model_error':
        case 'model_output_error':
            return 'var(--accent)';
        case 'infrastructure_error':
        case 'tool_error':
        case 'subagent_error':
        // Unknown categories fall back to red instead of falling off the switch.
        default:
            return 'var(--red)';
    }
}
|
|
173
|
+
//# sourceMappingURL=failure-taxonomy.js.map
|
package/dist/types.d.ts
CHANGED
|
@@ -448,6 +448,16 @@ export interface RunEvent {
|
|
|
448
448
|
/** Subagent id when kind='subagent_*'. */
|
|
449
449
|
agentId?: string;
|
|
450
450
|
}
|
|
451
|
+
/**
|
|
452
|
+
* PRD §9 / 1.18.87: 11-category failure taxonomy. Replaces the existing
|
|
453
|
+
* JobHealthKind union for surfacing-on-the-dashboard purposes (job-health.ts
|
|
454
|
+
* stays as the lower-level classifier and feeds into this).
|
|
455
|
+
*
|
|
456
|
+
* Stamped on CronRunEntry.failureCategory at write-time when the run is a
|
|
457
|
+
* failure (status: 'error' | 'timeout' | 'lost' | 'cancelled' | retried-final). Powers
|
|
458
|
+
* the Run list filter chip and the Run detail viewer's failure pill.
|
|
459
|
+
*/
|
|
460
|
+
export type RunFailureCategory = 'model_error' | 'model_output_error' | 'tool_error' | 'tool_timeout' | 'schema_error' | 'context_error' | 'prompt_error' | 'agent_loop_error' | 'subagent_error' | 'infrastructure_error' | 'cancelled';
|
|
451
461
|
export interface CronRunEntry {
|
|
452
462
|
/** PRD §6 / 1.18.85: stable run UUID. Optional only because pre-1.18.85
|
|
453
463
|
* entries don't have it; new entries always do. The Event store keys
|
|
@@ -500,6 +510,10 @@ export interface CronRunEntry {
|
|
|
500
510
|
* Discord) so the Run list can filter by source instead of guessing
|
|
501
511
|
* via heuristics on attempt count. */
|
|
502
512
|
trigger?: 'manual' | 'scheduled' | 'webhook' | 'api' | 'fork' | 'resume' | 'discord' | 'after';
|
|
513
|
+
/** PRD §9 / 1.18.87: PRD-canonical failure bucket. Set on every entry
|
|
514
|
+
* whose status indicates a failure (error/timeout/lost/cancelled). The
|
|
515
|
+
* Run list filter chip and Run detail header read from this field. */
|
|
516
|
+
failureCategory?: RunFailureCategory;
|
|
503
517
|
/** PRD Phase 1: did the run accomplish what it was supposed to?
|
|
504
518
|
* Computed at run-end when the Task has successSchema or successCriteriaText.
|
|
505
519
|
* - status='pass' both configured checks passed (or the only one configured did)
|