clementine-agent 1.18.10 → 1.18.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/assistant.js +31 -13
- package/dist/cli/index.js +2 -0
- package/dist/gateway/cron-diagnostic-turn.d.ts +11 -0
- package/dist/gateway/cron-diagnostic-turn.js +242 -0
- package/dist/gateway/failure-diagnostics.d.ts +1 -0
- package/dist/gateway/failure-diagnostics.js +126 -11
- package/dist/gateway/router.js +28 -0
- package/dist/index.js +3 -0
- package/package.json +1 -1
package/dist/agent/assistant.js
CHANGED
|
@@ -1959,11 +1959,18 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
1959
1959
|
reason: bundles.length > 0 ? 'matched' : 'empty',
|
|
1960
1960
|
};
|
|
1961
1961
|
};
|
|
1962
|
+
const autonomousToolRun = isHeartbeat || isCron || isPlanStep || isUnleashed;
|
|
1962
1963
|
const promptToolRoute = routeToolSurface(promptScopeText);
|
|
1963
1964
|
const profileToolRoute = routeToolSurface(profileScopeText);
|
|
1964
1965
|
const contextToolRoute = routeToolSurface(contextRoutingText);
|
|
1966
|
+
const promptHasToolRoute = promptToolRoute.fullSurface || promptToolRoute.bundles.length > 0;
|
|
1967
|
+
const directFollowupNeedsContextTools = intentClassification?.type === 'followup'
|
|
1968
|
+
|| /^(yes|yep|yeah|go|go ahead|do it|continue|pick up|use that|run it|send it|same thing)\b/i.test(promptScopeText.trim());
|
|
1969
|
+
const allowContextToolRoute = autonomousToolRun || (!promptHasToolRoute && directFollowupNeedsContextTools);
|
|
1965
1970
|
const safeProfileToolRoute = profileToolRoute.fullSurface ? emptyToolRoute() : profileToolRoute;
|
|
1966
|
-
const safeContextToolRoute = contextToolRoute.fullSurface
|
|
1971
|
+
const safeContextToolRoute = allowContextToolRoute && !contextToolRoute.fullSurface
|
|
1972
|
+
? contextToolRoute
|
|
1973
|
+
: emptyToolRoute();
|
|
1967
1974
|
const toolRoute = mergeToolRoutes(promptToolRoute, mergeToolRoutes(safeProfileToolRoute, safeContextToolRoute));
|
|
1968
1975
|
let allowedTools = [];
|
|
1969
1976
|
const addAllowed = (...tools) => {
|
|
@@ -1977,10 +1984,9 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
1977
1984
|
};
|
|
1978
1985
|
const scopeText = [
|
|
1979
1986
|
directScopeText,
|
|
1980
|
-
contextRoutingText,
|
|
1987
|
+
allowContextToolRoute ? contextRoutingText : '',
|
|
1981
1988
|
].filter(Boolean).join('\n').toLowerCase();
|
|
1982
1989
|
const promptScopeLower = promptScopeText.toLowerCase();
|
|
1983
|
-
const autonomousToolRun = isHeartbeat || isCron || isPlanStep || isUnleashed;
|
|
1984
1990
|
const taskIntent = intentClassification?.type === 'task' || autonomousToolRun;
|
|
1985
1991
|
const memoryNeeded = autonomousToolRun
|
|
1986
1992
|
|| retrievalContext.trim().length > 0
|
|
@@ -3110,6 +3116,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
3110
3116
|
// Flipped true on the first intervention; subsequent replies go through
|
|
3111
3117
|
// un-validated (but still logged).
|
|
3112
3118
|
let contradictionRetried = false;
|
|
3119
|
+
let contextRecoveryRetries = 0;
|
|
3113
3120
|
try {
|
|
3114
3121
|
for (let attempt = 0; attempt <= PersonalAssistant.RATE_LIMIT_MAX_RETRIES; attempt++) {
|
|
3115
3122
|
const sdkOptions = await this.buildOptions({
|
|
@@ -3501,7 +3508,8 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
3501
3508
|
this.exchangeCounts.set(sessionKey, 0);
|
|
3502
3509
|
this._compactedSessions.delete(sessionKey);
|
|
3503
3510
|
}
|
|
3504
|
-
if (attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
|
|
3511
|
+
if (attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES && contextRecoveryRetries < 1) {
|
|
3512
|
+
contextRecoveryRetries++;
|
|
3505
3513
|
prompt = buildContextRecoveredPrompt(prompt, preRotationSnapshot);
|
|
3506
3514
|
preRotationSnapshot = null;
|
|
3507
3515
|
responseText = '';
|
|
@@ -3554,11 +3562,22 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
3554
3562
|
if (staleSession && attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
|
|
3555
3563
|
responseText = '';
|
|
3556
3564
|
if (contextRecovery) {
|
|
3557
|
-
|
|
3558
|
-
|
|
3559
|
-
|
|
3565
|
+
if (contextRecoveryRetries >= 1) {
|
|
3566
|
+
responseText = contextThrashRecoveryNotice();
|
|
3567
|
+
staleSession = false;
|
|
3568
|
+
contextRecovery = false;
|
|
3569
|
+
}
|
|
3570
|
+
else {
|
|
3571
|
+
contextRecoveryRetries++;
|
|
3572
|
+
prompt = buildContextRecoveredPrompt(prompt, preRotationSnapshot);
|
|
3573
|
+
preRotationSnapshot = null;
|
|
3574
|
+
contextRecovery = false;
|
|
3575
|
+
continue;
|
|
3576
|
+
}
|
|
3577
|
+
}
|
|
3578
|
+
else {
|
|
3579
|
+
continue;
|
|
3560
3580
|
}
|
|
3561
|
-
continue;
|
|
3562
3581
|
}
|
|
3563
3582
|
if (staleSession && contextRecovery && !responseText.trim()) {
|
|
3564
3583
|
responseText = contextThrashRecoveryNotice();
|
|
@@ -3588,11 +3607,10 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
3588
3607
|
this.exchangeCounts.set(sessionKey, 0);
|
|
3589
3608
|
this._compactedSessions.delete(sessionKey);
|
|
3590
3609
|
}
|
|
3591
|
-
if (attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES) {
|
|
3592
|
-
|
|
3593
|
-
|
|
3594
|
-
|
|
3595
|
-
});
|
|
3610
|
+
if (attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES && contextRecoveryRetries < 1) {
|
|
3611
|
+
contextRecoveryRetries++;
|
|
3612
|
+
prompt = buildContextRecoveredPrompt(prompt, preRotationSnapshot);
|
|
3613
|
+
preRotationSnapshot = null;
|
|
3596
3614
|
responseText = '';
|
|
3597
3615
|
continue;
|
|
3598
3616
|
}
|
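package/dist/cli/index.js
CHANGED
[The +2 -0 hunk for this file is missing from this capture.]
package/dist/gateway/cron-diagnostic-turn.d.ts
ADDED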
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export interface CronDiagnosticRequest {
|
|
2
|
+
jobName: string;
|
|
3
|
+
wantsFix: boolean;
|
|
4
|
+
}
|
|
5
|
+
export declare function detectCronDiagnosticRequest(text: string, opts?: {
|
|
6
|
+
baseDir?: string;
|
|
7
|
+
}): CronDiagnosticRequest | null;
|
|
8
|
+
export declare function buildCronDiagnosticResponse(text: string, opts?: {
|
|
9
|
+
baseDir: string;
|
|
10
|
+
}): string | null;
|
|
11
|
+
//# sourceMappingURL=cron-diagnostic-turn.d.ts.map
|
|
package/dist/gateway/cron-diagnostic-turn.js
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import { existsSync, readFileSync, readdirSync } from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
const DIAGNOSTIC_RE = /\b(fix|repair|debug|diagnos(?:e|is|tic)?|what broke|why (?:did|is)|failed|failing|failure|broken|stuck|taking forever|too long|issue|problem)\b/i;
|
|
4
|
+
/**
 * Canonicalizes free text for keyword matching.
 *
 * @param {string} text - Raw user text.
 * @returns {string} The text trimmed, lowercased, and with every run of
 *   whitespace collapsed to a single space.
 */
function normalizeText(text) {
    const lowered = text.trim().toLowerCase();
    // After trimming there is no leading/trailing whitespace, so splitting on
    // whitespace runs and re-joining is the same as collapsing them.
    return lowered.split(/\s+/).join(' ');
}
|
|
7
|
+
/**
 * Reduces text to a separator-free matching key: lowercase ASCII letters and
 * digits only, concatenated in order.
 *
 * No camelCase pre-splitting is needed: any separator inserted between
 * letters would be discarded by the alphanumeric extraction below, so
 * "dailyReport", "daily-report" and "daily report" all yield "dailyreport".
 *
 * @param {string} text - Job name or user text.
 * @returns {string} The compact key, or '' when the text has no ASCII
 *   alphanumerics.
 */
function compactForMatch(text) {
    const tokens = text.toLowerCase().match(/[a-z0-9]+/g);
    return tokens === null ? '' : tokens.join('');
}
|
|
14
|
+
/**
 * Maps a job name to the stem used for its run-history file.
 *
 * @param {string} jobName - Job name, possibly containing ':' or spaces.
 * @returns {string} The name with every character other than ASCII letters,
 *   digits, '_' and '-' replaced by '_'.
 */
function safeRunFileName(jobName) {
    // \w is exactly [A-Za-z0-9_] here (no unicode/ignore-case flags).
    const unsafeChar = /[^\w-]/g;
    return jobName.replace(unsafeChar, '_');
}
|
|
17
|
+
/**
 * Pulls the first job-name-looking token out of free text.
 *
 * A candidate is two or more alphanumeric segments joined by '-', '_' or
 * ':'. Underscores are rewritten to hyphens in the result. Because the
 * pattern requires at least one separator, every match already contains a
 * '-' or ':' after that rewrite, so no further validation is needed.
 *
 * @param {string} text - Raw user text.
 * @returns {string|null} The first candidate, or null when none is present.
 */
function extractHyphenatedJobName(text) {
    const match = text.match(/\b[a-z0-9]+(?:[-_:][a-z0-9]+)+\b/i);
    return match ? match[0].replace(/_/g, '-') : null;
}
|
|
26
|
+
/**
 * Trims a raw YAML scalar and removes one pair of matching surrounding
 * quotes (single or double) when present.
 *
 * @param {string} value - Raw text following `key:` on a YAML line.
 * @returns {string} The trimmed value without its enclosing quotes.
 */
function cleanYamlScalar(value) {
    const trimmed = value.trim();
    const opener = trimmed.charAt(0);
    const isQuoteChar = opener === '"' || opener === "'";
    if (isQuoteChar && trimmed.endsWith(opener)) {
        return trimmed.slice(1, -1);
    }
    return trimmed;
}
|
|
33
|
+
/**
 * Scans a CRON.md file line by line for `- name: <job>` entries and collects,
 * for each one, the scalar config lines that precede its `prompt:` field.
 *
 * This is a line-oriented scan, not a YAML parse. An entry runs from its
 * `- name:` line up to the next `- name:` line at the same indentation (or
 * end of file). Collection stops at the entry's `prompt:` line so prompt text
 * is never read into the result.
 *
 * @param {string} file - Path to the CRON.md file.
 * @param {string} [agentSlug] - When given, job names are prefixed with
 *   `<agentSlug>:` and the slug is recorded on each entry.
 * @returns {Array<{jobName: string, bareName: string, agentSlug?: string, scalarLines: string[]}>}
 *   Parsed entries; an empty array when the file is missing or unreadable.
 */
function readCronEntriesFromFile(file, agentSlug) {
    if (!existsSync(file)) {
        return [];
    }
    const scalarFieldRe = /^\s+(- name:|schedule:|enabled:|tier:|mode:|max_hours:|max_turns:|max_retries:|agentSlug:|work_dir:|model:|timeout_ms:|always_deliver:)/;
    try {
        const allLines = readFileSync(file, 'utf-8').split('\n');
        const entries = [];
        allLines.forEach((line, start) => {
            const header = /^(\s*)-\s+name:\s*(.+?)\s*$/.exec(line);
            if (header === null) {
                return;
            }
            const indent = header[1].length;
            const bareName = cleanYamlScalar(header[2]);
            if (!bareName) {
                return;
            }
            // The entry ends at the next sibling (same indent) or at EOF.
            const siblingOffset = allLines.slice(start + 1).findIndex((candidate) => {
                const sibling = /^(\s*)-\s+name:\s+/.exec(candidate);
                return sibling !== null && sibling[1].length === indent;
            });
            const end = siblingOffset === -1 ? allLines.length : start + 1 + siblingOffset;
            const scalarLines = [];
            for (const entryLine of allLines.slice(start, end)) {
                if (/^\s+prompt:\s*/.test(entryLine)) {
                    break;
                }
                if (scalarFieldRe.test(entryLine)) {
                    scalarLines.push(entryLine.trim());
                }
            }
            entries.push({
                jobName: agentSlug ? `${agentSlug}:${bareName}` : bareName,
                bareName,
                ...(agentSlug ? { agentSlug } : {}),
                scalarLines,
            });
        });
        return entries;
    }
    catch {
        // Best-effort: an unreadable file contributes no entries.
        return [];
    }
}
|
|
79
|
+
/**
 * Gathers cron entries from the global CRON.md and from each agent's CRON.md.
 *
 * Looks at `<baseDir>/vault/00-System/CRON.md` first, then at
 * `<baseDir>/vault/00-System/agents/<slug>/CRON.md` for every directory entry
 * under `agents`. Agent entries are tagged with their slug.
 *
 * @param {string} baseDir - Root data directory.
 * @returns {Array<object>} All entries found; if listing the agents directory
 *   fails, whatever was gathered up to that point.
 */
function collectCronJobConfigs(baseDir) {
    const systemDir = path.join(baseDir, 'vault', '00-System');
    const configs = readCronEntriesFromFile(path.join(systemDir, 'CRON.md'));
    const agentsDir = path.join(systemDir, 'agents');
    if (existsSync(agentsDir)) {
        try {
            readdirSync(agentsDir).forEach((slug) => {
                configs.push(...readCronEntriesFromFile(path.join(agentsDir, slug, 'CRON.md'), slug));
            });
        }
        catch {
            // Best-effort: keep the entries collected so far.
        }
    }
    return configs;
}
|
|
94
|
+
/**
 * Decides whether user text refers to a configured cron job.
 *
 * Two strategies are tried:
 *  1. Separator-insensitive: the compacted job name (bare or full) appears in
 *     the compacted text. Only used for names with at least 5 alphanumerics,
 *     so short names cannot match inside unrelated words.
 *  2. Literal: the lowercased bare or full name appears in the normalized
 *     text as a whole token, i.e. not flanked by a letter or digit. This
 *     keeps a short job name such as "sync" from matching inside "async".
 *
 * @param {{bareName: string, jobName: string}} config - Parsed cron entry.
 * @param {string} text - Raw user text.
 * @returns {boolean} True when the text mentions the job.
 */
function configMatchesText(config, text) {
    const compactText = compactForMatch(text);
    const compactNames = [compactForMatch(config.bareName), compactForMatch(config.jobName)];
    if (compactNames.some((name) => name.length >= 5 && compactText.includes(name))) {
        return true;
    }
    const isAlnum = (ch) => ch !== undefined && /[a-z0-9]/i.test(ch);
    const containsToken = (haystack, needle) => {
        if (!needle) {
            return false;
        }
        let at = haystack.indexOf(needle);
        while (at !== -1) {
            const before = haystack[at - 1];
            const after = haystack[at + needle.length];
            if (!isAlnum(before) && !isAlnum(after)) {
                return true;
            }
            at = haystack.indexOf(needle, at + 1);
        }
        return false;
    };
    const normalized = normalizeText(text);
    return containsToken(normalized, config.bareName.toLowerCase())
        || containsToken(normalized, config.jobName.toLowerCase());
}
|
|
107
|
+
/**
 * Finds the configured cron job that the text most specifically refers to.
 *
 * Among all configured jobs that match the text, the one whose compacted full
 * name is longest wins; on a tie the earliest configured job is kept.
 *
 * @param {string} text - Raw user text.
 * @param {string} baseDir - Root data directory holding the cron configs.
 * @returns {string|null} The winning job's full name, or null if none match.
 */
function resolveConfiguredJobName(text, baseDir) {
    let best = null;
    let bestLength = -1;
    for (const config of collectCronJobConfigs(baseDir)) {
        if (!configMatchesText(config, text)) {
            continue;
        }
        const length = compactForMatch(config.jobName).length;
        // Strictly greater keeps the first of equally long candidates.
        if (length > bestLength) {
            best = config;
            bestLength = length;
        }
    }
    return best?.jobName ?? null;
}
|
|
113
|
+
/**
 * Detects whether a chat message is asking to diagnose or fix a cron job.
 *
 * The message must contain a diagnostic keyword. The job is then resolved
 * either from the configured jobs (when `opts.baseDir` is given) or, failing
 * that, from an explicit hyphen/colon-joined token, which is accepted only if
 * the message also mentions cron/job/task/run/schedule.
 *
 * @param {string} text - Raw user message.
 * @param {{baseDir?: string}} [opts] - Where to look for configured jobs.
 * @returns {{jobName: string, wantsFix: boolean}|null} The request, or null
 *   when the message is not a cron diagnostic ask. `wantsFix` is true when
 *   the message contains the word "fix" or "repair".
 */
export function detectCronDiagnosticRequest(text, opts = {}) {
    const normalized = normalizeText(text);
    if (!DIAGNOSTIC_RE.test(normalized)) {
        return null;
    }
    const toRequest = (jobName) => ({
        jobName,
        wantsFix: /\b(fix|repair)\b/i.test(normalized),
    });
    if (opts.baseDir) {
        const configuredJobName = resolveConfiguredJobName(text, opts.baseDir);
        if (configuredJobName) {
            return toRequest(configuredJobName);
        }
    }
    const explicit = extractHyphenatedJobName(text);
    if (!explicit) {
        return null;
    }
    const mentionsCron = /\b(cron|job|task|run|runs|schedule|scheduled)\b/i.test(normalized);
    return mentionsCron ? toRequest(explicit) : null;
}
|
|
135
|
+
/**
 * Loads the most recent run records for a job from its JSONL history file at
 * `<baseDir>/cron/runs/<safe job name>.jsonl`.
 *
 * The last `limit` non-empty lines are parsed independently. Lines that are
 * not valid JSON, or that parse to anything other than a non-empty plain
 * object (for example `null`, a number, a string, or an array), are skipped
 * individually, so one stray line cannot discard the rest of the history.
 *
 * @param {string} baseDir - Root data directory.
 * @param {string} jobName - Full job name.
 * @param {number} [limit=20] - Maximum number of trailing lines to consider.
 * @returns {object[]} Run records in file order (oldest first); an empty
 *   array when the file is missing or unreadable.
 */
function readRecentRuns(baseDir, jobName, limit = 20) {
    const file = path.join(baseDir, 'cron', 'runs', `${safeRunFileName(jobName)}.jsonl`);
    if (!existsSync(file)) {
        return [];
    }
    let raw;
    try {
        raw = readFileSync(file, 'utf-8');
    }
    catch {
        // Best-effort: an unreadable history is treated as no history.
        return [];
    }
    const runs = [];
    for (const line of raw.trim().split('\n').filter(Boolean).slice(-limit)) {
        let entry;
        try {
            entry = JSON.parse(line);
        }
        catch {
            continue;
        }
        // JSON.parse can legally return null, primitives, or arrays.
        const isRecord = entry !== null && typeof entry === 'object' && !Array.isArray(entry);
        if (isRecord && Object.keys(entry).length > 0) {
            runs.push(entry);
        }
    }
    return runs;
}
|
|
159
|
+
/**
 * Formats a millisecond duration as a short human-readable label.
 *
 * Output is `<n>ms` below one second, `<n>s` below 90 seconds, and `<n>m`
 * otherwise, each rounded to the nearest whole unit. The sub-second branch
 * is chosen on the rounded value so that e.g. 999.6 renders as "1s" rather
 * than "1000ms".
 *
 * @param {unknown} ms - Duration in milliseconds.
 * @returns {string} The label, or 'unknown duration' when `ms` is not a
 *   finite, non-negative number.
 */
function summarizeDuration(ms) {
    if (typeof ms !== 'number' || !Number.isFinite(ms) || ms < 0) {
        return 'unknown duration';
    }
    const wholeMs = Math.round(ms);
    if (wholeMs < 1000) {
        return `${wholeMs}ms`;
    }
    const seconds = Math.round(ms / 1000);
    if (seconds < 90) {
        return `${seconds}s`;
    }
    return `${Math.round(seconds / 60)}m`;
}
|
|
170
|
+
/**
 * Flattens any value into a single trimmed line of bounded length.
 *
 * @param {unknown} value - Value to render; null/undefined become ''.
 * @param {number} [max=220] - Maximum length of the result.
 * @returns {string} The stringified value with whitespace runs collapsed to
 *   single spaces, trimmed, and cut to at most `max` characters.
 */
function oneLine(value, max = 220) {
    const raw = value == null ? '' : String(value);
    const collapsed = raw.replace(/\s+/g, ' ').trim();
    return collapsed.slice(0, max);
}
|
|
176
|
+
/**
 * Tells whether a run record carries a context-overflow / autocompact-thrash
 * signature in its terminal reason, error, or output preview.
 *
 * @param {object|null|undefined} run - Run record from the history file.
 * @returns {boolean} True when a thrash signature is present; false for a
 *   missing run.
 */
function isContextThrashRun(run) {
    if (!run) {
        return false;
    }
    const haystack = [run.terminalReason, run.error, run.outputPreview]
        .map((field) => field ?? '')
        .join(' ');
    const thrashSignature = /rapid_refill_breaker|autocompact\s+is\s+thrashing|context\s+refilled\s+to\s+the\s+limit/i;
    return thrashSignature.test(haystack);
}
|
|
182
|
+
/**
 * Tells whether a run record indicates the job stopped at its turn cap,
 * based on its terminal reason, error, or output preview.
 *
 * @param {object|null|undefined} run - Run record from the history file.
 * @returns {boolean} True when a turn-cap signature is present; false for a
 *   missing run.
 */
function isMaxTurnsRun(run) {
    if (!run) {
        return false;
    }
    const haystack = [run.terminalReason, run.error, run.outputPreview]
        .map((field) => field ?? '')
        .join(' ');
    const turnCapSignature = /max_turns|maximum number of turns/i;
    return turnCapSignature.test(haystack);
}
|
|
188
|
+
/**
 * Returns the scalar config lines recorded for a job.
 *
 * An entry whose full name equals `jobName` is preferred. Only when no such
 * entry exists does the lookup fall back to the first entry sharing the bare
 * name (the part after the first ':'). This prevents a global job, or another
 * agent's job, with the same bare name from shadowing the exact
 * `agent:job` entry.
 *
 * @param {string} baseDir - Root data directory holding the cron configs.
 * @param {string} jobName - Full job name, optionally `agent:job`.
 * @returns {string[]} The entry's scalar lines, or [] when nothing matches.
 */
function readCronScalarConfig(baseDir, jobName) {
    const separator = jobName.indexOf(':');
    const bareName = separator === -1 ? jobName : jobName.slice(separator + 1);
    const configs = collectCronJobConfigs(baseDir);
    const config = configs.find((entry) => entry.jobName === jobName)
        ?? configs.find((entry) => entry.bareName === bareName);
    return config?.scalarLines ?? [];
}
|
|
193
|
+
/**
 * Builds a local, bounded diagnostic reply for a "what broke / fix this cron
 * job" message, using only the job's recent run summaries and scalar config.
 * The cron job itself is never executed.
 *
 * Returns null (so the caller can fall through to its normal handling) when:
 *  - no base directory is available,
 *  - the message is not a cron diagnostic request, or
 *  - the detected job name has neither run history nor scalar config, in
 *    which case there is no evidence such a job exists and claiming to have
 *    "found" it would be misleading (e.g. a hyphenated word such as
 *    "follow-up" picked up from ordinary chat).
 *
 * @param {string} text - Raw user message.
 * @param {{baseDir: string}} [opts] - Root data directory; defaults to the
 *   CLEMENTINE_HOME environment variable.
 * @returns {string|null} Multi-line diagnostic text, or null.
 */
export function buildCronDiagnosticResponse(text, opts = { baseDir: process.env.CLEMENTINE_HOME || '' }) {
    const baseDir = opts.baseDir;
    if (!baseDir) {
        return null;
    }
    const request = detectCronDiagnosticRequest(text, { baseDir });
    if (!request) {
        return null;
    }
    const runs = readRecentRuns(baseDir, request.jobName, 20);
    const config = readCronScalarConfig(baseDir, request.jobName);
    const latest = runs.at(-1);
    if (!latest && config.length === 0) {
        // Nothing on disk backs this job name; do not answer on its behalf.
        return null;
    }
    const lines = [
        `I found ${request.jobName}. I am not running the job.`,
    ];
    if (!latest) {
        lines.push('I do not see a run history file for that job yet, so there is nothing concrete to repair from logs.');
        lines.push(`Current config: ${config.join(' | ')}`);
        return lines.join('\n');
    }
    // Most recent earlier run that finished ok without a thrash signature.
    const previousSuccess = runs
        .slice(0, -1)
        .reverse()
        .find((run) => run.status === 'ok' && !isContextThrashRun(run));
    const latestLabel = [
        latest.startedAt ? `started ${latest.startedAt}` : 'latest run',
        latest.status ? `status ${latest.status}` : '',
        latest.terminalReason ? `terminal ${latest.terminalReason}` : '',
        summarizeDuration(latest.durationMs),
    ].filter(Boolean).join(', ');
    lines.push(`Latest run: ${latestLabel}.`);
    if (previousSuccess?.startedAt) {
        lines.push(`Last clean success I see: ${previousSuccess.startedAt} (${summarizeDuration(previousSuccess.durationMs)}).`);
    }
    if (config.length > 0) {
        lines.push(`Current config: ${config.join(' | ')}.`);
    }
    // Context thrash is checked before max_turns and before the ok status.
    if (isContextThrashRun(latest)) {
        lines.push('Root cause: context overflow/autocompact thrash, not a downstream integration failure unless the run error says so. The job or diagnostic path is letting broad file reads, run history, or raw tool output fill the context window.');
        lines.push('Safe fix: tighten the job prompt or add a job prompt override. Keep reads bounded, cap large record pulls to small batches, summarize raw JSON from temp files, and do not treat this as a max_turns-only fix for an unleashed job.');
    }
    else if (isMaxTurnsRun(latest)) {
        lines.push('Root cause: the job hit its turn cap before finishing. A max_turns bump can help only if the output is already bounded.');
    }
    else if (latest.status === 'ok') {
        lines.push('The latest run is recorded as ok. I would not change the job unless the delivered result was wrong.');
    }
    else {
        const detail = oneLine(latest.error || latest.outputPreview);
        lines.push(`Root cause from latest run: ${detail || 'the run failed without a useful error preview.'}`);
    }
    if (request.wantsFix) {
        lines.push('Next change should be a config/prompt repair, not a retry. I should return this diagnosis quickly and only apply a bounded prompt/config patch when asked.');
    }
    return lines.join('\n');
}
|
|
242
|
+
//# sourceMappingURL=cron-diagnostic-turn.js.map
|
|
package/dist/gateway/failure-diagnostics.d.ts
CHANGED
|
@@ -65,6 +65,7 @@ export interface Diagnosis {
|
|
|
65
65
|
riskLevel: 'low' | 'medium' | 'high';
|
|
66
66
|
generatedAt: string;
|
|
67
67
|
}
|
|
68
|
+
export declare function diagnoseKnownFailurePattern(broken: BrokenJob, jobDef: string | null, recentRuns: string): Diagnosis | null;
|
|
68
69
|
/**
|
|
69
70
|
* Diagnose one broken job. Returns a cached diagnosis if one exists and is
|
|
70
71
|
* fresher than 24h; otherwise invokes the LLM. Always best-effort — returns
|
|
package/dist/gateway/failure-diagnostics.js
CHANGED
|
@@ -79,12 +79,18 @@ function readJobDefinition(jobName) {
|
|
|
79
79
|
continue;
|
|
80
80
|
try {
|
|
81
81
|
const raw = readFileSync(file, 'utf-8');
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
82
|
+
const lines = raw.split('\n');
|
|
83
|
+
const start = lines.findIndex((line) => line.trim() === `- name: ${bareName}`);
|
|
84
|
+
if (start === -1)
|
|
85
|
+
continue;
|
|
86
|
+
let end = lines.length;
|
|
87
|
+
for (let i = start + 1; i < lines.length; i++) {
|
|
88
|
+
if (/^ - name:\s+/.test(lines[i] ?? '')) {
|
|
89
|
+
end = i;
|
|
90
|
+
break;
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
return lines.slice(start, end).join('\n').slice(0, 6000);
|
|
88
94
|
}
|
|
89
95
|
catch { /* skip */ }
|
|
90
96
|
}
|
|
@@ -117,10 +123,12 @@ function readRecentRuns(jobName, limit = 10) {
|
|
|
117
123
|
const summaries = recent.map(line => {
|
|
118
124
|
try {
|
|
119
125
|
const d = JSON.parse(line);
|
|
120
|
-
const
|
|
121
|
-
? `
|
|
122
|
-
|
|
123
|
-
|
|
126
|
+
const detailParts = [
|
|
127
|
+
d.terminalReason ? `terminal=${d.terminalReason}` : '',
|
|
128
|
+
d.error ? `error="${d.error.split('\n')[0].slice(0, 160)}"` : '',
|
|
129
|
+
d.outputPreview ? `preview="${d.outputPreview.slice(0, 160).replace(/\n/g, ' ')}"` : '',
|
|
130
|
+
].filter(Boolean);
|
|
131
|
+
return `${d.startedAt} ${d.status} (${Math.round(d.durationMs / 1000)}s) ${detailParts.join(' ')}`;
|
|
124
132
|
}
|
|
125
133
|
catch {
|
|
126
134
|
return line.slice(0, 160);
|
|
@@ -215,6 +223,101 @@ function buildPrompt(broken, jobDef, agentProfile, recentRuns) {
|
|
|
215
223
|
'}',
|
|
216
224
|
].filter(Boolean).join('\n');
|
|
217
225
|
}
|
|
226
|
+
/**
 * Strips the `<agent>:` prefix from a job name.
 *
 * @param {string} jobName - Full job name, optionally `agent:job`.
 * @returns {string} Everything after the first ':'; the name unchanged when
 *   it contains no ':'.
 */
function bareJobName(jobName) {
    const separator = jobName.indexOf(':');
    if (separator === -1) {
        return jobName;
    }
    return jobName.slice(separator + 1);
}
|
|
229
|
+
/**
 * Builds the job-scoped prompt-override payload proposed for jobs that
 * overflow the context window.
 *
 * @param {string} jobName - Full job name; the override is keyed by its bare
 *   name.
 * @returns {{kind: string, scope: string, scopeKey: string, content: string}}
 *   Override descriptor whose content is a fixed block of bounded-run
 *   guidance.
 */
function promptOverrideForContextOverflow(jobName) {
    const guidance = [
        '# Bounded Run Guidance',
        '',
        'Keep this job inside the context window.',
        '- Do not read full CRON.md, full run histories, or raw integration exports.',
        '- Pull records in batches of 20 or fewer unless the job prompt gives a smaller cap.',
        '- Redirect large command/API output to temp files and summarize IDs, counts, names, statuses, and next actions only.',
        '- Never paste raw integration, email, browser, tool, or other large JSON output into the conversation.',
        '- If context starts filling, stop with a concise partial summary and pending list instead of retrying broad reads.',
    ];
    const scopeKey = bareJobName(jobName);
    const content = guidance.join('\n');
    return { kind: 'prompt-override', scope: 'job', scopeKey, content };
}
|
|
246
|
+
/**
 * Recognizes well-known cron failure signatures without calling the LLM.
 *
 * Evidence is the advisor opinion, the recorded errors, and the recent-run
 * summary. The job's own name is deliberately NOT part of the evidence: a job
 * called "credential-audit" or "blocked-invoices" would otherwise always be
 * diagnosed by its name rather than by what actually failed.
 *
 * Patterns are checked in priority order: context overflow, turn cap,
 * credentials, unavailable tool/capability. Auto-applicable fixes are
 * attached only when a job definition was found.
 *
 * @param {{jobName: string, lastAdvisorOpinion?: string|null, lastErrors?: string[]}} broken
 *   The broken job's summary.
 * @param {string|null} jobDef - The job's definition text, or null if it
 *   could not be located.
 * @param {string} recentRuns - Pre-formatted summary of recent runs.
 * @returns {object|null} A diagnosis (rootCause, confidence, proposedFix,
 *   riskLevel, generatedAt), or null when no known pattern matches.
 */
export function diagnoseKnownFailurePattern(broken, jobDef, recentRuns) {
    const haystack = [
        broken.lastAdvisorOpinion ?? '',
        ...(broken.lastErrors ?? []),
        recentRuns ?? '',
    ].join('\n').toLowerCase();
    const generatedAt = new Date().toISOString();

    if (/rapid_refill_breaker|autocompact.*thrash|context refilled|prompt is too long|prompt too long|context.?length|maximum context|input is too long/.test(haystack)) {
        const autoApply = jobDef ? promptOverrideForContextOverflow(broken.jobName) : undefined;
        return {
            rootCause: 'The job is overflowing the Claude context window. This is usually caused by broad file reads, full run-history reads, or raw integration output being pulled into the prompt.',
            confidence: 'high',
            proposedFix: {
                type: autoApply ? 'prompt_override' : 'prompt_change',
                details: 'Bound the job/diagnostic prompt: read tight chunks, cap batches at 20 records, summarize raw API output from temp files, and stop with a partial summary instead of retrying when context gets tight.',
                ...(autoApply ? { autoApply } : {}),
            },
            riskLevel: 'low',
            generatedAt,
        };
    }

    const maxTurns = haystack.match(/maximum number of turns\s*\(?(\d+)?\)?|max_turns/i);
    if (maxTurns) {
        const turnCeiling = 90;
        const observed = Number(maxTurns[1]);
        const hasObserved = Number.isFinite(observed) && observed > 0;
        const next = hasObserved ? Math.min(turnCeiling, Math.max(15, observed * 3)) : 30;
        if (hasObserved && next <= observed) {
            // The cap is already at or above the ceiling: "raising" it to the
            // ceiling would lower or not change it, so propose bounding output.
            return {
                rootCause: 'The job reached its turn cap before finishing.',
                confidence: 'high',
                proposedFix: {
                    type: 'prompt_change',
                    details: `max_turns is already ${observed}, at or above the ${turnCeiling}-turn ceiling. Do not raise it further; add bounded-output guidance so the job finishes within its turn budget.`,
                },
                riskLevel: 'medium',
                generatedAt,
            };
        }
        return {
            rootCause: 'The job reached its turn cap before finishing.',
            confidence: 'high',
            proposedFix: {
                type: 'config_change',
                details: `Raise max_turns to ${next} only if the prompt already keeps tool output bounded. If output is large, add bounded-output guidance first.`,
                ...(jobDef ? {
                    autoApply: {
                        kind: 'cron',
                        operations: [{ op: 'set', field: 'max_turns', value: next }],
                    },
                } : {}),
            },
            riskLevel: 'medium',
            generatedAt,
        };
    }

    if (/\b(401|403)\b|not authenticated|invalid api key|credential|please run \/login|does not have access/.test(haystack)) {
        return {
            rootCause: 'The latest failures look credential-related.',
            confidence: 'high',
            proposedFix: {
                type: 'credential_refresh',
                details: 'Refresh the affected integration credentials, then run a small probe before re-enabling full job volume.',
            },
            riskLevel: 'low',
            generatedAt,
        };
    }

    if (/no local bash|permission denied|blocked|task_blocked/.test(haystack)) {
        const autoApply = jobDef ? {
            kind: 'prompt-override',
            scope: 'job',
            scopeKey: bareJobName(broken.jobName),
            content: 'Use only tools available to this agent. If local shell access is unavailable, report BLOCKED with the missing capability and do not retry the same unavailable tool.',
        } : undefined;
        return {
            rootCause: 'The job appears to be selecting a tool or capability that is unavailable in its current agent scope.',
            confidence: 'medium',
            proposedFix: {
                type: autoApply ? 'prompt_override' : 'agent_scope',
                details: 'Tighten the job prompt or agent scope so it only uses available tools, and make unavailable-tool failures stop instead of looping.',
                ...(autoApply ? { autoApply } : {}),
            },
            riskLevel: 'medium',
            generatedAt,
        };
    }

    return null;
}
|
|
218
321
|
function parseResponse(raw) {
|
|
219
322
|
try {
|
|
220
323
|
// The model sometimes wraps the JSON in markdown fences; extract the
|
|
@@ -348,12 +451,24 @@ export async function diagnoseBrokenJob(broken, gateway) {
|
|
|
348
451
|
const jobDef = readJobDefinition(broken.jobName);
|
|
349
452
|
const agentProfile = broken.agentSlug ? readAgentProfile(broken.agentSlug) : null;
|
|
350
453
|
const recentRuns = readRecentRuns(broken.jobName, 10);
|
|
454
|
+
const knownDiagnosis = diagnoseKnownFailurePattern(broken, jobDef, recentRuns);
|
|
455
|
+
if (knownDiagnosis) {
|
|
456
|
+
cache[broken.jobName] = knownDiagnosis;
|
|
457
|
+
saveCache(cache);
|
|
458
|
+
logger.info({
|
|
459
|
+
job: broken.jobName,
|
|
460
|
+
confidence: knownDiagnosis.confidence,
|
|
461
|
+
fixType: knownDiagnosis.proposedFix.type,
|
|
462
|
+
}, 'Broken-job diagnosis generated from known pattern');
|
|
463
|
+
return knownDiagnosis;
|
|
464
|
+
}
|
|
351
465
|
const prompt = buildPrompt(broken, jobDef, agentProfile, recentRuns);
|
|
352
466
|
let rawResponse;
|
|
353
467
|
try {
|
|
354
468
|
rawResponse = await gateway.handleCronJob(`diagnose:${broken.jobName}`, prompt, 1, // tier 1 — cheap
|
|
355
469
|
5, // maxTurns — diagnosis doesn't need tools typically
|
|
356
|
-
'haiku'
|
|
470
|
+
'haiku', // model — keep cost negligible
|
|
471
|
+
undefined, 'standard', undefined, undefined, undefined, undefined, { disableAllTools: true });
|
|
357
472
|
}
|
|
358
473
|
catch (err) {
|
|
359
474
|
logger.warn({ err, job: broken.jobName }, 'Diagnostic LLM call failed');
|
package/dist/gateway/router.js
CHANGED
|
@@ -20,6 +20,7 @@ import { events } from '../events/bus.js';
|
|
|
20
20
|
import { listBackgroundTasks } from '../agent/background-tasks.js';
|
|
21
21
|
import { applyAssistantExperienceUpdate, detectLocalTurn } from '../agent/local-turn.js';
|
|
22
22
|
import { updateClementineJson } from '../config/clementine-json.js';
|
|
23
|
+
import { buildCronDiagnosticResponse } from './cron-diagnostic-turn.js';
|
|
23
24
|
const logger = pino({ name: 'clementine.gateway' });
|
|
24
25
|
const INTERACTIVE_FAILURE_LOG = path.join(BASE_DIR, 'self-improve', 'interactive-failures.jsonl');
|
|
25
26
|
/** Idle timeout for interactive chat messages (10 minutes).
|
|
@@ -1006,6 +1007,33 @@ export class Gateway {
|
|
|
1006
1007
|
}, 'chat:latency');
|
|
1007
1008
|
return localResponse;
|
|
1008
1009
|
}
|
|
1010
|
+
// Cron "what broke / fix this job" asks should not spin up a broad SDK
|
|
1011
|
+
// session. They are bounded local diagnostics over run summaries and scalar
|
|
1012
|
+
// config only, and they intentionally do not execute the cron job.
|
|
1013
|
+
if (this.isTrustedPersonalSession(sessionKey)) {
|
|
1014
|
+
const cronDiagnostic = buildCronDiagnosticResponse(text, { baseDir: BASE_DIR });
|
|
1015
|
+
if (cronDiagnostic) {
|
|
1016
|
+
const current = this.sessions.get(sessionKey);
|
|
1017
|
+
if (current?.abortController && !current.abortController.signal.aborted) {
|
|
1018
|
+
current.abortController.abort('replaced-by-cron-diagnostic');
|
|
1019
|
+
logger.info({ sessionKey }, 'Interrupted active chat for local cron diagnostic');
|
|
1020
|
+
}
|
|
1021
|
+
if (onText) {
|
|
1022
|
+
try {
|
|
1023
|
+
await onText(cronDiagnostic);
|
|
1024
|
+
}
|
|
1025
|
+
catch { /* channel streaming is best-effort */ }
|
|
1026
|
+
}
|
|
1027
|
+
logger.info({
|
|
1028
|
+
sessionKey,
|
|
1029
|
+
totalMs: Date.now() - tInnerStart,
|
|
1030
|
+
chatMs: Date.now() - localTurnStarted,
|
|
1031
|
+
localCronDiagnostic: true,
|
|
1032
|
+
responseLen: cronDiagnostic.length,
|
|
1033
|
+
}, 'chat:latency');
|
|
1034
|
+
return cronDiagnostic;
|
|
1035
|
+
}
|
|
1036
|
+
}
|
|
1009
1037
|
// Show "queued" status if either lane or session lock is contended,
|
|
1010
1038
|
// so the user doesn't stare at "thinking..." for up to 60s while a
|
|
1011
1039
|
// previous message is still processing.
|
package/dist/index.js
CHANGED
|
@@ -55,6 +55,9 @@ function killPid(pid) {
|
|
|
55
55
|
function stopLaunchdService() {
|
|
56
56
|
if (process.platform !== 'darwin')
|
|
57
57
|
return false;
|
|
58
|
+
if (process.env.XPC_SERVICE_NAME === LAUNCHD_LABEL || process.env.CLEMENTINE_LAUNCHD_MANAGED === '1') {
|
|
59
|
+
return false;
|
|
60
|
+
}
|
|
58
61
|
const home = process.env.HOME ?? '';
|
|
59
62
|
const plist = path.join(home, 'Library', 'LaunchAgents', `${LAUNCHD_LABEL}.plist`);
|
|
60
63
|
if (!existsSync(plist))
|