clementine-agent 1.0.15 → 1.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -220,6 +220,26 @@ export class SelfImproveLoop {
|
|
|
220
220
|
// Check plateau
|
|
221
221
|
if (consecutiveLow >= this.config.plateauLimit) {
|
|
222
222
|
logger.info({ consecutiveLow }, 'Plateau detected — stopping');
|
|
223
|
+
// Record the plateau in the experiment log so it's not silently
|
|
224
|
+
// invisible. Helps the dashboard and failure monitor distinguish
|
|
225
|
+
// "exhausted diverse hypotheses" from "crashed mid-run".
|
|
226
|
+
const plateauExperiment = {
|
|
227
|
+
id: randomBytes(4).toString('hex'),
|
|
228
|
+
iteration: i,
|
|
229
|
+
startedAt: new Date(loopStart).toISOString(),
|
|
230
|
+
finishedAt: new Date().toISOString(),
|
|
231
|
+
durationMs: Date.now() - loopStart,
|
|
232
|
+
area: 'soul',
|
|
233
|
+
target: 'n/a',
|
|
234
|
+
hypothesis: 'No new hypothesis — diversity constraint exhausted',
|
|
235
|
+
proposedChange: '',
|
|
236
|
+
baselineScore: 0,
|
|
237
|
+
score: 0,
|
|
238
|
+
accepted: false,
|
|
239
|
+
approvalStatus: 'denied',
|
|
240
|
+
reason: 'Plateau: no novel improvement area remaining',
|
|
241
|
+
};
|
|
242
|
+
this.appendExperimentLog(plateauExperiment);
|
|
223
243
|
break;
|
|
224
244
|
}
|
|
225
245
|
state.currentIteration = i;
|
|
@@ -175,7 +175,20 @@ export function computeBrokenJobs(now = Date.now()) {
|
|
|
175
175
|
const lastRunMs = Date.parse(lastEntry.startedAt);
|
|
176
176
|
// Always consult the breaker state — a stuck breaker is the primary
|
|
177
177
|
// signal for "job has been silently broken for days".
|
|
178
|
-
|
|
178
|
+
let cb = lastCircuitBreakerEvent(jobName);
|
|
179
|
+
// Clear a "stuck" breaker flag if we see an ok run AFTER the last
|
|
180
|
+
// breaker engagement. The scheduler only logs a circuit-recovery
|
|
181
|
+
// event when consecutiveErrors >= 5 at recovery time — but a
|
|
182
|
+
// successful manual/probe run resets consecutiveErrors to 0 first,
|
|
183
|
+
// so the recovery branch never fires and the advisor log keeps the
|
|
184
|
+
// breaker appearing engaged forever. Fix: use run-log truth instead.
|
|
185
|
+
if (cb.engagedAt) {
|
|
186
|
+
const engagedMs = Date.parse(cb.engagedAt);
|
|
187
|
+
const hasOkSinceBreaker = entries.some(e => e.status === 'ok' && Date.parse(e.startedAt) > engagedMs);
|
|
188
|
+
if (hasOkSinceBreaker) {
|
|
189
|
+
cb = { engagedAt: null, lastOpinion: cb.lastOpinion };
|
|
190
|
+
}
|
|
191
|
+
}
|
|
179
192
|
if (!cb.engagedAt && Number.isFinite(lastRunMs) && lastRunMs < dormantCutoffMs) {
|
|
180
193
|
continue;
|
|
181
194
|
}
|
|
@@ -278,24 +291,29 @@ function detectSelfImproveBreakage(now) {
|
|
|
278
291
|
}
|
|
279
292
|
catch { /* non-fatal */ }
|
|
280
293
|
}
|
|
281
|
-
const lastRunMs = state.lastRunAt ? Date.parse(state.lastRunAt) : 0;
|
|
282
|
-
const lookback48h = now - 48 * 60 * 60 * 1000;
|
|
283
294
|
const staleLookback = now - 7 * 24 * 60 * 60 * 1000; // 7 days
|
|
284
295
|
const recentExperiments = experiments.filter(e => {
|
|
285
296
|
const ts = e.startedAt ? Date.parse(e.startedAt) : 0;
|
|
286
297
|
return Number.isFinite(ts) && ts >= staleLookback;
|
|
287
298
|
});
|
|
288
299
|
const recentErrors = recentExperiments.filter(e => e.approvalStatus === 'denied' && (e.reason?.startsWith('Error') ?? false));
|
|
289
|
-
//
|
|
290
|
-
// a. state.infraError is set
|
|
291
|
-
// b.
|
|
292
|
-
// c.
|
|
300
|
+
// Break modes we care about:
|
|
301
|
+
// a. state.infraError is set — loop detected unfixable infra issue
|
|
302
|
+
// b. state.status === 'failed' — run threw, didn't complete normally
|
|
303
|
+
// c. 3+ experiments in the 7-day window and every one was denied — persistent iteration failures
|
|
304
|
+
//
|
|
305
|
+
// Deliberately NOT flagging "silent early exit" (lastRunAt recent but no new
|
|
306
|
+
// experiments) when state.status === 'completed'. That's the expected
|
|
307
|
+
// plateau state: the hypothesizer returns null for every iteration because
|
|
308
|
+
// the diversity constraint has blocked every previously-targeted area, the
|
|
309
|
+
// loop skips, plateau triggers, loop exits cleanly. Not broken — saturated.
|
|
310
|
+
// Raising an alarm on a saturated-but-healthy loop would make the monitor
|
|
311
|
+
// unusable long-term.
|
|
293
312
|
const hasInfraError = !!state.infraError;
|
|
313
|
+
const runFailed = state.status === 'failed';
|
|
294
314
|
const allRecentErrored = recentExperiments.length >= 3
|
|
295
315
|
&& recentExperiments.every(e => e.approvalStatus === 'denied');
|
|
296
|
-
|
|
297
|
-
&& recentExperiments.length === 0;
|
|
298
|
-
if (!hasInfraError && !allRecentErrored && !silentEarlyExit)
|
|
316
|
+
if (!hasInfraError && !runFailed && !allRecentErrored)
|
|
299
317
|
return null;
|
|
300
318
|
const lastErrors = [];
|
|
301
319
|
for (let i = experiments.length - 1; i >= 0 && lastErrors.length < 3; i--) {
|
|
@@ -304,21 +322,12 @@ function detectSelfImproveBreakage(now) {
|
|
|
304
322
|
continue;
|
|
305
323
|
lastErrors.push(err.slice(0, 400));
|
|
306
324
|
}
|
|
307
|
-
// If we don't have an explicit infraError but the last recorded error
|
|
308
|
-
// looks schema-related, surface it — this captures the state where all
|
|
309
|
-
// iterations died with the same API 400 but state.infraError never got
|
|
310
|
-
// persisted (happens when MAX_INFRA_ERRORS isn't crossed within a run).
|
|
311
|
-
const lastLoggedError = experiments.length > 0 ? (experiments[experiments.length - 1].error ?? '') : '';
|
|
312
|
-
const inferredInfraSchema = /input_schema|tools\.\d+\.custom/i.test(lastLoggedError);
|
|
313
325
|
let opinion;
|
|
314
326
|
if (hasInfraError) {
|
|
315
327
|
opinion = `infra: ${state.infraError.category} — ${state.infraError.diagnostic.slice(0, 200)}`;
|
|
316
328
|
}
|
|
317
|
-
else if (
|
|
318
|
-
opinion = 'loop
|
|
319
|
-
}
|
|
320
|
-
else if (silentEarlyExit) {
|
|
321
|
-
opinion = 'loop ran but produced no experiments — likely crashing before iteration (check metrics gathering or hypothesis generation)';
|
|
329
|
+
else if (runFailed) {
|
|
330
|
+
opinion = 'loop exited with status=failed — check daemon log for the thrown error';
|
|
322
331
|
}
|
|
323
332
|
else {
|
|
324
333
|
opinion = `${recentErrors.length}/${recentExperiments.length} recent iterations errored`;
|