clementine-agent 1.0.15 → 1.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -220,6 +220,26 @@ export class SelfImproveLoop {
220
220
  // Check plateau
221
221
  if (consecutiveLow >= this.config.plateauLimit) {
222
222
  logger.info({ consecutiveLow }, 'Plateau detected — stopping');
223
+ // Record the plateau in the experiment log so it's not silently
224
+ // invisible. Helps the dashboard and failure monitor distinguish
225
+ // "exhausted diverse hypotheses" from "crashed mid-run".
226
+ const plateauExperiment = {
227
+ id: randomBytes(4).toString('hex'),
228
+ iteration: i,
229
+ startedAt: new Date(loopStart).toISOString(),
230
+ finishedAt: new Date().toISOString(),
231
+ durationMs: Date.now() - loopStart,
232
+ area: 'soul',
233
+ target: 'n/a',
234
+ hypothesis: 'No new hypothesis — diversity constraint exhausted',
235
+ proposedChange: '',
236
+ baselineScore: 0,
237
+ score: 0,
238
+ accepted: false,
239
+ approvalStatus: 'denied',
240
+ reason: 'Plateau: no novel improvement area remaining',
241
+ };
242
+ this.appendExperimentLog(plateauExperiment);
223
243
  break;
224
244
  }
225
245
  state.currentIteration = i;
@@ -175,7 +175,20 @@ export function computeBrokenJobs(now = Date.now()) {
175
175
  const lastRunMs = Date.parse(lastEntry.startedAt);
176
176
  // Always consult the breaker state — a stuck breaker is the primary
177
177
  // signal for "job has been silently broken for days".
178
- const cb = lastCircuitBreakerEvent(jobName);
178
+ let cb = lastCircuitBreakerEvent(jobName);
179
+ // Clear a "stuck" breaker flag if we see an ok run AFTER the last
180
+ // breaker engagement. The scheduler only logs a circuit-recovery
181
+ // event when consecutiveErrors >= 5 at recovery time — but a
182
+ // successful manual/probe run resets consecutiveErrors to 0 first,
183
+ // so the recovery branch never fires and the advisor log keeps the
184
+ // breaker appearing engaged forever. Fix: use run-log truth instead.
185
+ if (cb.engagedAt) {
186
+ const engagedMs = Date.parse(cb.engagedAt);
187
+ const hasOkSinceBreaker = entries.some(e => e.status === 'ok' && Date.parse(e.startedAt) > engagedMs);
188
+ if (hasOkSinceBreaker) {
189
+ cb = { engagedAt: null, lastOpinion: cb.lastOpinion };
190
+ }
191
+ }
179
192
  if (!cb.engagedAt && Number.isFinite(lastRunMs) && lastRunMs < dormantCutoffMs) {
180
193
  continue;
181
194
  }
@@ -278,24 +291,29 @@ function detectSelfImproveBreakage(now) {
278
291
  }
279
292
  catch { /* non-fatal */ }
280
293
  }
281
- const lastRunMs = state.lastRunAt ? Date.parse(state.lastRunAt) : 0;
282
- const lookback48h = now - 48 * 60 * 60 * 1000;
283
294
  const staleLookback = now - 7 * 24 * 60 * 60 * 1000; // 7 days
284
295
  const recentExperiments = experiments.filter(e => {
285
296
  const ts = e.startedAt ? Date.parse(e.startedAt) : 0;
286
297
  return Number.isFinite(ts) && ts >= staleLookback;
287
298
  });
288
299
  const recentErrors = recentExperiments.filter(e => e.approvalStatus === 'denied' && (e.reason?.startsWith('Error') ?? false));
289
- // Three break modes:
290
- // a. state.infraError is set (loop detected unfixable infra issue)
291
- // b. all 3+ most recent experiments within lookback are errors
292
- // c. loop ran recently but no new experiments appeared (silent early-exit)
300
+ // Break modes we care about:
301
+ // a. state.infraError is set (loop detected unfixable infra issue)
302
+ // b. state.status === 'failed' (run threw, didn't complete normally)
303
+ // c. all 3+ most recent experiments are errors (persistent iteration failures)
304
+ //
305
+ // Deliberately NOT flagging "silent early exit" (lastRunAt recent but no new
306
+ // experiments) when state.status === 'completed'. That's the expected
307
+ // plateau state: the hypothesizer returns null for every iteration because
308
+ // the diversity constraint has blocked every previously-targeted area, the
309
+ // loop skips, plateau triggers, loop exits cleanly. Not broken — saturated.
310
+ // Forcing alarm on a saturated-but-healthy loop would make the monitor
311
+ // unusable long-term.
293
312
  const hasInfraError = !!state.infraError;
313
+ const runFailed = state.status === 'failed';
294
314
  const allRecentErrored = recentExperiments.length >= 3
295
315
  && recentExperiments.every(e => e.approvalStatus === 'denied');
296
- const silentEarlyExit = lastRunMs > lookback48h
297
- && recentExperiments.length === 0;
298
- if (!hasInfraError && !allRecentErrored && !silentEarlyExit)
316
+ if (!hasInfraError && !runFailed && !allRecentErrored)
299
317
  return null;
300
318
  const lastErrors = [];
301
319
  for (let i = experiments.length - 1; i >= 0 && lastErrors.length < 3; i--) {
@@ -304,21 +322,12 @@ function detectSelfImproveBreakage(now) {
304
322
  continue;
305
323
  lastErrors.push(err.slice(0, 400));
306
324
  }
307
- // If we don't have an explicit infraError but the last recorded error
308
- // looks schema-related, surface it — this captures the state where all
309
- // iterations died with the same API 400 but state.infraError never got
310
- // persisted (happens when MAX_INFRA_ERRORS isn't crossed within a run).
311
- const lastLoggedError = experiments.length > 0 ? (experiments[experiments.length - 1].error ?? '') : '';
312
- const inferredInfraSchema = /input_schema|tools\.\d+\.custom/i.test(lastLoggedError);
313
325
  let opinion;
314
326
  if (hasInfraError) {
315
327
  opinion = `infra: ${state.infraError.category} — ${state.infraError.diagnostic.slice(0, 200)}`;
316
328
  }
317
- else if (silentEarlyExit && inferredInfraSchema) {
318
- opinion = 'loop ran but produced no experiments last logged error was an MCP tool schema validation (API 400). Check external MCP servers (claude_desktop_config.json, Claude Code settings) for a recently-updated package exposing a malformed input_schema.';
319
- }
320
- else if (silentEarlyExit) {
321
- opinion = 'loop ran but produced no experiments — likely crashing before iteration (check metrics gathering or hypothesis generation)';
329
+ else if (runFailed) {
330
+ opinion = 'loop exited with status=failedcheck daemon log for the thrown error';
322
331
  }
323
332
  else {
324
333
  opinion = `${recentErrors.length}/${recentExperiments.length} recent iterations errored`;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.0.15",
3
+ "version": "1.0.17",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",