clementine-agent 1.0.15 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -220,6 +220,26 @@ export class SelfImproveLoop {
220
220
  // Check plateau
221
221
  if (consecutiveLow >= this.config.plateauLimit) {
222
222
  logger.info({ consecutiveLow }, 'Plateau detected — stopping');
223
+ // Record the plateau in the experiment log so it's not silently
224
+ // invisible. Helps the dashboard and failure monitor distinguish
225
+ // "exhausted diverse hypotheses" from "crashed mid-run".
226
+ const plateauExperiment = {
227
+ id: randomBytes(4).toString('hex'),
228
+ iteration: i,
229
+ startedAt: new Date(loopStart).toISOString(),
230
+ finishedAt: new Date().toISOString(),
231
+ durationMs: Date.now() - loopStart,
232
+ area: 'soul',
233
+ target: 'n/a',
234
+ hypothesis: 'No new hypothesis — diversity constraint exhausted',
235
+ proposedChange: '',
236
+ baselineScore: 0,
237
+ score: 0,
238
+ accepted: false,
239
+ approvalStatus: 'denied',
240
+ reason: 'Plateau: no novel improvement area remaining',
241
+ };
242
+ this.appendExperimentLog(plateauExperiment);
223
243
  break;
224
244
  }
225
245
  state.currentIteration = i;
@@ -278,24 +278,29 @@ function detectSelfImproveBreakage(now) {
278
278
  }
279
279
  catch { /* non-fatal */ }
280
280
  }
281
- const lastRunMs = state.lastRunAt ? Date.parse(state.lastRunAt) : 0;
282
- const lookback48h = now - 48 * 60 * 60 * 1000;
283
281
  const staleLookback = now - 7 * 24 * 60 * 60 * 1000; // 7 days
284
282
  const recentExperiments = experiments.filter(e => {
285
283
  const ts = e.startedAt ? Date.parse(e.startedAt) : 0;
286
284
  return Number.isFinite(ts) && ts >= staleLookback;
287
285
  });
288
286
  const recentErrors = recentExperiments.filter(e => e.approvalStatus === 'denied' && (e.reason?.startsWith('Error') ?? false));
289
- // Three break modes:
290
- // a. state.infraError is set (loop detected unfixable infra issue)
291
- // b. all 3+ most recent experiments within lookback are errors
292
- // c. loop ran recently but no new experiments appeared (silent early-exit)
287
+ // Break modes we care about:
288
+ // a. state.infraError is set — loop detected unfixable infra issue
289
+ // b. state.status === 'failed' — run threw, didn't complete normally
290
+ // c. all 3+ most recent experiments are errors — persistent iteration failures
291
+ //
292
+ // Deliberately NOT flagging "silent early exit" (lastRunAt recent but no new
293
+ // experiments) when state.status === 'completed'. That's the expected
294
+ // plateau state: the hypothesizer returns null for every iteration because
295
+ // the diversity constraint has blocked every previously-targeted area, the
296
+ // loop skips, plateau triggers, loop exits cleanly. Not broken — saturated.
297
+ // Forcing alarm on a saturated-but-healthy loop would make the monitor
298
+ // unusable long-term.
293
299
  const hasInfraError = !!state.infraError;
300
+ const runFailed = state.status === 'failed';
294
301
  const allRecentErrored = recentExperiments.length >= 3
295
302
  && recentExperiments.every(e => e.approvalStatus === 'denied');
296
- const silentEarlyExit = lastRunMs > lookback48h
297
- && recentExperiments.length === 0;
298
- if (!hasInfraError && !allRecentErrored && !silentEarlyExit)
303
+ if (!hasInfraError && !runFailed && !allRecentErrored)
299
304
  return null;
300
305
  const lastErrors = [];
301
306
  for (let i = experiments.length - 1; i >= 0 && lastErrors.length < 3; i--) {
@@ -304,21 +309,12 @@ function detectSelfImproveBreakage(now) {
304
309
  continue;
305
310
  lastErrors.push(err.slice(0, 400));
306
311
  }
307
- // If we don't have an explicit infraError but the last recorded error
308
- // looks schema-related, surface it — this captures the state where all
309
- // iterations died with the same API 400 but state.infraError never got
310
- // persisted (happens when MAX_INFRA_ERRORS isn't crossed within a run).
311
- const lastLoggedError = experiments.length > 0 ? (experiments[experiments.length - 1].error ?? '') : '';
312
- const inferredInfraSchema = /input_schema|tools\.\d+\.custom/i.test(lastLoggedError);
313
312
  let opinion;
314
313
  if (hasInfraError) {
315
314
  opinion = `infra: ${state.infraError.category} — ${state.infraError.diagnostic.slice(0, 200)}`;
316
315
  }
317
- else if (silentEarlyExit && inferredInfraSchema) {
318
- opinion = 'loop ran but produced no experiments last logged error was an MCP tool schema validation (API 400). Check external MCP servers (claude_desktop_config.json, Claude Code settings) for a recently-updated package exposing a malformed input_schema.';
319
- }
320
- else if (silentEarlyExit) {
321
- opinion = 'loop ran but produced no experiments — likely crashing before iteration (check metrics gathering or hypothesis generation)';
316
+ else if (runFailed) {
317
+ opinion = 'loop exited with status=failedcheck daemon log for the thrown error';
322
318
  }
323
319
  else {
324
320
  opinion = `${recentErrors.length}/${recentExperiments.length} recent iterations errored`;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.0.15",
3
+ "version": "1.0.16",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",