npm - clementine-agent - Versions diffs - 1.0.15 → 1.0.17 - Mend

clementine-agent 1.0.15 → 1.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/agent/self-improve.js +20 -0
package/dist/gateway/failure-monitor.js +30 -21
package/package.json +1 -1

package/dist/agent/self-improve.js CHANGED Viewed

@@ -220,6 +220,26 @@ export class SelfImproveLoop {
                 // Check plateau
                 if (consecutiveLow >= this.config.plateauLimit) {
                     logger.info({ consecutiveLow }, 'Plateau detected — stopping');
+                    // Record the plateau in the experiment log so it's not silently
+                    // invisible. Helps the dashboard and failure monitor distinguish
+                    // "exhausted diverse hypotheses" from "crashed mid-run".
+                    const plateauExperiment = {
+                        id: randomBytes(4).toString('hex'),
+                        iteration: i,
+                        startedAt: new Date(loopStart).toISOString(),
+                        finishedAt: new Date().toISOString(),
+                        durationMs: Date.now() - loopStart,
+                        area: 'soul',
+                        target: 'n/a',
+                        hypothesis: 'No new hypothesis — diversity constraint exhausted',
+                        proposedChange: '',
+                        baselineScore: 0,
+                        score: 0,
+                        accepted: false,
+                        approvalStatus: 'denied',
+                        reason: 'Plateau: no novel improvement area remaining',
+                    };
+                    this.appendExperimentLog(plateauExperiment);
                     break;
                 }
                 state.currentIteration = i;

package/dist/gateway/failure-monitor.js CHANGED Viewed

@@ -175,7 +175,20 @@ export function computeBrokenJobs(now = Date.now()) {
         const lastRunMs = Date.parse(lastEntry.startedAt);
         // Always consult the breaker state — a stuck breaker is the primary
         // signal for "job has been silently broken for days".
-        const cb = lastCircuitBreakerEvent(jobName);
+        let cb = lastCircuitBreakerEvent(jobName);
+        // Clear a "stuck" breaker flag if we see an ok run AFTER the last
+        // breaker engagement. The scheduler only logs a circuit-recovery
+        // event when consecutiveErrors >= 5 at recovery time — but a
+        // successful manual/probe run resets consecutiveErrors to 0 first,
+        // so the recovery branch never fires and the advisor log keeps the
+        // breaker appearing engaged forever. Fix: use run-log truth instead.
+        if (cb.engagedAt) {
+            const engagedMs = Date.parse(cb.engagedAt);
+            const hasOkSinceBreaker = entries.some(e => e.status === 'ok' && Date.parse(e.startedAt) > engagedMs);
+            if (hasOkSinceBreaker) {
+                cb = { engagedAt: null, lastOpinion: cb.lastOpinion };
+            }
+        }
         if (!cb.engagedAt && Number.isFinite(lastRunMs) && lastRunMs < dormantCutoffMs) {
             continue;
         }
@@ -278,24 +291,29 @@ function detectSelfImproveBreakage(now) {
         }
         catch { /* non-fatal */ }
     }
-    const lastRunMs = state.lastRunAt ? Date.parse(state.lastRunAt) : 0;
-    const lookback48h = now - 48 * 60 * 60 * 1000;
     const staleLookback = now - 7 * 24 * 60 * 60 * 1000; // 7 days
     const recentExperiments = experiments.filter(e => {
         const ts = e.startedAt ? Date.parse(e.startedAt) : 0;
         return Number.isFinite(ts) && ts >= staleLookback;
     });
     const recentErrors = recentExperiments.filter(e => e.approvalStatus === 'denied' && (e.reason?.startsWith('Error') ?? false));
-    // Three break modes:
-    //  a. state.infraError is set (loop detected unfixable infra issue)
-    //  b. all 3+ most recent experiments within lookback are errors
-    //  c. loop ran recently but no new experiments appeared (silent early-exit)
+    // Break modes we care about:
+    //  a. state.infraError is set — loop detected unfixable infra issue
+    //  b. state.status === 'failed' — run threw, didn't complete normally
+    //  c. all 3+ most recent experiments are errors — persistent iteration failures
+    //
+    // Deliberately NOT flagging "silent early exit" (lastRunAt recent but no new
+    // experiments) when state.status === 'completed'. That's the expected
+    // plateau state: the hypothesizer returns null for every iteration because
+    // the diversity constraint has blocked every previously-targeted area, the
+    // loop skips, plateau triggers, loop exits cleanly. Not broken — saturated.
+    // Forcing alarm on a saturated-but-healthy loop would make the monitor
+    // unusable long-term.
     const hasInfraError = !!state.infraError;
+    const runFailed = state.status === 'failed';
     const allRecentErrored = recentExperiments.length >= 3
         && recentExperiments.every(e => e.approvalStatus === 'denied');
-    const silentEarlyExit = lastRunMs > lookback48h
-        && recentExperiments.length === 0;
-    if (!hasInfraError && !allRecentErrored && !silentEarlyExit)
+    if (!hasInfraError && !runFailed && !allRecentErrored)
         return null;
     const lastErrors = [];
     for (let i = experiments.length - 1; i >= 0 && lastErrors.length < 3; i--) {
@@ -304,21 +322,12 @@ function detectSelfImproveBreakage(now) {
             continue;
         lastErrors.push(err.slice(0, 400));
     }
-    // If we don't have an explicit infraError but the last recorded error
-    // looks schema-related, surface it — this captures the state where all
-    // iterations died with the same API 400 but state.infraError never got
-    // persisted (happens when MAX_INFRA_ERRORS isn't crossed within a run).
-    const lastLoggedError = experiments.length > 0 ? (experiments[experiments.length - 1].error ?? '') : '';
-    const inferredInfraSchema = /input_schema|tools\.\d+\.custom/i.test(lastLoggedError);
     let opinion;
     if (hasInfraError) {
         opinion = `infra: ${state.infraError.category} — ${state.infraError.diagnostic.slice(0, 200)}`;
     }
-    else if (silentEarlyExit && inferredInfraSchema) {
-        opinion = 'loop ran but produced no experiments — last logged error was an MCP tool schema validation (API 400). Check external MCP servers (claude_desktop_config.json, Claude Code settings) for a recently-updated package exposing a malformed input_schema.';
-    }
-    else if (silentEarlyExit) {
-        opinion = 'loop ran but produced no experiments — likely crashing before iteration (check metrics gathering or hypothesis generation)';
+    else if (runFailed) {
+        opinion = 'loop exited with status=failed — check daemon log for the thrown error';
     }
     else {
         opinion = `${recentErrors.length}/${recentExperiments.length} recent iterations errored`;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.0.15",
+  "version": "1.0.17",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",