npm - clementine-agent - Versions diffs - 1.1.27 → 1.1.29 - Mend

clementine-agent 1.1.27 → 1.1.29

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (5)

package/dist/agent/brain-digest.d.ts +8 -1
package/dist/agent/brain-digest.js +74 -5
package/dist/agent/self-improve.js +30 -9
package/dist/cli/index.js +163 -8
package/package.json +1 -1

package/dist/agent/brain-digest.d.ts CHANGED

@@ -47,7 +47,14 @@ export declare function gatherBrainDigestInputs(opts: {
 }): BrainDigestInputs;
 /**
  * Format the raw inputs as a single text block the LLM can synthesize.
- * Kept terse — the LLM does the heavy lifting of pattern surfacing.
+ * Pre-LLM compression: rank by signal-bearing fields (failures over runs,
+ * agent-spread over cluster size, growth over alpha-sort) and summarize the
+ * tail rather than dropping it. Same picture in fewer tokens — the model
+ * still sees the long-tail counts but doesn't pay tokens for each entry.
+ *
+ * Inspired by the skill-chaining COMPRESS pattern: filter at the boundary,
+ * synthesize in the LLM. Tail-summary lines preserve the volume signal
+ * ("X more agents added Y chunks total") without per-row cost.
  */
 export declare function formatRawMaterial(inputs: BrainDigestInputs): string;
 export declare function runBrainDigest(opts: {

package/dist/agent/brain-digest.js CHANGED

@@ -114,38 +114,107 @@ function gatherMemoryDeltas(memoryStore, sinceIso) {
 }
 /**
  * Format the raw inputs as a single text block the LLM can synthesize.
- * Kept terse — the LLM does the heavy lifting of pattern surfacing.
+ * Pre-LLM compression: rank by signal-bearing fields (failures over runs,
+ * agent-spread over cluster size, growth over alpha-sort) and summarize the
+ * tail rather than dropping it. Same picture in fewer tokens — the model
+ * still sees the long-tail counts but doesn't pay tokens for each entry.
+ *
+ * Inspired by the skill-chaining COMPRESS pattern: filter at the boundary,
+ * synthesize in the LLM. Tail-summary lines preserve the volume signal
+ * ("X more agents added Y chunks total") without per-row cost.
  */
 export function formatRawMaterial(inputs) {
     const sections = [];
     sections.push(`## Window\nLast ${inputs.windowDays} days.`);
-    sections.push(`## Team roster\n${inputs.agents.length === 0 ? '(no specialist agents)' : inputs.agents.map(a => `- ${a.name} (${a.slug})`).join('\n')}`);
+    // Team roster — split active vs. quiet so the synthesis prompt naturally
+    // weights active agents in "per-agent highlights" without confabulating
+    // about agents that did nothing this window.
+    const activeSlugSet = new Set([
+        ...inputs.cronRunsByJob.map(r => r.agentSlug).filter((s) => !!s),
+        ...inputs.memoryDeltas.filter(d => d.agentSlug !== 'global').map(d => d.agentSlug),
+    ]);
+    const activeAgents = inputs.agents.filter(a => activeSlugSet.has(a.slug));
+    const quietAgents = inputs.agents.filter(a => !activeSlugSet.has(a.slug));
+    if (inputs.agents.length === 0) {
+        sections.push(`## Team roster\n(no specialist agents)`);
+    }
+    else {
+        const lines = [];
+        if (activeAgents.length > 0) {
+            lines.push(`Active this window:\n${activeAgents.map(a => `- ${a.name} (${a.slug})`).join('\n')}`);
+        }
+        if (quietAgents.length > 0) {
+            lines.push(`Quiet this window: ${quietAgents.map(a => a.slug).join(', ')}`);
+        }
+        sections.push(`## Team roster\n${lines.join('\n\n')}`);
+    }
+    // Cron activity — failures-first ranking so the synthesis prompt sees the
+    // problem signal early, with a tail summary preserving total volume.
     if (inputs.cronRunsByJob.length === 0) {
         sections.push(`## Cron activity\n(no autonomous runs in window)`);
     }
     else {
-        const lines = inputs.cronRunsByJob.slice(0, 20).map(r => {
+        const ranked = [...inputs.cronRunsByJob].sort((a, b) => {
+            // Failures dominate; ties broken by run count (busier jobs more important).
+            if (b.failures !== a.failures)
+                return b.failures - a.failures;
+            return b.runs - a.runs;
+        });
+        const TOP_N = 12;
+        const top = ranked.slice(0, TOP_N);
+        const tail = ranked.slice(TOP_N);
+        const lines = top.map(r => {
             const tag = r.agentSlug ? ` [${r.agentSlug}]` : '';
             const failTag = r.failures > 0 ? ` — ${r.failures} failure${r.failures === 1 ? '' : 's'}` : '';
             return `- ${r.jobName}${tag}: ${r.runs} run${r.runs === 1 ? '' : 's'}${failTag}`;
         });
+        if (tail.length > 0) {
+            const tailRuns = tail.reduce((s, r) => s + r.runs, 0);
+            const tailFailures = tail.reduce((s, r) => s + r.failures, 0);
+            lines.push(`- _…and ${tail.length} more job${tail.length === 1 ? '' : 's'}: ${tailRuns} runs, ${tailFailures} failures total_`);
+        }
         sections.push(`## Cron activity\n${lines.join('\n')}`);
     }
+    // Memory growth — top-N by delta, summarize rest. The LLM doesn't need a
+    // 30-line list of every agent that wrote one chunk; it needs to know who
+    // wrote a lot.
     if (inputs.memoryDeltas.length === 0) {
         sections.push(`## Memory growth\n(no new chunks in window)`);
     }
     else {
-        const lines = inputs.memoryDeltas.map(d => `- ${d.agentSlug}: +${d.chunksAdded} chunks`);
+        const TOP_N = 8;
+        const top = inputs.memoryDeltas.slice(0, TOP_N);
+        const tail = inputs.memoryDeltas.slice(TOP_N);
+        const lines = top.map(d => `- ${d.agentSlug}: +${d.chunksAdded} chunks`);
+        if (tail.length > 0) {
+            const tailTotal = tail.reduce((s, d) => s + d.chunksAdded, 0);
+            lines.push(`- _…and ${tail.length} other agent${tail.length === 1 ? '' : 's'}: +${tailTotal} chunks combined_`);
+        }
         sections.push(`## Memory growth\n${lines.join('\n')}`);
     }
+    // Cross-agent recurrence — widest-spread clusters first (most agents
+    // touched), with cluster size as tiebreaker. Spread is the signal of
+    // "this is genuinely team knowledge" — a 3-cluster touching 4 agents
+    // matters more than a 10-cluster touching 2.
     if (inputs.crossAgentClusters.length === 0) {
         sections.push(`## Cross-agent recurrence\n(no facts surfaced from 2+ agents)`);
     }
     else {
-        const lines = inputs.crossAgentClusters.slice(0, 12).map((c, i) => {
+        const ranked = [...inputs.crossAgentClusters].sort((a, b) => {
+            if (b.agents.length !== a.agents.length)
+                return b.agents.length - a.agents.length;
+            return b.memberCount - a.memberCount;
+        });
+        const TOP_N = 8;
+        const top = ranked.slice(0, TOP_N);
+        const tail = ranked.slice(TOP_N);
+        const lines = top.map((c, i) => {
             const preview = c.representativeContent.replace(/\n/g, ' ').slice(0, 200);
             return `${i + 1}. agents: ${c.agents.join(', ')} (${c.memberCount} chunks)\n   "${preview}${preview.length >= 200 ? '…' : ''}"`;
         });
+        if (tail.length > 0) {
+            lines.push(`_…and ${tail.length} more cross-agent cluster${tail.length === 1 ? '' : 's'} (smaller spread, not surfaced individually)._`);
+        }
         sections.push(`## Cross-agent recurrence\n${lines.join('\n')}`);
     }
     return sections.join('\n\n');

package/dist/agent/self-improve.js CHANGED

@@ -768,19 +768,40 @@ export class SelfImproveLoop {
                     `Choose a DIFFERENT area/target. If no other improvement is genuinely needed today, return an empty results array: { "results": [] }.\n`
                 : '');
         const patternAnalysis = this.analyzeExperimentPatterns(history);
-        // Format negative feedback
-        const negativeFeedbackText = metrics.negativeFeedback.slice(0, 5).map(f => `- Rating: ${f.rating} | Message: "${(f.messageSnippet ?? '').slice(0, 100)}" | Response: "${(f.responseSnippet ?? '').slice(0, 100)}"${f.comment ? ` | Comment: "${f.comment}"` : ''}`).join('\n') || '(no negative feedback)';
-        // Format cron errors
-        const cronErrorsText = metrics.cronErrors.slice(0, 5).map(e => `- Job: ${e.jobName} | Error: ${(e.error ?? 'unknown').slice(0, 200)} | At: ${e.startedAt}`).join('\n') || '(no cron errors)';
-        // Format cron reflections (quality ratings from automated reflection passes)
-        const cronReflectionsText = metrics.cronReflections.slice(-10).map(r => `- Job: ${r.jobName}${r.agentSlug ? ` (${r.agentSlug})` : ''} | Quality: ${r.quality}/5 | ` +
+        // Pre-LLM compression: when this is a focused per-agent cycle, filter
+        // every metrics text to that agent's own data. Without this, the LLM
+        // sees ALL agents' cron errors / reflections and may propose changes
+        // for the focused agent based on signals from a totally different one.
+        // (Skill-chaining COMPRESS pattern: filter at the boundary, synthesize
+        // in the LLM.)
+        const focusedSlug = this.config.agentSlug;
+        const isAgentScoped = (jobName) => !!focusedSlug && jobName.startsWith(`${focusedSlug}:`);
+        const filteredNegativeFeedback = focusedSlug
+            // Negative feedback rows don't carry agent tags reliably — keep all
+            // when in agent mode but cap tighter so noise stays bounded.
+            ? metrics.negativeFeedback.slice(0, 3)
+            : metrics.negativeFeedback.slice(0, 5);
+        const negativeFeedbackText = filteredNegativeFeedback.map(f => `- Rating: ${f.rating} | Message: "${(f.messageSnippet ?? '').slice(0, 100)}" | Response: "${(f.responseSnippet ?? '').slice(0, 100)}"${f.comment ? ` | Comment: "${f.comment}"` : ''}`).join('\n') || '(no negative feedback)';
+        // Format cron errors — filter to focused agent's jobs when applicable.
+        const filteredCronErrors = focusedSlug
+            ? metrics.cronErrors.filter(e => isAgentScoped(e.jobName ?? ''))
+            : metrics.cronErrors;
+        const cronErrorsText = filteredCronErrors.slice(0, 5).map(e => `- Job: ${e.jobName} | Error: ${(e.error ?? 'unknown').slice(0, 200)} | At: ${e.startedAt}`).join('\n') || (focusedSlug ? `(no cron errors for ${focusedSlug} in window)` : '(no cron errors)');
+        // Format cron reflections — same filter.
+        const filteredReflections = focusedSlug
+            ? metrics.cronReflections.filter(r => r.agentSlug === focusedSlug || isAgentScoped(r.jobName ?? ''))
+            : metrics.cronReflections;
+        const cronReflectionsText = filteredReflections.slice(-10).map(r => `- Job: ${r.jobName}${r.agentSlug ? ` (${r.agentSlug})` : ''} | Quality: ${r.quality}/5 | ` +
             `Exist: ${r.existence ?? '?'} Substance: ${r.substance ?? '?'} Actionable: ${r.actionable ?? '?'} ` +
             `Comm: ${r.communication ?? '?'} | ` +
-            `Gap: "${r.gap?.slice(0, 80) ?? ''}"${r.commNote ? ` | CommNote: "${r.commNote.slice(0, 80)}"` : ''} | At: ${r.timestamp}`).join('\n') || '(no cron reflections yet)';
-        // Compute per-agent metrics from reflections
+            `Gap: "${r.gap?.slice(0, 80) ?? ''}"${r.commNote ? ` | CommNote: "${r.commNote.slice(0, 80)}"` : ''} | At: ${r.timestamp}`).join('\n') || (focusedSlug ? `(no cron reflections for ${focusedSlug} yet)` : '(no cron reflections yet)');
+        // Compute per-agent metrics from reflections — when focused, only show
+        // this agent's row (the others are irrelevant to the proposal).
         const agentMetrics = new Map();
         for (const r of metrics.cronReflections) {
             const slug = r.agentSlug || 'clementine';
+            if (focusedSlug && slug !== focusedSlug)
+                continue;
             if (!agentMetrics.has(slug)) {
                 agentMetrics.set(slug, { total: 0, qualitySum: 0, emptyCount: 0, gaps: [] });
             }
@@ -799,7 +820,7 @@ export class SelfImproveLoop {
                 const topGaps = m.gaps.slice(-3).map(g => g.slice(0, 60)).join('; ') || 'none';
                 return `- ${slug}: avg quality ${avgQ}/5, ${emptyPct}% empty outputs, common gaps: "${topGaps}"`;
             }).join('\n')
-            : '(no per-agent data yet)';
+            : (focusedSlug ? `(no reflection data for ${focusedSlug} yet)` : '(no per-agent data yet)');
         // Format goal health data
         const goalHealthText = metrics.goalHealth.length > 0
             ? metrics.goalHealth.map(g => {

package/dist/cli/index.js CHANGED

@@ -3862,20 +3862,175 @@ const siCmd = program
     .description('Manage Clementine self-improvement');
 siCmd
     .command('status')
-    .description('Show self-improvement state and baseline metrics')
-    .action(async () => {
+    .description('Show self-improvement health — last cycle, infra errors, per-agent runs, recent activity')
+    .option('--json', 'Emit machine-readable JSON')
+    .action(async (opts) => {
+    const BOLD = '\x1b[1m';
+    const DIM = '\x1b[0;90m';
+    const GREEN = '\x1b[0;32m';
+    const YELLOW = '\x1b[1;33m';
+    const RED = '\x1b[0;31m';
+    const CYAN = '\x1b[0;36m';
+    const RESET = '\x1b[0m';
     try {
+        process.env.CLEMENTINE_HOME = BASE_DIR;
         const { SelfImproveLoop } = await import('../agent/self-improve.js');
         const { PersonalAssistant } = await import('../agent/assistant.js');
         const assistant = new PersonalAssistant();
         const loop = new SelfImproveLoop(assistant);
         const state = loop.loadState();
-        const m = state.baselineMetrics;
-        console.log(`Status: ${state.status}`);
-        console.log(`Last run: ${state.lastRunAt || 'never'}`);
-        console.log(`Total experiments: ${state.totalExperiments}`);
-        console.log(`Pending approvals: ${state.pendingApprovals}`);
-        console.log(`Baseline — Feedback: ${(m.feedbackPositiveRatio * 100).toFixed(0)}% positive, Cron: ${(m.cronSuccessRate * 100).toFixed(0)}% success, Quality: ${m.avgResponseQuality.toFixed(2)}`);
+        const log = loop.loadExperimentLog();
+        const pending = loop.getPendingChanges();
+        // Compute "last successful cycle" — most recent log entry that wasn't
+        // a plateau record or pure infra failure. Different from lastRunAt
+        // (which moves on every attempt, even crashed ones).
+        const lastSuccessful = [...log].reverse().find(e => e.area !== 'soul' || e.hypothesis !== 'No new hypothesis — diversity constraint exhausted');
+        const nowMs = Date.now();
+        const formatAge = (iso) => {
+            if (!iso)
+                return 'never';
+            const ms = nowMs - Date.parse(iso);
+            const h = Math.floor(ms / 3_600_000);
+            if (h < 1)
+                return `${Math.floor(ms / 60_000)}m ago`;
+            if (h < 48)
+                return `${h}h ago`;
+            return `${Math.floor(h / 24)}d ago`;
+        };
+        // Auto-applied count over the last 7 days = experiments with status 'approved'
+        const since7d = nowMs - 7 * 86_400_000;
+        const autoAppliedRecent = log.filter(e => e.approvalStatus === 'approved' && Date.parse(e.startedAt) >= since7d).length;
+        // Per-agent SI runs from heartbeat state file.
+        const hbStateFile = path.join(BASE_DIR, '.heartbeat_state.json');
+        let perAgentRuns = {};
+        try {
+            if (existsSync(hbStateFile)) {
+                const hb = JSON.parse(readFileSync(hbStateFile, 'utf-8'));
+                perAgentRuns = (hb.lastAgentSiRuns ?? {});
+            }
+        }
+        catch { /* non-fatal */ }
+        if (opts.json) {
+            console.log(JSON.stringify({
+                state,
+                lastSuccessfulAt: lastSuccessful?.startedAt ?? null,
+                autoAppliedLast7d: autoAppliedRecent,
+                pendingCount: pending.length,
+                perAgentRuns,
+                recent: log.slice(-5).reverse(),
+            }, null, 2));
+            return;
+        }
+        // ── Header ─────────────────────────────────────────────────────
+        console.log();
+        console.log(`  ${BOLD}Self-improve loop${RESET}`);
+        console.log(`  ${DIM}Status:           ${RESET}${state.status}`);
+        console.log(`  ${DIM}Last attempted:   ${RESET}${state.lastRunAt || 'never'} ${DIM}(${formatAge(state.lastRunAt)})${RESET}`);
+        // Stalled-loop warning: if we have a lastRunAt but no successful cycle
+        // in 36+ hours, surface red. That's the visibility gap from pillar #4.
+        const lastSuccAt = lastSuccessful?.startedAt;
+        const hoursSinceSuccess = lastSuccAt ? (nowMs - Date.parse(lastSuccAt)) / 3_600_000 : null;
+        if (lastSuccAt) {
+            const stallTag = hoursSinceSuccess !== null && hoursSinceSuccess > 36 ? `  ${YELLOW}⚠ stalled${RESET}` : `  ${GREEN}✓${RESET}`;
+            console.log(`  ${DIM}Last successful:  ${RESET}${lastSuccAt} ${DIM}(${formatAge(lastSuccAt)})${RESET}${stallTag}`);
+        }
+        else {
+            console.log(`  ${DIM}Last successful:  ${RESET}never  ${YELLOW}⚠ no cycles yet${RESET}`);
+        }
+        console.log(`  ${DIM}Total experiments:${RESET} ${state.totalExperiments}`);
+        console.log(`  ${DIM}Auto-applied (7d):${RESET} ${autoAppliedRecent}`);
+        console.log(`  ${DIM}Pending review:   ${RESET}${pending.length > 0 ? `${YELLOW}${pending.length}${RESET}` : '0'}`);
+        if (state.infraError) {
+            console.log();
+            console.log(`  ${RED}⚠ Infra error blocking the loop:${RESET}`);
+            console.log(`    Category:   ${state.infraError.category}`);
+            console.log(`    Diagnostic: ${state.infraError.diagnostic.slice(0, 200)}`);
+        }
+        else {
+            console.log(`  ${DIM}Infra errors:     ${RESET}${GREEN}none${RESET}`);
+        }
+        // ── Per-agent cycles ───────────────────────────────────────────
+        const agentEntries = Object.entries(perAgentRuns);
+        if (agentEntries.length > 0) {
+            console.log();
+            console.log(`  ${BOLD}Per-agent cycles${RESET}  ${DIM}(weekly cadence, 2 AM)${RESET}`);
+            for (const [slug, iso] of agentEntries) {
+                console.log(`    ${CYAN}${slug.padEnd(28)}${RESET}${DIM}last run ${formatAge(iso)}${RESET}`);
+            }
+        }
+        // ── Recent activity ────────────────────────────────────────────
+        const recent = log.slice(-5).reverse();
+        if (recent.length > 0) {
+            console.log();
+            console.log(`  ${BOLD}Recent activity${RESET}`);
+            for (const e of recent) {
+                const score = (e.score * 10).toFixed(1);
+                let icon = '❌';
+                if (e.approvalStatus === 'approved')
+                    icon = '✅';
+                else if (e.approvalStatus === 'pending')
+                    icon = '⏳';
+                else if (e.approvalStatus === 'unsurfaced')
+                    icon = '⛔';
+                const what = e.hypothesis.slice(0, 60);
+                console.log(`    ${icon} ${DIM}#${String(e.iteration).padEnd(3)}${RESET} ${e.area.padEnd(16)} ${score.padStart(4)}/10  "${what}"`);
+            }
+        }
+        if (pending.length > 0) {
+            console.log();
+            console.log(`  ${YELLOW}${pending.length} change(s) pending your review${RESET}`);
+            console.log(`    ${BOLD}clementine self-improve pending${RESET} ${DIM}— see what they propose${RESET}`);
+            console.log(`    ${BOLD}clementine self-improve apply <id>${RESET} ${DIM}— approve and apply one${RESET}`);
+        }
+        console.log();
+    }
+    catch (err) {
+        console.error('Error:', err);
+        process.exit(1);
+    }
+});
+siCmd
+    .command('pending')
+    .description('List pending self-improve changes — what needs your review')
+    .option('--json', 'Emit machine-readable JSON')
+    .action(async (opts) => {
+    const BOLD = '\x1b[1m';
+    const DIM = '\x1b[0;90m';
+    const YELLOW = '\x1b[1;33m';
+    const CYAN = '\x1b[0;36m';
+    const RESET = '\x1b[0m';
+    try {
+        process.env.CLEMENTINE_HOME = BASE_DIR;
+        const { SelfImproveLoop } = await import('../agent/self-improve.js');
+        const { PersonalAssistant } = await import('../agent/assistant.js');
+        const assistant = new PersonalAssistant();
+        const loop = new SelfImproveLoop(assistant);
+        const pending = loop.getPendingChanges();
+        if (opts.json) {
+            console.log(JSON.stringify(pending.map(p => ({
+                id: p.id, area: p.area, target: p.target,
+                score: p.score, hypothesis: p.hypothesis, reason: p.reason,
+            })), null, 2));
+            return;
+        }
+        if (pending.length === 0) {
+            console.log();
+            console.log(`  ${DIM}No changes pending review.${RESET}`);
+            console.log();
+            return;
+        }
+        console.log();
+        console.log(`  ${YELLOW}${pending.length} change${pending.length === 1 ? '' : 's'} pending${RESET}`);
+        console.log();
+        for (const p of pending) {
+            const score = (p.score * 10).toFixed(1);
+            console.log(`  ${BOLD}#${p.id}${RESET}  ${CYAN}${p.area}${RESET} ${DIM}→${RESET} ${p.target}  ${DIM}(score ${score}/10)${RESET}`);
+            console.log(`    ${p.hypothesis}`);
+            console.log(`    ${DIM}${p.reason}${RESET}`);
+            console.log();
+        }
+        console.log(`  Apply: ${BOLD}clementine self-improve apply <id>${RESET}`);
+        console.log();
     }
     catch (err) {
         console.error('Error:', err);

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.1.27",
+  "version": "1.1.29",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",