npm - clementine-agent - Versions diffs - 1.1.26 → 1.1.28 - Mend

clementine-agent 1.1.26 → 1.1.28

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/agent/brain-digest.d.ts +8 -1
package/dist/agent/brain-digest.js +74 -5
package/dist/agent/self-improve.js +54 -10
package/package.json +1 -1

package/dist/agent/brain-digest.d.ts CHANGED Viewed

@@ -47,7 +47,14 @@ export declare function gatherBrainDigestInputs(opts: {
 }): BrainDigestInputs;
 /**
  * Format the raw inputs as a single text block the LLM can synthesize.
- * Kept terse — the LLM does the heavy lifting of pattern surfacing.
+ * Pre-LLM compression: rank by signal-bearing fields (failures over runs,
+ * agent-spread over cluster size, growth over alpha-sort) and summarize the
+ * tail rather than dropping it. Same picture in fewer tokens — the model
+ * still sees the long-tail counts but doesn't pay tokens for each entry.
+ *
+ * Inspired by the skill-chaining COMPRESS pattern: filter at the boundary,
+ * synthesize in the LLM. Tail-summary lines preserve the volume signal
+ * ("X more agents added Y chunks total") without per-row cost.
  */
 export declare function formatRawMaterial(inputs: BrainDigestInputs): string;
 export declare function runBrainDigest(opts: {

package/dist/agent/brain-digest.js CHANGED Viewed

@@ -114,38 +114,107 @@ function gatherMemoryDeltas(memoryStore, sinceIso) {
 }
 /**
  * Format the raw inputs as a single text block the LLM can synthesize.
- * Kept terse — the LLM does the heavy lifting of pattern surfacing.
+ * Pre-LLM compression: rank by signal-bearing fields (failures over runs,
+ * agent-spread over cluster size, growth over alpha-sort) and summarize the
+ * tail rather than dropping it. Same picture in fewer tokens — the model
+ * still sees the long-tail counts but doesn't pay tokens for each entry.
+ *
+ * Inspired by the skill-chaining COMPRESS pattern: filter at the boundary,
+ * synthesize in the LLM. Tail-summary lines preserve the volume signal
+ * ("X more agents added Y chunks total") without per-row cost.
  */
 export function formatRawMaterial(inputs) {
     const sections = [];
     sections.push(`## Window\nLast ${inputs.windowDays} days.`);
-    sections.push(`## Team roster\n${inputs.agents.length === 0 ? '(no specialist agents)' : inputs.agents.map(a => `- ${a.name} (${a.slug})`).join('\n')}`);
+    // Team roster — split active vs. quiet so the synthesis prompt naturally
+    // weights active agents in "per-agent highlights" without confabulating
+    // about agents that did nothing this window.
+    const activeSlugSet = new Set([
+        ...inputs.cronRunsByJob.map(r => r.agentSlug).filter((s) => !!s),
+        ...inputs.memoryDeltas.filter(d => d.agentSlug !== 'global').map(d => d.agentSlug),
+    ]);
+    const activeAgents = inputs.agents.filter(a => activeSlugSet.has(a.slug));
+    const quietAgents = inputs.agents.filter(a => !activeSlugSet.has(a.slug));
+    if (inputs.agents.length === 0) {
+        sections.push(`## Team roster\n(no specialist agents)`);
+    }
+    else {
+        const lines = [];
+        if (activeAgents.length > 0) {
+            lines.push(`Active this window:\n${activeAgents.map(a => `- ${a.name} (${a.slug})`).join('\n')}`);
+        }
+        if (quietAgents.length > 0) {
+            lines.push(`Quiet this window: ${quietAgents.map(a => a.slug).join(', ')}`);
+        }
+        sections.push(`## Team roster\n${lines.join('\n\n')}`);
+    }
+    // Cron activity — failures-first ranking so the synthesis prompt sees the
+    // problem signal early, with a tail summary preserving total volume.
     if (inputs.cronRunsByJob.length === 0) {
         sections.push(`## Cron activity\n(no autonomous runs in window)`);
     }
     else {
-        const lines = inputs.cronRunsByJob.slice(0, 20).map(r => {
+        const ranked = [...inputs.cronRunsByJob].sort((a, b) => {
+            // Failures dominate; ties broken by run count (busier jobs more important).
+            if (b.failures !== a.failures)
+                return b.failures - a.failures;
+            return b.runs - a.runs;
+        });
+        const TOP_N = 12;
+        const top = ranked.slice(0, TOP_N);
+        const tail = ranked.slice(TOP_N);
+        const lines = top.map(r => {
             const tag = r.agentSlug ? ` [${r.agentSlug}]` : '';
             const failTag = r.failures > 0 ? ` — ${r.failures} failure${r.failures === 1 ? '' : 's'}` : '';
             return `- ${r.jobName}${tag}: ${r.runs} run${r.runs === 1 ? '' : 's'}${failTag}`;
         });
+        if (tail.length > 0) {
+            const tailRuns = tail.reduce((s, r) => s + r.runs, 0);
+            const tailFailures = tail.reduce((s, r) => s + r.failures, 0);
+            lines.push(`- _…and ${tail.length} more job${tail.length === 1 ? '' : 's'}: ${tailRuns} runs, ${tailFailures} failures total_`);
+        }
         sections.push(`## Cron activity\n${lines.join('\n')}`);
     }
+    // Memory growth — top-N by delta, summarize rest. The LLM doesn't need a
+    // 30-line list of every agent that wrote one chunk; it needs to know who
+    // wrote a lot.
     if (inputs.memoryDeltas.length === 0) {
         sections.push(`## Memory growth\n(no new chunks in window)`);
     }
     else {
-        const lines = inputs.memoryDeltas.map(d => `- ${d.agentSlug}: +${d.chunksAdded} chunks`);
+        const TOP_N = 8;
+        const top = inputs.memoryDeltas.slice(0, TOP_N);
+        const tail = inputs.memoryDeltas.slice(TOP_N);
+        const lines = top.map(d => `- ${d.agentSlug}: +${d.chunksAdded} chunks`);
+        if (tail.length > 0) {
+            const tailTotal = tail.reduce((s, d) => s + d.chunksAdded, 0);
+            lines.push(`- _…and ${tail.length} other agent${tail.length === 1 ? '' : 's'}: +${tailTotal} chunks combined_`);
+        }
         sections.push(`## Memory growth\n${lines.join('\n')}`);
     }
+    // Cross-agent recurrence — widest-spread clusters first (most agents
+    // touched), with cluster size as tiebreaker. Spread is the signal of
+    // "this is genuinely team knowledge" — a 3-cluster touching 4 agents
+    // matters more than a 10-cluster touching 2.
     if (inputs.crossAgentClusters.length === 0) {
         sections.push(`## Cross-agent recurrence\n(no facts surfaced from 2+ agents)`);
     }
     else {
-        const lines = inputs.crossAgentClusters.slice(0, 12).map((c, i) => {
+        const ranked = [...inputs.crossAgentClusters].sort((a, b) => {
+            if (b.agents.length !== a.agents.length)
+                return b.agents.length - a.agents.length;
+            return b.memberCount - a.memberCount;
+        });
+        const TOP_N = 8;
+        const top = ranked.slice(0, TOP_N);
+        const tail = ranked.slice(TOP_N);
+        const lines = top.map((c, i) => {
             const preview = c.representativeContent.replace(/\n/g, ' ').slice(0, 200);
             return `${i + 1}. agents: ${c.agents.join(', ')} (${c.memberCount} chunks)\n   "${preview}${preview.length >= 200 ? '…' : ''}"`;
         });
+        if (tail.length > 0) {
+            lines.push(`_…and ${tail.length} more cross-agent cluster${tail.length === 1 ? '' : 's'} (smaller spread, not surfaced individually)._`);
+        }
         sections.push(`## Cross-agent recurrence\n${lines.join('\n')}`);
     }
     return sections.join('\n\n');

package/dist/agent/self-improve.js CHANGED Viewed

@@ -768,19 +768,40 @@ export class SelfImproveLoop {
                     `Choose a DIFFERENT area/target. If no other improvement is genuinely needed today, return an empty results array: { "results": [] }.\n`
                 : '');
         const patternAnalysis = this.analyzeExperimentPatterns(history);
-        // Format negative feedback
-        const negativeFeedbackText = metrics.negativeFeedback.slice(0, 5).map(f => `- Rating: ${f.rating} | Message: "${(f.messageSnippet ?? '').slice(0, 100)}" | Response: "${(f.responseSnippet ?? '').slice(0, 100)}"${f.comment ? ` | Comment: "${f.comment}"` : ''}`).join('\n') || '(no negative feedback)';
-        // Format cron errors
-        const cronErrorsText = metrics.cronErrors.slice(0, 5).map(e => `- Job: ${e.jobName} | Error: ${(e.error ?? 'unknown').slice(0, 200)} | At: ${e.startedAt}`).join('\n') || '(no cron errors)';
-        // Format cron reflections (quality ratings from automated reflection passes)
-        const cronReflectionsText = metrics.cronReflections.slice(-10).map(r => `- Job: ${r.jobName}${r.agentSlug ? ` (${r.agentSlug})` : ''} | Quality: ${r.quality}/5 | ` +
+        // Pre-LLM compression: when this is a focused per-agent cycle, filter
+        // every metrics text to that agent's own data. Without this, the LLM
+        // sees ALL agents' cron errors / reflections and may propose changes
+        // for the focused agent based on signals from a totally different one.
+        // (Skill-chaining COMPRESS pattern: filter at the boundary, synthesize
+        // in the LLM.)
+        const focusedSlug = this.config.agentSlug;
+        const isAgentScoped = (jobName) => !!focusedSlug && jobName.startsWith(`${focusedSlug}:`);
+        const filteredNegativeFeedback = focusedSlug
+            // Negative feedback rows don't carry agent tags reliably — keep all
+            // when in agent mode but cap tighter so noise stays bounded.
+            ? metrics.negativeFeedback.slice(0, 3)
+            : metrics.negativeFeedback.slice(0, 5);
+        const negativeFeedbackText = filteredNegativeFeedback.map(f => `- Rating: ${f.rating} | Message: "${(f.messageSnippet ?? '').slice(0, 100)}" | Response: "${(f.responseSnippet ?? '').slice(0, 100)}"${f.comment ? ` | Comment: "${f.comment}"` : ''}`).join('\n') || '(no negative feedback)';
+        // Format cron errors — filter to focused agent's jobs when applicable.
+        const filteredCronErrors = focusedSlug
+            ? metrics.cronErrors.filter(e => isAgentScoped(e.jobName ?? ''))
+            : metrics.cronErrors;
+        const cronErrorsText = filteredCronErrors.slice(0, 5).map(e => `- Job: ${e.jobName} | Error: ${(e.error ?? 'unknown').slice(0, 200)} | At: ${e.startedAt}`).join('\n') || (focusedSlug ? `(no cron errors for ${focusedSlug} in window)` : '(no cron errors)');
+        // Format cron reflections — same filter.
+        const filteredReflections = focusedSlug
+            ? metrics.cronReflections.filter(r => r.agentSlug === focusedSlug || isAgentScoped(r.jobName ?? ''))
+            : metrics.cronReflections;
+        const cronReflectionsText = filteredReflections.slice(-10).map(r => `- Job: ${r.jobName}${r.agentSlug ? ` (${r.agentSlug})` : ''} | Quality: ${r.quality}/5 | ` +
             `Exist: ${r.existence ?? '?'} Substance: ${r.substance ?? '?'} Actionable: ${r.actionable ?? '?'} ` +
             `Comm: ${r.communication ?? '?'} | ` +
-            `Gap: "${r.gap?.slice(0, 80) ?? ''}"${r.commNote ? ` | CommNote: "${r.commNote.slice(0, 80)}"` : ''} | At: ${r.timestamp}`).join('\n') || '(no cron reflections yet)';
-        // Compute per-agent metrics from reflections
+            `Gap: "${r.gap?.slice(0, 80) ?? ''}"${r.commNote ? ` | CommNote: "${r.commNote.slice(0, 80)}"` : ''} | At: ${r.timestamp}`).join('\n') || (focusedSlug ? `(no cron reflections for ${focusedSlug} yet)` : '(no cron reflections yet)');
+        // Compute per-agent metrics from reflections — when focused, only show
+        // this agent's row (the others are irrelevant to the proposal).
         const agentMetrics = new Map();
         for (const r of metrics.cronReflections) {
             const slug = r.agentSlug || 'clementine';
+            if (focusedSlug && slug !== focusedSlug)
+                continue;
             if (!agentMetrics.has(slug)) {
                 agentMetrics.set(slug, { total: 0, qualitySum: 0, emptyCount: 0, gaps: [] });
             }
@@ -799,7 +820,7 @@ export class SelfImproveLoop {
                 const topGaps = m.gaps.slice(-3).map(g => g.slice(0, 60)).join('; ') || 'none';
                 return `- ${slug}: avg quality ${avgQ}/5, ${emptyPct}% empty outputs, common gaps: "${topGaps}"`;
             }).join('\n')
-            : '(no per-agent data yet)';
+            : (focusedSlug ? `(no reflection data for ${focusedSlug} yet)` : '(no per-agent data yet)');
         // Format goal health data
         const goalHealthText = metrics.goalHealth.length > 0
             ? metrics.goalHealth.map(g => {
@@ -812,11 +833,34 @@ export class SelfImproveLoop {
             ? metrics.advisorInsights.map(a => `- ${a}`).join('\n')
             : '(no advisor data yet)';
         const areas = this.config.areas.map(a => `'${a}'`).join(', ');
+        // For per-agent cycles, also pull the agent's CURRENT instructions
+        // (agent.md body) so the LLM proposes changes informed by what's there
+        // rather than blind. Without this, "improve agent X" was generating
+        // proposals that contradicted or duplicated standing instructions.
+        let agentBodyText = '';
+        if (this.config.agentSlug) {
+            try {
+                const agentFile = path.join(AGENTS_DIR, this.config.agentSlug, 'agent.md');
+                if (existsSync(agentFile)) {
+                    const raw = readFileSync(agentFile, 'utf-8');
+                    // Cap to keep the prompt tractable — agent.md can be 10K+ chars.
+                    // The first 4K covers the role, personality, and most standing
+                    // instructions; deeper sections (long examples, references) are
+                    // less important for "what should change" decisions.
+                    const trimmed = raw.length > 4000 ? raw.slice(0, 4000) + '\n\n[...truncated, full file at agents/' + this.config.agentSlug + '/agent.md]' : raw;
+                    agentBodyText = `\n\n## CURRENT agent.md for "${this.config.agentSlug}"\n` +
+                        `These are the agent's existing standing instructions — your proposals should refine or extend these, not contradict or duplicate them.\n\n` +
+                        '```markdown\n' + trimmed + '\n```\n';
+                }
+            }
+            catch { /* non-fatal — fall through with empty body text */ }
+        }
         const agentFocusText = this.config.agentSlug
             ? `\n\n## AGENT FOCUS: ${this.config.agentSlug}\nThis is a focused improvement cycle for agent "${this.config.agentSlug}" ONLY.\n` +
                 `- You MUST target area "agent" with target "${this.config.agentSlug}", OR area "cron" targeting a cron job that this agent runs.\n` +
                 `- Do NOT propose changes to SOUL.md, AGENTS.md, source code, or other agents.\n` +
-                `- Focus on improving this agent's personality, instructions, and task execution quality.\n`
+                `- Focus on improving this agent's personality, instructions, and task execution quality.\n` +
+                agentBodyText
             : '';
         // Read SOUL.md evolution candidates from FEEDBACK.md (written by synthesizeFeedbackPatterns)
         let soulCandidatesText = '';

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.1.26",
+  "version": "1.1.28",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",