clementine-agent 1.1.26 → 1.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47,7 +47,14 @@ export declare function gatherBrainDigestInputs(opts: {
47
47
  }): BrainDigestInputs;
48
48
  /**
49
49
  * Format the raw inputs as a single text block the LLM can synthesize.
50
- * Kept terse the LLM does the heavy lifting of pattern surfacing.
50
+ * Pre-LLM compression: rank by signal-bearing fields (failures over runs,
51
+ * agent-spread over cluster size, growth over alpha-sort) and summarize the
52
+ * tail rather than dropping it. Same picture in fewer tokens — the model
53
+ * still sees the long-tail counts but doesn't pay tokens for each entry.
54
+ *
55
+ * Inspired by the skill-chaining COMPRESS pattern: filter at the boundary,
56
+ * synthesize in the LLM. Tail-summary lines preserve the volume signal
57
+ * ("X more agents added Y chunks total") without per-row cost.
51
58
  */
52
59
  export declare function formatRawMaterial(inputs: BrainDigestInputs): string;
53
60
  export declare function runBrainDigest(opts: {
@@ -114,38 +114,107 @@ function gatherMemoryDeltas(memoryStore, sinceIso) {
114
114
  }
115
115
  /**
116
116
  * Format the raw inputs as a single text block the LLM can synthesize.
117
- * Kept terse the LLM does the heavy lifting of pattern surfacing.
117
+ * Pre-LLM compression: rank by signal-bearing fields (failures over runs,
118
+ * agent-spread over cluster size, growth over alpha-sort) and summarize the
119
+ * tail rather than dropping it. Same picture in fewer tokens — the model
120
+ * still sees the long-tail counts but doesn't pay tokens for each entry.
121
+ *
122
+ * Inspired by the skill-chaining COMPRESS pattern: filter at the boundary,
123
+ * synthesize in the LLM. Tail-summary lines preserve the volume signal
124
+ * ("X more agents added Y chunks total") without per-row cost.
118
125
  */
119
126
  export function formatRawMaterial(inputs) {
120
127
  const sections = [];
121
128
  sections.push(`## Window\nLast ${inputs.windowDays} days.`);
122
- sections.push(`## Team roster\n${inputs.agents.length === 0 ? '(no specialist agents)' : inputs.agents.map(a => `- ${a.name} (${a.slug})`).join('\n')}`);
129
+ // Team roster split active vs. quiet so the synthesis prompt naturally
130
+ // weights active agents in "per-agent highlights" without confabulating
131
+ // about agents that did nothing this window.
132
+ const activeSlugSet = new Set([
133
+ ...inputs.cronRunsByJob.map(r => r.agentSlug).filter((s) => !!s),
134
+ ...inputs.memoryDeltas.filter(d => d.agentSlug !== 'global').map(d => d.agentSlug),
135
+ ]);
136
+ const activeAgents = inputs.agents.filter(a => activeSlugSet.has(a.slug));
137
+ const quietAgents = inputs.agents.filter(a => !activeSlugSet.has(a.slug));
138
+ if (inputs.agents.length === 0) {
139
+ sections.push(`## Team roster\n(no specialist agents)`);
140
+ }
141
+ else {
142
+ const lines = [];
143
+ if (activeAgents.length > 0) {
144
+ lines.push(`Active this window:\n${activeAgents.map(a => `- ${a.name} (${a.slug})`).join('\n')}`);
145
+ }
146
+ if (quietAgents.length > 0) {
147
+ lines.push(`Quiet this window: ${quietAgents.map(a => a.slug).join(', ')}`);
148
+ }
149
+ sections.push(`## Team roster\n${lines.join('\n\n')}`);
150
+ }
151
+ // Cron activity — failures-first ranking so the synthesis prompt sees the
152
+ // problem signal early, with a tail summary preserving total volume.
123
153
  if (inputs.cronRunsByJob.length === 0) {
124
154
  sections.push(`## Cron activity\n(no autonomous runs in window)`);
125
155
  }
126
156
  else {
127
- const lines = inputs.cronRunsByJob.slice(0, 20).map(r => {
157
+ const ranked = [...inputs.cronRunsByJob].sort((a, b) => {
158
+ // Failures dominate; ties broken by run count (busier jobs more important).
159
+ if (b.failures !== a.failures)
160
+ return b.failures - a.failures;
161
+ return b.runs - a.runs;
162
+ });
163
+ const TOP_N = 12;
164
+ const top = ranked.slice(0, TOP_N);
165
+ const tail = ranked.slice(TOP_N);
166
+ const lines = top.map(r => {
128
167
  const tag = r.agentSlug ? ` [${r.agentSlug}]` : '';
129
168
  const failTag = r.failures > 0 ? ` — ${r.failures} failure${r.failures === 1 ? '' : 's'}` : '';
130
169
  return `- ${r.jobName}${tag}: ${r.runs} run${r.runs === 1 ? '' : 's'}${failTag}`;
131
170
  });
171
+ if (tail.length > 0) {
172
+ const tailRuns = tail.reduce((s, r) => s + r.runs, 0);
173
+ const tailFailures = tail.reduce((s, r) => s + r.failures, 0);
174
+ lines.push(`- _…and ${tail.length} more job${tail.length === 1 ? '' : 's'}: ${tailRuns} runs, ${tailFailures} failures total_`);
175
+ }
132
176
  sections.push(`## Cron activity\n${lines.join('\n')}`);
133
177
  }
178
+ // Memory growth — top-N by delta, summarize rest. The LLM doesn't need a
179
+ // 30-line list of every agent that wrote one chunk; it needs to know who
180
+ // wrote a lot.
134
181
  if (inputs.memoryDeltas.length === 0) {
135
182
  sections.push(`## Memory growth\n(no new chunks in window)`);
136
183
  }
137
184
  else {
138
- const lines = inputs.memoryDeltas.map(d => `- ${d.agentSlug}: +${d.chunksAdded} chunks`);
185
+ const TOP_N = 8;
186
+ const top = inputs.memoryDeltas.slice(0, TOP_N);
187
+ const tail = inputs.memoryDeltas.slice(TOP_N);
188
+ const lines = top.map(d => `- ${d.agentSlug}: +${d.chunksAdded} chunks`);
189
+ if (tail.length > 0) {
190
+ const tailTotal = tail.reduce((s, d) => s + d.chunksAdded, 0);
191
+ lines.push(`- _…and ${tail.length} other agent${tail.length === 1 ? '' : 's'}: +${tailTotal} chunks combined_`);
192
+ }
139
193
  sections.push(`## Memory growth\n${lines.join('\n')}`);
140
194
  }
195
+ // Cross-agent recurrence — widest-spread clusters first (most agents
196
+ // touched), with cluster size as tiebreaker. Spread is the signal of
197
+ // "this is genuinely team knowledge" — a 3-cluster touching 4 agents
198
+ // matters more than a 10-cluster touching 2.
141
199
  if (inputs.crossAgentClusters.length === 0) {
142
200
  sections.push(`## Cross-agent recurrence\n(no facts surfaced from 2+ agents)`);
143
201
  }
144
202
  else {
145
- const lines = inputs.crossAgentClusters.slice(0, 12).map((c, i) => {
203
+ const ranked = [...inputs.crossAgentClusters].sort((a, b) => {
204
+ if (b.agents.length !== a.agents.length)
205
+ return b.agents.length - a.agents.length;
206
+ return b.memberCount - a.memberCount;
207
+ });
208
+ const TOP_N = 8;
209
+ const top = ranked.slice(0, TOP_N);
210
+ const tail = ranked.slice(TOP_N);
211
+ const lines = top.map((c, i) => {
146
212
  const preview = c.representativeContent.replace(/\n/g, ' ').slice(0, 200);
147
213
  return `${i + 1}. agents: ${c.agents.join(', ')} (${c.memberCount} chunks)\n "${preview}${preview.length >= 200 ? '…' : ''}"`;
148
214
  });
215
+ if (tail.length > 0) {
216
+ lines.push(`_…and ${tail.length} more cross-agent cluster${tail.length === 1 ? '' : 's'} (smaller spread, not surfaced individually)._`);
217
+ }
149
218
  sections.push(`## Cross-agent recurrence\n${lines.join('\n')}`);
150
219
  }
151
220
  return sections.join('\n\n');
@@ -768,19 +768,40 @@ export class SelfImproveLoop {
768
768
  `Choose a DIFFERENT area/target. If no other improvement is genuinely needed today, return an empty results array: { "results": [] }.\n`
769
769
  : '');
770
770
  const patternAnalysis = this.analyzeExperimentPatterns(history);
771
- // Format negative feedback
772
- const negativeFeedbackText = metrics.negativeFeedback.slice(0, 5).map(f => `- Rating: ${f.rating} | Message: "${(f.messageSnippet ?? '').slice(0, 100)}" | Response: "${(f.responseSnippet ?? '').slice(0, 100)}"${f.comment ? ` | Comment: "${f.comment}"` : ''}`).join('\n') || '(no negative feedback)';
773
- // Format cron errors
774
- const cronErrorsText = metrics.cronErrors.slice(0, 5).map(e => `- Job: ${e.jobName} | Error: ${(e.error ?? 'unknown').slice(0, 200)} | At: ${e.startedAt}`).join('\n') || '(no cron errors)';
775
- // Format cron reflections (quality ratings from automated reflection passes)
776
- const cronReflectionsText = metrics.cronReflections.slice(-10).map(r => `- Job: ${r.jobName}${r.agentSlug ? ` (${r.agentSlug})` : ''} | Quality: ${r.quality}/5 | ` +
771
+ // Pre-LLM compression: when this is a focused per-agent cycle, filter
772
+ // every metrics text to that agent's own data. Without this, the LLM
773
+ // sees ALL agents' cron errors / reflections and may propose changes
774
+ // for the focused agent based on signals from a totally different one.
775
+ // (Skill-chaining COMPRESS pattern: filter at the boundary, synthesize
776
+ // in the LLM.)
777
+ const focusedSlug = this.config.agentSlug;
778
+ const isAgentScoped = (jobName) => !!focusedSlug && jobName.startsWith(`${focusedSlug}:`);
779
+ const filteredNegativeFeedback = focusedSlug
780
+ // Negative feedback rows don't carry agent tags reliably — keep all
781
+ // when in agent mode but cap tighter so noise stays bounded.
782
+ ? metrics.negativeFeedback.slice(0, 3)
783
+ : metrics.negativeFeedback.slice(0, 5);
784
+ const negativeFeedbackText = filteredNegativeFeedback.map(f => `- Rating: ${f.rating} | Message: "${(f.messageSnippet ?? '').slice(0, 100)}" | Response: "${(f.responseSnippet ?? '').slice(0, 100)}"${f.comment ? ` | Comment: "${f.comment}"` : ''}`).join('\n') || '(no negative feedback)';
785
+ // Format cron errors — filter to focused agent's jobs when applicable.
786
+ const filteredCronErrors = focusedSlug
787
+ ? metrics.cronErrors.filter(e => isAgentScoped(e.jobName ?? ''))
788
+ : metrics.cronErrors;
789
+ const cronErrorsText = filteredCronErrors.slice(0, 5).map(e => `- Job: ${e.jobName} | Error: ${(e.error ?? 'unknown').slice(0, 200)} | At: ${e.startedAt}`).join('\n') || (focusedSlug ? `(no cron errors for ${focusedSlug} in window)` : '(no cron errors)');
790
+ // Format cron reflections — same filter.
791
+ const filteredReflections = focusedSlug
792
+ ? metrics.cronReflections.filter(r => r.agentSlug === focusedSlug || isAgentScoped(r.jobName ?? ''))
793
+ : metrics.cronReflections;
794
+ const cronReflectionsText = filteredReflections.slice(-10).map(r => `- Job: ${r.jobName}${r.agentSlug ? ` (${r.agentSlug})` : ''} | Quality: ${r.quality}/5 | ` +
777
795
  `Exist: ${r.existence ?? '?'} Substance: ${r.substance ?? '?'} Actionable: ${r.actionable ?? '?'} ` +
778
796
  `Comm: ${r.communication ?? '?'} | ` +
779
- `Gap: "${r.gap?.slice(0, 80) ?? ''}"${r.commNote ? ` | CommNote: "${r.commNote.slice(0, 80)}"` : ''} | At: ${r.timestamp}`).join('\n') || '(no cron reflections yet)';
780
- // Compute per-agent metrics from reflections
797
+ `Gap: "${r.gap?.slice(0, 80) ?? ''}"${r.commNote ? ` | CommNote: "${r.commNote.slice(0, 80)}"` : ''} | At: ${r.timestamp}`).join('\n') || (focusedSlug ? `(no cron reflections for ${focusedSlug} yet)` : '(no cron reflections yet)');
798
+ // Compute per-agent metrics from reflections — when focused, only show
799
+ // this agent's row (the others are irrelevant to the proposal).
781
800
  const agentMetrics = new Map();
782
801
  for (const r of metrics.cronReflections) {
783
802
  const slug = r.agentSlug || 'clementine';
803
+ if (focusedSlug && slug !== focusedSlug)
804
+ continue;
784
805
  if (!agentMetrics.has(slug)) {
785
806
  agentMetrics.set(slug, { total: 0, qualitySum: 0, emptyCount: 0, gaps: [] });
786
807
  }
@@ -799,7 +820,7 @@ export class SelfImproveLoop {
799
820
  const topGaps = m.gaps.slice(-3).map(g => g.slice(0, 60)).join('; ') || 'none';
800
821
  return `- ${slug}: avg quality ${avgQ}/5, ${emptyPct}% empty outputs, common gaps: "${topGaps}"`;
801
822
  }).join('\n')
802
- : '(no per-agent data yet)';
823
+ : (focusedSlug ? `(no reflection data for ${focusedSlug} yet)` : '(no per-agent data yet)');
803
824
  // Format goal health data
804
825
  const goalHealthText = metrics.goalHealth.length > 0
805
826
  ? metrics.goalHealth.map(g => {
@@ -812,11 +833,34 @@ export class SelfImproveLoop {
812
833
  ? metrics.advisorInsights.map(a => `- ${a}`).join('\n')
813
834
  : '(no advisor data yet)';
814
835
  const areas = this.config.areas.map(a => `'${a}'`).join(', ');
836
+ // For per-agent cycles, also pull the agent's CURRENT instructions
837
+ // (agent.md body) so the LLM proposes changes informed by what's there
838
+ // rather than blind. Without this, "improve agent X" was generating
839
+ // proposals that contradicted or duplicated standing instructions.
840
+ let agentBodyText = '';
841
+ if (this.config.agentSlug) {
842
+ try {
843
+ const agentFile = path.join(AGENTS_DIR, this.config.agentSlug, 'agent.md');
844
+ if (existsSync(agentFile)) {
845
+ const raw = readFileSync(agentFile, 'utf-8');
846
+ // Cap to keep the prompt tractable — agent.md can be 10K+ chars.
847
+ // The first 4K covers the role, personality, and most standing
848
+ // instructions; deeper sections (long examples, references) are
849
+ // less important for "what should change" decisions.
850
+ const trimmed = raw.length > 4000 ? raw.slice(0, 4000) + '\n\n[...truncated, full file at agents/' + this.config.agentSlug + '/agent.md]' : raw;
851
+ agentBodyText = `\n\n## CURRENT agent.md for "${this.config.agentSlug}"\n` +
852
+ `These are the agent's existing standing instructions — your proposals should refine or extend these, not contradict or duplicate them.\n\n` +
853
+ '```markdown\n' + trimmed + '\n```\n';
854
+ }
855
+ }
856
+ catch { /* non-fatal — fall through with empty body text */ }
857
+ }
815
858
  const agentFocusText = this.config.agentSlug
816
859
  ? `\n\n## AGENT FOCUS: ${this.config.agentSlug}\nThis is a focused improvement cycle for agent "${this.config.agentSlug}" ONLY.\n` +
817
860
  `- You MUST target area "agent" with target "${this.config.agentSlug}", OR area "cron" targeting a cron job that this agent runs.\n` +
818
861
  `- Do NOT propose changes to SOUL.md, AGENTS.md, source code, or other agents.\n` +
819
- `- Focus on improving this agent's personality, instructions, and task execution quality.\n`
862
+ `- Focus on improving this agent's personality, instructions, and task execution quality.\n` +
863
+ agentBodyText
820
864
  : '';
821
865
  // Read SOUL.md evolution candidates from FEEDBACK.md (written by synthesizeFeedbackPatterns)
822
866
  let soulCandidatesText = '';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.1.26",
3
+ "version": "1.1.28",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",