clementine-agent 1.1.26 → 1.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -47,7 +47,14 @@ export declare function gatherBrainDigestInputs(opts: {
|
|
|
47
47
|
}): BrainDigestInputs;
|
|
48
48
|
/**
|
|
49
49
|
* Format the raw inputs as a single text block the LLM can synthesize.
|
|
50
|
-
*
|
|
50
|
+
* Pre-LLM compression: rank by signal-bearing fields (failures over runs,
|
|
51
|
+
* agent-spread over cluster size, growth over alpha-sort) and summarize the
|
|
52
|
+
* tail rather than dropping it. Same picture in fewer tokens — the model
|
|
53
|
+
* still sees the long-tail counts but doesn't pay tokens for each entry.
|
|
54
|
+
*
|
|
55
|
+
* Inspired by the skill-chaining COMPRESS pattern: filter at the boundary,
|
|
56
|
+
* synthesize in the LLM. Tail-summary lines preserve the volume signal
|
|
57
|
+
* ("X more agents added Y chunks total") without per-row cost.
|
|
51
58
|
*/
|
|
52
59
|
export declare function formatRawMaterial(inputs: BrainDigestInputs): string;
|
|
53
60
|
export declare function runBrainDigest(opts: {
|
|
@@ -114,38 +114,107 @@ function gatherMemoryDeltas(memoryStore, sinceIso) {
|
|
|
114
114
|
}
|
|
115
115
|
/**
|
|
116
116
|
* Format the raw inputs as a single text block the LLM can synthesize.
|
|
117
|
-
*
|
|
117
|
+
* Pre-LLM compression: rank by signal-bearing fields (failures over runs,
|
|
118
|
+
* agent-spread over cluster size, growth over alpha-sort) and summarize the
|
|
119
|
+
* tail rather than dropping it. Same picture in fewer tokens — the model
|
|
120
|
+
* still sees the long-tail counts but doesn't pay tokens for each entry.
|
|
121
|
+
*
|
|
122
|
+
* Inspired by the skill-chaining COMPRESS pattern: filter at the boundary,
|
|
123
|
+
* synthesize in the LLM. Tail-summary lines preserve the volume signal
|
|
124
|
+
* ("X more agents added Y chunks total") without per-row cost.
|
|
118
125
|
*/
|
|
119
126
|
export function formatRawMaterial(inputs) {
|
|
120
127
|
const sections = [];
|
|
121
128
|
sections.push(`## Window\nLast ${inputs.windowDays} days.`);
|
|
122
|
-
|
|
129
|
+
// Team roster — split active vs. quiet so the synthesis prompt naturally
|
|
130
|
+
// weights active agents in "per-agent highlights" without confabulating
|
|
131
|
+
// about agents that did nothing this window.
|
|
132
|
+
const activeSlugSet = new Set([
|
|
133
|
+
...inputs.cronRunsByJob.map(r => r.agentSlug).filter((s) => !!s),
|
|
134
|
+
...inputs.memoryDeltas.filter(d => d.agentSlug !== 'global').map(d => d.agentSlug),
|
|
135
|
+
]);
|
|
136
|
+
const activeAgents = inputs.agents.filter(a => activeSlugSet.has(a.slug));
|
|
137
|
+
const quietAgents = inputs.agents.filter(a => !activeSlugSet.has(a.slug));
|
|
138
|
+
if (inputs.agents.length === 0) {
|
|
139
|
+
sections.push(`## Team roster\n(no specialist agents)`);
|
|
140
|
+
}
|
|
141
|
+
else {
|
|
142
|
+
const lines = [];
|
|
143
|
+
if (activeAgents.length > 0) {
|
|
144
|
+
lines.push(`Active this window:\n${activeAgents.map(a => `- ${a.name} (${a.slug})`).join('\n')}`);
|
|
145
|
+
}
|
|
146
|
+
if (quietAgents.length > 0) {
|
|
147
|
+
lines.push(`Quiet this window: ${quietAgents.map(a => a.slug).join(', ')}`);
|
|
148
|
+
}
|
|
149
|
+
sections.push(`## Team roster\n${lines.join('\n\n')}`);
|
|
150
|
+
}
|
|
151
|
+
// Cron activity — failures-first ranking so the synthesis prompt sees the
|
|
152
|
+
// problem signal early, with a tail summary preserving total volume.
|
|
123
153
|
if (inputs.cronRunsByJob.length === 0) {
|
|
124
154
|
sections.push(`## Cron activity\n(no autonomous runs in window)`);
|
|
125
155
|
}
|
|
126
156
|
else {
|
|
127
|
-
const
|
|
157
|
+
const ranked = [...inputs.cronRunsByJob].sort((a, b) => {
|
|
158
|
+
// Failures dominate; ties broken by run count (busier jobs more important).
|
|
159
|
+
if (b.failures !== a.failures)
|
|
160
|
+
return b.failures - a.failures;
|
|
161
|
+
return b.runs - a.runs;
|
|
162
|
+
});
|
|
163
|
+
const TOP_N = 12;
|
|
164
|
+
const top = ranked.slice(0, TOP_N);
|
|
165
|
+
const tail = ranked.slice(TOP_N);
|
|
166
|
+
const lines = top.map(r => {
|
|
128
167
|
const tag = r.agentSlug ? ` [${r.agentSlug}]` : '';
|
|
129
168
|
const failTag = r.failures > 0 ? ` — ${r.failures} failure${r.failures === 1 ? '' : 's'}` : '';
|
|
130
169
|
return `- ${r.jobName}${tag}: ${r.runs} run${r.runs === 1 ? '' : 's'}${failTag}`;
|
|
131
170
|
});
|
|
171
|
+
if (tail.length > 0) {
|
|
172
|
+
const tailRuns = tail.reduce((s, r) => s + r.runs, 0);
|
|
173
|
+
const tailFailures = tail.reduce((s, r) => s + r.failures, 0);
|
|
174
|
+
lines.push(`- _…and ${tail.length} more job${tail.length === 1 ? '' : 's'}: ${tailRuns} runs, ${tailFailures} failures total_`);
|
|
175
|
+
}
|
|
132
176
|
sections.push(`## Cron activity\n${lines.join('\n')}`);
|
|
133
177
|
}
|
|
178
|
+
// Memory growth — top-N by delta, summarize rest. The LLM doesn't need a
|
|
179
|
+
// 30-line list of every agent that wrote one chunk; it needs to know who
|
|
180
|
+
// wrote a lot.
|
|
134
181
|
if (inputs.memoryDeltas.length === 0) {
|
|
135
182
|
sections.push(`## Memory growth\n(no new chunks in window)`);
|
|
136
183
|
}
|
|
137
184
|
else {
|
|
138
|
-
const
|
|
185
|
+
const TOP_N = 8;
|
|
186
|
+
const top = inputs.memoryDeltas.slice(0, TOP_N);
|
|
187
|
+
const tail = inputs.memoryDeltas.slice(TOP_N);
|
|
188
|
+
const lines = top.map(d => `- ${d.agentSlug}: +${d.chunksAdded} chunks`);
|
|
189
|
+
if (tail.length > 0) {
|
|
190
|
+
const tailTotal = tail.reduce((s, d) => s + d.chunksAdded, 0);
|
|
191
|
+
lines.push(`- _…and ${tail.length} other agent${tail.length === 1 ? '' : 's'}: +${tailTotal} chunks combined_`);
|
|
192
|
+
}
|
|
139
193
|
sections.push(`## Memory growth\n${lines.join('\n')}`);
|
|
140
194
|
}
|
|
195
|
+
// Cross-agent recurrence — widest-spread clusters first (most agents
|
|
196
|
+
// touched), with cluster size as tiebreaker. Spread is the signal of
|
|
197
|
+
// "this is genuinely team knowledge" — a 3-cluster touching 4 agents
|
|
198
|
+
// matters more than a 10-cluster touching 2.
|
|
141
199
|
if (inputs.crossAgentClusters.length === 0) {
|
|
142
200
|
sections.push(`## Cross-agent recurrence\n(no facts surfaced from 2+ agents)`);
|
|
143
201
|
}
|
|
144
202
|
else {
|
|
145
|
-
const
|
|
203
|
+
const ranked = [...inputs.crossAgentClusters].sort((a, b) => {
|
|
204
|
+
if (b.agents.length !== a.agents.length)
|
|
205
|
+
return b.agents.length - a.agents.length;
|
|
206
|
+
return b.memberCount - a.memberCount;
|
|
207
|
+
});
|
|
208
|
+
const TOP_N = 8;
|
|
209
|
+
const top = ranked.slice(0, TOP_N);
|
|
210
|
+
const tail = ranked.slice(TOP_N);
|
|
211
|
+
const lines = top.map((c, i) => {
|
|
146
212
|
const preview = c.representativeContent.replace(/\n/g, ' ').slice(0, 200);
|
|
147
213
|
return `${i + 1}. agents: ${c.agents.join(', ')} (${c.memberCount} chunks)\n "${preview}${preview.length >= 200 ? '…' : ''}"`;
|
|
148
214
|
});
|
|
215
|
+
if (tail.length > 0) {
|
|
216
|
+
lines.push(`_…and ${tail.length} more cross-agent cluster${tail.length === 1 ? '' : 's'} (smaller spread, not surfaced individually)._`);
|
|
217
|
+
}
|
|
149
218
|
sections.push(`## Cross-agent recurrence\n${lines.join('\n')}`);
|
|
150
219
|
}
|
|
151
220
|
return sections.join('\n\n');
|
|
@@ -768,19 +768,40 @@ export class SelfImproveLoop {
|
|
|
768
768
|
`Choose a DIFFERENT area/target. If no other improvement is genuinely needed today, return an empty results array: { "results": [] }.\n`
|
|
769
769
|
: '');
|
|
770
770
|
const patternAnalysis = this.analyzeExperimentPatterns(history);
|
|
771
|
-
//
|
|
772
|
-
|
|
773
|
-
//
|
|
774
|
-
|
|
775
|
-
//
|
|
776
|
-
|
|
771
|
+
// Pre-LLM compression: when this is a focused per-agent cycle, filter
|
|
772
|
+
// every metrics text to that agent's own data. Without this, the LLM
|
|
773
|
+
// sees ALL agents' cron errors / reflections and may propose changes
|
|
774
|
+
// for the focused agent based on signals from a totally different one.
|
|
775
|
+
// (Skill-chaining COMPRESS pattern: filter at the boundary, synthesize
|
|
776
|
+
// in the LLM.)
|
|
777
|
+
const focusedSlug = this.config.agentSlug;
|
|
778
|
+
const isAgentScoped = (jobName) => !!focusedSlug && jobName.startsWith(`${focusedSlug}:`);
|
|
779
|
+
const filteredNegativeFeedback = focusedSlug
|
|
780
|
+
// Negative feedback rows don't carry agent tags reliably — keep all
|
|
781
|
+
// when in agent mode but cap tighter so noise stays bounded.
|
|
782
|
+
? metrics.negativeFeedback.slice(0, 3)
|
|
783
|
+
: metrics.negativeFeedback.slice(0, 5);
|
|
784
|
+
const negativeFeedbackText = filteredNegativeFeedback.map(f => `- Rating: ${f.rating} | Message: "${(f.messageSnippet ?? '').slice(0, 100)}" | Response: "${(f.responseSnippet ?? '').slice(0, 100)}"${f.comment ? ` | Comment: "${f.comment}"` : ''}`).join('\n') || '(no negative feedback)';
|
|
785
|
+
// Format cron errors — filter to focused agent's jobs when applicable.
|
|
786
|
+
const filteredCronErrors = focusedSlug
|
|
787
|
+
? metrics.cronErrors.filter(e => isAgentScoped(e.jobName ?? ''))
|
|
788
|
+
: metrics.cronErrors;
|
|
789
|
+
const cronErrorsText = filteredCronErrors.slice(0, 5).map(e => `- Job: ${e.jobName} | Error: ${(e.error ?? 'unknown').slice(0, 200)} | At: ${e.startedAt}`).join('\n') || (focusedSlug ? `(no cron errors for ${focusedSlug} in window)` : '(no cron errors)');
|
|
790
|
+
// Format cron reflections — same filter.
|
|
791
|
+
const filteredReflections = focusedSlug
|
|
792
|
+
? metrics.cronReflections.filter(r => r.agentSlug === focusedSlug || isAgentScoped(r.jobName ?? ''))
|
|
793
|
+
: metrics.cronReflections;
|
|
794
|
+
const cronReflectionsText = filteredReflections.slice(-10).map(r => `- Job: ${r.jobName}${r.agentSlug ? ` (${r.agentSlug})` : ''} | Quality: ${r.quality}/5 | ` +
|
|
777
795
|
`Exist: ${r.existence ?? '?'} Substance: ${r.substance ?? '?'} Actionable: ${r.actionable ?? '?'} ` +
|
|
778
796
|
`Comm: ${r.communication ?? '?'} | ` +
|
|
779
|
-
`Gap: "${r.gap?.slice(0, 80) ?? ''}"${r.commNote ? ` | CommNote: "${r.commNote.slice(0, 80)}"` : ''} | At: ${r.timestamp}`).join('\n') || '(no cron reflections yet)';
|
|
780
|
-
// Compute per-agent metrics from reflections
|
|
797
|
+
`Gap: "${r.gap?.slice(0, 80) ?? ''}"${r.commNote ? ` | CommNote: "${r.commNote.slice(0, 80)}"` : ''} | At: ${r.timestamp}`).join('\n') || (focusedSlug ? `(no cron reflections for ${focusedSlug} yet)` : '(no cron reflections yet)');
|
|
798
|
+
// Compute per-agent metrics from reflections — when focused, only show
|
|
799
|
+
// this agent's row (the others are irrelevant to the proposal).
|
|
781
800
|
const agentMetrics = new Map();
|
|
782
801
|
for (const r of metrics.cronReflections) {
|
|
783
802
|
const slug = r.agentSlug || 'clementine';
|
|
803
|
+
if (focusedSlug && slug !== focusedSlug)
|
|
804
|
+
continue;
|
|
784
805
|
if (!agentMetrics.has(slug)) {
|
|
785
806
|
agentMetrics.set(slug, { total: 0, qualitySum: 0, emptyCount: 0, gaps: [] });
|
|
786
807
|
}
|
|
@@ -799,7 +820,7 @@ export class SelfImproveLoop {
|
|
|
799
820
|
const topGaps = m.gaps.slice(-3).map(g => g.slice(0, 60)).join('; ') || 'none';
|
|
800
821
|
return `- ${slug}: avg quality ${avgQ}/5, ${emptyPct}% empty outputs, common gaps: "${topGaps}"`;
|
|
801
822
|
}).join('\n')
|
|
802
|
-
: '(no per-agent data yet)';
|
|
823
|
+
: (focusedSlug ? `(no reflection data for ${focusedSlug} yet)` : '(no per-agent data yet)');
|
|
803
824
|
// Format goal health data
|
|
804
825
|
const goalHealthText = metrics.goalHealth.length > 0
|
|
805
826
|
? metrics.goalHealth.map(g => {
|
|
@@ -812,11 +833,34 @@ export class SelfImproveLoop {
|
|
|
812
833
|
? metrics.advisorInsights.map(a => `- ${a}`).join('\n')
|
|
813
834
|
: '(no advisor data yet)';
|
|
814
835
|
const areas = this.config.areas.map(a => `'${a}'`).join(', ');
|
|
836
|
+
// For per-agent cycles, also pull the agent's CURRENT instructions
|
|
837
|
+
// (agent.md body) so the LLM proposes changes informed by what's there
|
|
838
|
+
// rather than blind. Without this, "improve agent X" was generating
|
|
839
|
+
// proposals that contradicted or duplicated standing instructions.
|
|
840
|
+
let agentBodyText = '';
|
|
841
|
+
if (this.config.agentSlug) {
|
|
842
|
+
try {
|
|
843
|
+
const agentFile = path.join(AGENTS_DIR, this.config.agentSlug, 'agent.md');
|
|
844
|
+
if (existsSync(agentFile)) {
|
|
845
|
+
const raw = readFileSync(agentFile, 'utf-8');
|
|
846
|
+
// Cap to keep the prompt tractable — agent.md can be 10K+ chars.
|
|
847
|
+
// The first 4K covers the role, personality, and most standing
|
|
848
|
+
// instructions; deeper sections (long examples, references) are
|
|
849
|
+
// less important for "what should change" decisions.
|
|
850
|
+
const trimmed = raw.length > 4000 ? raw.slice(0, 4000) + '\n\n[...truncated, full file at agents/' + this.config.agentSlug + '/agent.md]' : raw;
|
|
851
|
+
agentBodyText = `\n\n## CURRENT agent.md for "${this.config.agentSlug}"\n` +
|
|
852
|
+
`These are the agent's existing standing instructions — your proposals should refine or extend these, not contradict or duplicate them.\n\n` +
|
|
853
|
+
'```markdown\n' + trimmed + '\n```\n';
|
|
854
|
+
}
|
|
855
|
+
}
|
|
856
|
+
catch { /* non-fatal — fall through with empty body text */ }
|
|
857
|
+
}
|
|
815
858
|
const agentFocusText = this.config.agentSlug
|
|
816
859
|
? `\n\n## AGENT FOCUS: ${this.config.agentSlug}\nThis is a focused improvement cycle for agent "${this.config.agentSlug}" ONLY.\n` +
|
|
817
860
|
`- You MUST target area "agent" with target "${this.config.agentSlug}", OR area "cron" targeting a cron job that this agent runs.\n` +
|
|
818
861
|
`- Do NOT propose changes to SOUL.md, AGENTS.md, source code, or other agents.\n` +
|
|
819
|
-
`- Focus on improving this agent's personality, instructions, and task execution quality.\n`
|
|
862
|
+
`- Focus on improving this agent's personality, instructions, and task execution quality.\n` +
|
|
863
|
+
agentBodyText
|
|
820
864
|
: '';
|
|
821
865
|
// Read SOUL.md evolution candidates from FEEDBACK.md (written by synthesizeFeedbackPatterns)
|
|
822
866
|
let soulCandidatesText = '';
|