clementine-agent 1.0.34 → 1.0.35
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
- package/dist/agent/self-improve.js +29 -7
- package/package.json +1 -1
|
@@ -198,6 +198,11 @@ export class SelfImproveLoop {
|
|
|
198
198
|
const loopStart = Date.now();
|
|
199
199
|
const history = this.loadExperimentLog();
|
|
200
200
|
let consecutiveLow = 0;
|
|
201
|
+
// Cap accepted proposals per run so the owner's approval queue stays
|
|
202
|
+
// scannable. The nightly loop should surface 1-3 solid ideas — not a
|
|
203
|
+
// flood — even if the hypothesizer is inspired.
|
|
204
|
+
const maxAcceptancesPerRun = 3;
|
|
205
|
+
let acceptedThisRun = 0;
|
|
201
206
|
try {
|
|
202
207
|
// Step 1: Gather baseline metrics
|
|
203
208
|
const metrics = await this.gatherMetrics();
|
|
@@ -379,6 +384,7 @@ export class SelfImproveLoop {
|
|
|
379
384
|
}
|
|
380
385
|
}
|
|
381
386
|
consecutiveLow = 0;
|
|
387
|
+
acceptedThisRun++;
|
|
382
388
|
}
|
|
383
389
|
else {
|
|
384
390
|
consecutiveLow++;
|
|
@@ -389,7 +395,13 @@ export class SelfImproveLoop {
|
|
|
389
395
|
area: proposal.area,
|
|
390
396
|
score,
|
|
391
397
|
accepted,
|
|
398
|
+
acceptedThisRun,
|
|
392
399
|
}, `Iteration ${i} complete`);
|
|
400
|
+
// Stop once we've landed enough good ideas for the owner to review.
|
|
401
|
+
if (acceptedThisRun >= maxAcceptancesPerRun) {
|
|
402
|
+
logger.info({ acceptedThisRun }, 'Reached max-acceptances per run — stopping');
|
|
403
|
+
break;
|
|
404
|
+
}
|
|
393
405
|
}
|
|
394
406
|
catch (err) {
|
|
395
407
|
const classified = classifyError(err);
|
|
@@ -666,6 +678,17 @@ export class SelfImproveLoop {
|
|
|
666
678
|
const recentTargets = new Map();
|
|
667
679
|
const recentAreas = new Map();
|
|
668
680
|
for (const e of history.slice(-50)) {
|
|
681
|
+
// Skip error-fallback experiments. They default to `area: 'soul', target:
|
|
682
|
+
// 'unknown'` (see the error-catch block below) and historically have
|
|
683
|
+
// poisoned diversity accounting — e.g. a ~2-week stretch of API errors
|
|
684
|
+
// artificially blacklisted the whole 'soul' area even though no real
|
|
685
|
+
// attempt was made. A crashed iteration isn't evidence we explored the
|
|
686
|
+
// space, just that the SDK call failed.
|
|
687
|
+
if (e.reason?.startsWith('Error:'))
|
|
688
|
+
continue;
|
|
689
|
+
// Plateau markers also shouldn't count as attempts.
|
|
690
|
+
if (e.hypothesis?.startsWith('No new hypothesis'))
|
|
691
|
+
continue;
|
|
669
692
|
const key = `${e.area}:${e.target}`;
|
|
670
693
|
const ts = Date.parse(e.startedAt);
|
|
671
694
|
const tsMs = Number.isFinite(ts) ? ts : 0;
|
|
@@ -717,7 +740,7 @@ export class SelfImproveLoop {
|
|
|
717
740
|
(overTargeted.length > 0
|
|
718
741
|
? `These specific targets MUST NOT be re-targeted:\n${overTargeted.map(t => `- ${t}`).join('\n')}\n`
|
|
719
742
|
: '') +
|
|
720
|
-
`Choose a DIFFERENT area/target. If no other improvement is needed,
|
|
743
|
+
`Choose a DIFFERENT area/target. If no other improvement is genuinely needed today, return an empty results array: { "results": [] }.\n`
|
|
721
744
|
: '');
|
|
722
745
|
const patternAnalysis = this.analyzeExperimentPatterns(history);
|
|
723
746
|
// Format negative feedback
|
|
@@ -802,18 +825,17 @@ export class SelfImproveLoop {
|
|
|
802
825
|
agentFocusText +
|
|
803
826
|
soulCandidatesText +
|
|
804
827
|
`\n## Instructions\n` +
|
|
805
|
-
`Rank
|
|
828
|
+
`Propose **1-3 concrete, high-impact improvements** the owner should review today — no fewer (aim for at least one actionable suggestion when data warrants it), no more (the owner reads each proposal manually and you'll overwhelm them). Rank by expected impact; drop anything below "solid idea".\n\n` +
|
|
829
|
+
`For each opportunity, specify:\n` +
|
|
806
830
|
`- area: ${areas}\n` +
|
|
807
|
-
`- target: the file/agent slug that should change\n` +
|
|
831
|
+
`- target: the exact file path / agent slug / cron job name that should change (not "unknown", not "n/a")\n` +
|
|
808
832
|
`- what: a 1-sentence description of what specifically should change\n` +
|
|
809
|
-
`- why: which metric this should improve\n\n` +
|
|
833
|
+
`- why: which metric or signal from the data above this should improve\n\n` +
|
|
810
834
|
`Area notes:\n` +
|
|
811
835
|
`- For "goal": target = "{owner}/{goal-slug}" (e.g. "clementine/improve-reply-rates" or "ross-the-sdr/book-demos"). ` +
|
|
812
836
|
`Propose when you observe a pattern in completed tasks or cron runs that suggests a missing or stale goal. ` +
|
|
813
837
|
`The proposedChange must be a JSON goal object with at minimum: title, description, priority, reviewFrequency.\n\n` +
|
|
814
|
-
`
|
|
815
|
-
`[{ "area": "...", "target": "...", "what": "...", "why": "..." }]\n` +
|
|
816
|
-
`If no improvement is needed, output: []`;
|
|
838
|
+
`Return your answer as a JSON object matching the schema: { "results": [ ... ] }. Up to 3 items. If absolutely nothing actionable today, return { "results": [] }.`;
|
|
817
839
|
const analysisResult = await this.assistant.runPlanStep('si-analyze', analysisPrompt, {
|
|
818
840
|
tier: 2,
|
|
819
841
|
maxTurns: 3,
|