clementine-agent 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Clementine TypeScript — Lightweight complexity classifier.
3
+ *
4
+ * Deterministic regex + length heuristics that decide whether a user
5
+ * message is "complex" enough to warrant planning-before-acting. No
6
+ * LLM call — gate is cheap enough to run on every message.
7
+ *
8
+ * When complex, the gateway injects a "plan-first" system-prompt
9
+ * directive so the agent proposes a numbered plan and waits for
10
+ * confirmation before diving in. Not perfect — the LLM still decides
11
+ * what "plan" means — but much more consistent than a generic
12
+ * SOUL.md directive that the model ignores half the time.
13
+ */
14
+ export interface ComplexityVerdict { // verdict object returned by classifyComplexity
15
+ complex: boolean; // true when the message passed the complexity gate (plan-first directive should be injected)
16
+ reason: string; // short label for the deciding rule, e.g. 'empty', 'too short', 'command', 'multiple signals', 'below threshold'
17
+ signals: string[]; // labels of the heuristics that fired, e.g. 'chain marker', '3 action verbs', 'long + action'
18
+ }
19
+ /**
20
+ * Classify complexity. Pure function — no LLM, no I/O.
21
+ */
22
+ export declare function classifyComplexity(text: string): ComplexityVerdict;
23
+ /**
24
+ * Build a system-prompt directive to inject when a complex message is
25
+ * detected. Prepended to Clementine's normal system prompt for this
26
+ * single query only. Short + declarative — meta-instructions are
27
+ * easier for the model to follow when they're terse.
28
+ */
29
+ export declare function planFirstDirective(): string;
30
+ //# sourceMappingURL=complexity-classifier.d.ts.map
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Clementine TypeScript — Lightweight complexity classifier.
3
+ *
4
+ * Deterministic regex + length heuristics that decide whether a user
5
+ * message is "complex" enough to warrant planning-before-acting. No
6
+ * LLM call — gate is cheap enough to run on every message.
7
+ *
8
+ * When complex, the gateway injects a "plan-first" system-prompt
9
+ * directive so the agent proposes a numbered plan and waits for
10
+ * confirmation before diving in. Not perfect — the LLM still decides
11
+ * what "plan" means — but much more consistent than a generic
12
+ * SOUL.md directive that the model ignores half the time.
13
+ */
14
/**
 * Action verbs that signal the user is asking Clementine to DO things
 * (as opposed to asking questions or making small talk). Multiple
 * action verbs in one message is a strong complexity signal.
 *
 * Entries are plain lowercase words; a space inside an entry
 * ("set up") matches any run of whitespace in the message.
 */
const ACTION_VERBS = [
    'send', 'create', 'run', 'schedule', 'update', 'delete', 'add', 'remove',
    'draft', 'write', 'post', 'publish', 'deploy', 'build', 'edit', 'move',
    'rename', 'archive', 'restore', 'assign', 'delegate', 'email', 'message',
    'invite', 'book', 'cancel', 'notify', 'alert', 'set up', 'tear down',
    'process', 'review', 'approve', 'reject',
    'extract', 'fetch', 'pull', 'gather', 'compile', 'summarize', 'analyze',
    'generate', 'produce', 'export', 'import', 'upload', 'download', 'sync',
];
/**
 * One precompiled, word-bounded alternation covering every ACTION_VERBS
 * entry. Built once at module load so the classifier does not compile a
 * fresh RegExp per verb on every incoming message. No entry is a prefix
 * or sub-phrase of another, so counting matches of the alternation gives
 * the same total as counting each verb separately.
 */
const ACTION_VERB_PATTERN = new RegExp(
    `\\b(?:${ACTION_VERBS.map(verb => verb.replace(/\s+/g, '\\s+')).join('|')})\\b`,
    'g',
);
/**
 * Chain markers — "do X and then Y" explicitly encode a multi-step
 * task. A single occurrence in a DO-type message is a clear signal.
 */
const CHAIN_MARKERS = [
    /\band\s+then\b/i,
    /,\s+then\b/i, // "X, then Y"
    /\bfirst\b[\s\S]{0,80}\bthen\b/i,
    /\bafter\s+(that|which)\b/i,
    /\bonce\s+(that|you)\b.*,/i,
    // NOTE(review): fires on any "next" followed later by a comma on the same
    // line (e.g. "what's on next week, and who's attending?"), not only on
    // "next, do Y" — confirm this breadth is intended.
    /\bnext\b.*,/i,
];
/**
 * Phrasings that explicitly ask for plan-first behavior. Triggers
 * regardless of other heuristics.
 */
const EXPLICIT_PLAN_ASKS = [
    /\bpropose\s+a\s+plan\b/i,
    // "what would be your approach" or the contraction "what'd be your
    // approach". The contraction attaches directly to "what", so it must not
    // require whitespace in front of the apostrophe (straight or curly).
    /\bwhat(?:\s+would|['’]d)\s+be\s+your\s+approach\b/i,
    /\bplan\s+(this|it)\s+out\b/i,
    /\blay\s+out\s+(a|the)\s+plan\b/i,
    /\bwalk\s+me\s+through\s+(what|how)\b/i,
];
/**
 * Count whole-word occurrences of ACTION_VERBS entries in `text`.
 *
 * Matching is case-insensitive (the text is lowercased first) and every
 * occurrence counts, so "send it and send again" yields 2. Inflected
 * forms such as "sending" are not matched.
 *
 * @param text Message text to scan.
 * @returns Total number of action-verb occurrences (0 when none).
 */
function countActionVerbs(text) {
    // String.prototype.match with a global regex starts from index 0 on every
    // call, so sharing the precompiled pattern across calls is safe.
    const hits = text.toLowerCase().match(ACTION_VERB_PATTERN);
    return hits ? hits.length : 0;
}
62
/**
 * Rough entity count — quoted strings, @mentions, and comma-separated
 * lists of capitalized names. Not perfect; designed to catch cases like
 * "email John, Sarah, and Mike".
 *
 * Scoring:
 *  - each "double-quoted" or 'single-quoted' span of 2-60 characters: +1
 *  - each @mention: +1
 *  - a list of three or more capitalized names: a flat +3, however long
 *    the list is and only for the first such list
 *
 * @param text Message text to scan.
 * @returns Approximate number of distinct entities referenced.
 */
function countEntities(text) {
    let count = 0;
    // Double-quoted strings
    count += (text.match(/"[^"]{2,60}"/g) ?? []).length;
    // Single-quoted strings. The lookarounds reject apostrophes that sit
    // directly against a letter or digit, so contractions and possessives
    // ("don't", "team's", "agents'") are not mistaken for quote delimiters.
    count += (text.match(/(?<![A-Za-z0-9])'[^']{2,60}'(?![A-Za-z0-9])/g) ?? []).length;
    // @mentions
    count += (text.match(/@\w+/g) ?? []).length;
    // Comma-separated name lists (e.g., "John, Sarah, and Mike")
    const listMatch = text.match(/(?:[A-Z][a-z]{1,20},\s+){2,}(?:and\s+)?[A-Z][a-z]{1,20}/);
    if (listMatch)
        count += 3;
    return count;
}
80
/**
 * Decide whether a user message looks multi-step enough to warrant a
 * plan-first directive. Pure function — no LLM, no I/O.
 *
 * Order of checks:
 *  1. Non-string or empty input, fewer than 30 trimmed characters, or a
 *     leading "!" / "/" short-circuits to a non-complex verdict.
 *  2. An explicit request for a plan wins immediately.
 *  3. Otherwise the heuristic signals are collected. Three or more action
 *     verbs, or a chain marker, is decisive on its own; any other
 *     combination needs at least two signals.
 *
 * @param text Raw user message.
 * @returns Verdict with the deciding reason and the signals that fired.
 */
export function classifyComplexity(text) {
    const verdict = (complex, reason, signals = []) => ({ complex, reason, signals });
    if (typeof text !== 'string' || text.length === 0) {
        return verdict(false, 'empty');
    }
    const message = text.trim();
    // Very short messages and bot commands never need a plan.
    if (message.length < 30) {
        return verdict(false, 'too short');
    }
    if (/^[!/]/.test(message)) {
        return verdict(false, 'command');
    }
    // An explicit request for a plan overrides every other heuristic.
    if (EXPLICIT_PLAN_ASKS.some(pattern => pattern.test(message))) {
        return verdict(true, 'user explicitly asked for a plan', ['explicit-plan-ask']);
    }
    const verbCount = countActionVerbs(message);
    const manyVerbs = verbCount >= 3;
    const chained = CHAIN_MARKERS.some(pattern => pattern.test(message));
    const entityCount = countEntities(message);
    // Collect signals in a fixed order: verbs, chain, entities, length.
    const fired = [];
    if (manyVerbs) {
        fired.push(`${verbCount} action verbs`);
    }
    if (chained) {
        fired.push('chain marker');
    }
    if (entityCount >= 3) {
        fired.push(`${entityCount} entities`);
    }
    // A long message only counts when it also asks for at least one action.
    if (message.length > 400 && verbCount >= 1) {
        fired.push('long + action');
    }
    // High-confidence signals pass the gate alone.
    if (manyVerbs || chained) {
        return verdict(true, 'strong single signal', fired);
    }
    return fired.length >= 2
        ? verdict(true, 'multiple signals', fired)
        : verdict(false, 'below threshold', fired);
}
131
/**
 * Produce the "plan first" system-prompt directive that is injected when a
 * message is classified as complex. The text is deliberately short and
 * declarative, and is made of four paragraphs separated by blank lines:
 * heading, numbered protocol, follow-up behaviour, and the escape hatch
 * for single-step requests.
 *
 * @returns The directive as one newline-joined string.
 */
export function planFirstDirective() {
    const heading = '## PLAN BEFORE ACTING';
    const protocol = [
        'This request has multiple steps. Before doing any of them:',
        '1. Write a numbered plan (3-7 steps, one line each).',
        '2. Call out anything that needs my decision — which contact, which template, which timing.',
        '3. End with: "Reply **go** to start, or tell me what to change."',
        '4. STOP. Do NOT start executing the plan in this turn.',
    ].join('\n');
    const followUp = [
        'When I reply "go" (or equivalent) in the next message, proceed with the plan.',
        'If I edit the plan, revise and ask again.',
    ].join('\n');
    const escapeHatch = 'SKIP this protocol only if the request is actually a single step disguised as multiple (e.g., "send an email to Aaron about X and cc Sarah" is one email, not two).';
    // Paragraphs are separated by exactly one blank line.
    return [heading, protocol, followUp, escapeHatch].join('\n\n');
}
153
+ //# sourceMappingURL=complexity-classifier.js.map
@@ -168,6 +168,48 @@ export function gatherInsightSignals(gateway) {
168
168
  }
169
169
  }
170
170
  catch { /* non-fatal */ }
171
+ // 5. Broken jobs from the failure monitor. Any currently-flagged job
172
+ // with a diagnosis is a real, actionable signal the owner should
173
+ // see proactively rather than stumble across in the dashboard.
174
+ try {
175
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
176
+ const fm = require('../gateway/failure-monitor.js');
177
+ const broken = fm.computeBrokenJobs();
178
+ for (const b of broken.slice(0, 3)) {
179
+ const hint = b.diagnosis?.rootCause
180
+ ? ` — ${b.diagnosis.rootCause.slice(0, 120)}`
181
+ : '';
182
+ signals.push(`Broken cron job "${b.jobName}": ${b.errorCount48h}/${b.totalRuns48h} failures${hint}`);
183
+ if (b.diagnosis?.proposedFix?.autoApply) {
184
+ signals.push(`One-click fix available for "${b.jobName}" — ${b.diagnosis.proposedFix.details.slice(0, 100)}`);
185
+ }
186
+ }
187
+ }
188
+ catch { /* failure-monitor may not be loadable; fine */ }
189
+ // 6. Claim tracker — failed claims in the last N hours erode trust.
190
+ // Surface them so the owner sees "Clementine said she'd do X; she
191
+ // didn't" instead of silently swallowing the miss.
192
+ try {
193
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
194
+ const { MemoryStore } = require('../memory/store.js');
195
+ const { MEMORY_DB_PATH, VAULT_DIR } = require('../config.js');
196
+ if (existsSync(MEMORY_DB_PATH)) {
197
+ const store = new MemoryStore(MEMORY_DB_PATH, VAULT_DIR);
198
+ store.initialize();
199
+ const db = store.conn;
200
+ try {
201
+ const rows = db.prepare(`SELECT subject, claim_type FROM claims
202
+ WHERE status = 'failed' AND verified_at >= datetime('now', '-6 hours')
203
+ ORDER BY verified_at DESC LIMIT 3`).all();
204
+ for (const r of rows) {
205
+ signals.push(`Failed claim: "${r.subject}" (${r.claim_type}) — I promised and didn't deliver`);
206
+ }
207
+ }
208
+ catch { /* table may not exist */ }
209
+ store.close();
210
+ }
211
+ }
212
+ catch { /* non-fatal */ }
171
213
  return signals;
172
214
  }
173
215
  /**
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Clementine TypeScript — Team-routing classifier.
3
+ *
4
+ * Decides whether a user message addressed to Clementine should be
5
+ * delegated to a specialist agent (Ross, Sasha, Nora, etc.) or handled
6
+ * by Clementine herself.
7
+ *
8
+ * CRITICAL safety rail: this classifier is ONLY invoked when the user
9
+ * is talking TO Clementine. Direct-to-agent messages (agent bot DMs,
10
+ * agent-scoped channels) bypass routing entirely — the session-key
11
+ * ownership check in gateway/router.ts enforces this before calling
12
+ * classifyRoute. Routing never crosses the boundary between different
13
+ * agent bots.
14
+ *
15
+ * Returns structured decision: {targetAgent, confidence, reasoning}.
16
+ * Caller decides what to do with confidence (auto-delegate, soft-suggest,
17
+ * or stay with Clementine).
18
+ */
19
+ import type { AgentProfile } from '../types.js';
20
+ import type { Gateway } from '../gateway/router.js';
21
+ export interface RouteDecision { // routing decision returned by classifyRoute
22
+ targetAgent: string; // slug of the agent chosen to handle the message; 'clementine' when no roster agent matched
23
+ confidence: number; // 0..1 — 1.0 for an explicit name mention; LLM-reported values are clamped into this range
24
+ reasoning: string; // one short sentence explaining the choice (LLM text is truncated to 200 characters)
25
+ }
26
+ /**
27
+ * Session keys eligible for routing. Any key NOT in this set is
28
+ * considered agent-scoped or system-scoped and never routes.
29
+ *
30
+ * - `discord:user:{ownerId}` — main bot DM with owner
31
+ * - `discord:channel:{channelId}:{ownerId}` — owner's main channel
32
+ * (where Clementine's main bot is posted, without an agent slug
33
+ * embedded in the key)
34
+ * - `slack:user:{userId}` / `slack:dm:{userId}` — Slack DM/owner channel
35
+ * - `dashboard:*` — web dashboard chat
36
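+ * - `cli:*` — local CLI chat
+ * - `telegram:user:*` / `telegram:{numericId}:*` — Telegram chats (also accepted by the implementation)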
37
+ *
38
+ * Rejected prefixes (routing NEVER fires):
39
+ * - `discord:agent:{slug}:*` — direct-to-agent DM
40
+ * - `discord:member:*`, `discord:member-dm:*` — member channels/DMs
41
+ * - Any `discord:channel:{channelId}:{slug}:{userId}` with an agent slug
42
+ * embedded (5-part form, where position 3 is an agent slug)
43
+ * - `slack:agent:*`, `slack:channel:*:{slug}:*`
44
+ * - `team:*` — inter-agent messages travel via team-bus, never route
45
+ */
46
+ export declare function isRoutable(sessionKey: string, ownerAgentSlugs: Set<string>): boolean;
47
+ /**
48
+ * Classify a user message. Returns null if the call fails — caller
49
+ * should fall back to Clementine handling.
50
+ */
51
+ export declare function classifyRoute(userMessage: string, agents: AgentProfile[], gateway: Gateway): Promise<RouteDecision | null>;
52
+ //# sourceMappingURL=route-classifier.d.ts.map
@@ -0,0 +1,197 @@
1
+ /**
2
+ * Clementine TypeScript — Team-routing classifier.
3
+ *
4
+ * Decides whether a user message addressed to Clementine should be
5
+ * delegated to a specialist agent (Ross, Sasha, Nora, etc.) or handled
6
+ * by Clementine herself.
7
+ *
8
+ * CRITICAL safety rail: this classifier is ONLY invoked when the user
9
+ * is talking TO Clementine. Direct-to-agent messages (agent bot DMs,
10
+ * agent-scoped channels) bypass routing entirely — the session-key
11
+ * ownership check in gateway/router.ts enforces this before calling
12
+ * classifyRoute. Routing never crosses the boundary between different
13
+ * agent bots.
14
+ *
15
+ * Returns structured decision: {targetAgent, confidence, reasoning}.
16
+ * Caller decides what to do with confidence (auto-delegate, soft-suggest,
17
+ * or stay with Clementine).
18
+ */
19
+ import pino from 'pino';
20
+ const logger = pino({ name: 'clementine.route-classifier' });
21
/**
 * Decide from the session key alone whether a message is eligible for
 * team routing. Keys are colon-separated; the first segment names the
 * platform and the second the kind of conversation.
 *
 * Routable:
 *   - `discord:user:*`, and `discord:channel:*` without an agent slug
 *   - `slack:user:*`, `slack:dm:*`, and `slack:channel:*` without an agent slug
 *   - `telegram:user:*` and `telegram:{numericId}:*`
 *   - `dashboard:*`, `cli:*`
 *
 * Never routable:
 *   - `discord:agent:*`, `discord:member:*`, `discord:member-dm:*`, `slack:agent:*`
 *   - channel keys with five or more segments whose fourth segment is a
 *     known agent slug (`{platform}:channel:{channelId}:{slug}:{userId}`)
 *   - every other prefix (`team:*`, `cron:*`, ...) and the empty key
 *
 * @param sessionKey Colon-separated session identifier.
 * @param ownerAgentSlugs Slugs of the owner's agents; consulted only for
 *   channel keys with five or more segments.
 * @returns true when routing may be attempted for this session.
 */
export function isRoutable(sessionKey, ownerAgentSlugs) {
    if (!sessionKey) {
        return false;
    }
    const segments = sessionKey.split(':');
    const [platform, kind] = segments;
    // An agent slug embedded in a channel key marks an agent-scoped team chat.
    const isAgentScopedChannel = () => kind === 'channel'
        && segments.length >= 5
        && ownerAgentSlugs.has(segments[3] ?? '');
    switch (platform) {
        case 'discord':
            // agent / member / member-dm kinds fail the final kind test below.
            if (isAgentScopedChannel()) {
                return false;
            }
            return kind === 'user' || kind === 'channel';
        case 'slack':
            // The agent kind fails the final kind test below.
            if (isAgentScopedChannel()) {
                return false;
            }
            return kind === 'user' || kind === 'dm' || kind === 'channel';
        case 'telegram':
            return kind === 'user' || /^\d+$/.test(kind ?? '');
        case 'dashboard':
        case 'cli':
            return true;
        default:
            // team:*, cron:*, heartbeat-triggered sessions, etc. never route.
            return false;
    }
}
76
/**
 * Render the team roster as a markdown bullet list for the classifier
 * prompt. The first bullet is always the built-in Clementine entry; any
 * roster agent whose slug is "clementine" is skipped so she appears once.
 *
 * @param agents Agent profiles; `slug`, `name` and `description` are read.
 * @returns One bullet per line, joined with newlines.
 */
function formatAgentRoster(agents) {
    // Clementine is the "stay with me" option and always leads the list.
    const generalist = '- **clementine**: generalist assistant, calendar/inbox/planning, meta questions, small talk, anything not clearly a specialist task';
    const specialistBullets = agents
        .filter(agent => agent.slug !== 'clementine')
        .map(agent => {
            // Cap at 200 characters first, then collapse whitespace, to keep the prompt tight.
            const summary = (agent.description ?? '')
                .slice(0, 200)
                .replace(/\s+/g, ' ')
                .trim();
            return `- **${agent.slug}** (${agent.name}): ${summary}`;
        });
    return [generalist, ...specialistBullets].join('\n');
}
90
/**
 * Assemble the dispatcher prompt sent to the routing model: a fixed
 * preamble, the agent roster, the user message (capped at 1500
 * characters), the decision rules, the confidence scale and the JSON
 * output schema.
 *
 * @param userMessage Raw text of the incoming message.
 * @param agents Agent profiles passed through to formatAgentRoster.
 * @returns The complete prompt as one newline-joined string.
 */
function buildPrompt(userMessage, agents) {
    const preamble = [
        'You are Clementine\'s team dispatcher. Decide which team member should handle an incoming user message.',
        '',
    ];
    const context = [
        '## The team:',
        formatAgentRoster(agents),
        '',
        '## The message:',
        userMessage.slice(0, 1500),
        '',
    ];
    const decisionRules = [
        '## Decision rules',
        '',
        '- Default to **clementine** (the generalist) unless the request clearly matches a specialist agent\'s domain.',
        '- Match on DOMAIN, not keywords. "Help me think about our outbound strategy" is strategic → Clementine. "Send a follow-up to Aaron about the Scorpion audit" is operational outbound → the SDR agent.',
        '- If the user explicitly names an agent ("have Ross do X"), pick that agent at confidence 1.0.',
        '- If the request is meta ("what agents do I have", "how did Ross do this week") → clementine.',
        '- Small talk, greetings, casual chat → clementine.',
        '- Ambiguous or multi-domain requests → clementine with lower confidence (she can delegate herself).',
        '',
    ];
    const confidenceScale = [
        '## Confidence scale',
        '- 0.9-1.0: Explicit address of a specific agent, or a textbook specialist task (e.g., "send a follow-up" → SDR)',
        '- 0.7-0.9: Clear specialist domain but implicit (e.g., "draft a LinkedIn message" → SDR, "write a content brief" → CMO agent)',
        '- 0.4-0.7: Plausibly specialist but could go to Clementine',
        '- <0.4: Generalist task or ambiguous — clementine',
        '',
    ];
    const outputSchema = [
        '## Output schema (JSON only, no fences):',
        '{',
        ' "targetAgent": "slug (use \\"clementine\\" if no specialist match)",',
        ' "confidence": 0.0-1.0,',
        ' "reasoning": "one short sentence — what signal drove the choice"',
        '}',
    ];
    const sections = [preamble, context, decisionRules, confidenceScale, outputSchema];
    return sections.flat().join('\n');
}
123
/**
 * Pull a routing decision out of the raw model reply.
 *
 * The text from the first "{" to the last "}" is parsed as JSON, so
 * chatter before or after the object is tolerated. `targetAgent` must be
 * a string and is trimmed and lowercased. `confidence` is clamped to
 * [0, 1] and becomes 0 when it is not a number. `reasoning` is cut to
 * 200 characters and becomes '' when it is not a string.
 *
 * @param raw Model output.
 * @returns The normalised decision, or null when no usable JSON object
 *   is found (including when `raw` is not a string).
 */
function parseResponse(raw) {
    let payload;
    try {
        const open = raw.indexOf('{');
        const close = raw.lastIndexOf('}');
        if (open === -1 || close < open) {
            return null;
        }
        payload = JSON.parse(raw.slice(open, close + 1));
    }
    catch {
        // Non-string input or malformed JSON.
        return null;
    }
    const { targetAgent, confidence, reasoning } = payload;
    if (typeof targetAgent !== 'string') {
        return null;
    }
    let clamped = 0;
    if (typeof confidence === 'number') {
        clamped = Math.max(0, Math.min(1, confidence));
    }
    let why = '';
    if (typeof reasoning === 'string') {
        why = reasoning.slice(0, 200);
    }
    return {
        targetAgent: targetAgent.trim().toLowerCase(),
        confidence: clamped,
        reasoning: why,
    };
}
144
/** Escape regex metacharacters so `value` matches literally inside a RegExp. */
function escapeForRegExp(value) {
    return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
/**
 * Classify a user message. Returns null if the call fails — caller
 * should fall back to Clementine handling.
 *
 * Resolution order:
 *  1. No specialist (non-"clementine") agent in the roster: null.
 *  2. Fast path: the message contains a specialist's first name or slug
 *     as a whole word (case-insensitive): that agent, confidence 1.0.
 *  3. Otherwise the LLM classifier is asked. An unparseable or non-string
 *     reply yields null; a target slug that is not in the roster is
 *     rewritten to "clementine" with confidence capped at 0.3.
 *
 * @param userMessage Raw text of the user's message.
 * @param agents Roster of agent profiles (`slug`, `name`, `description`).
 * @param gateway Gateway used to run the classifier prompt.
 * @returns The routing decision, or null when none could be produced.
 */
export async function classifyRoute(userMessage, agents, gateway) {
    // Only classify when there's at least one non-clementine agent available.
    const specialists = agents.filter(a => a.slug !== 'clementine');
    if (specialists.length === 0)
        return null;
    // Fast path: explicit name/slug mention anywhere in the message.
    // NOTE(review): this runs before the LLM, so meta questions that merely
    // mention an agent ("how did Ross do this week") and agents whose first
    // name is a common word are routed at confidence 1.0 — confirm intended.
    for (const a of specialists) {
        const nameLower = typeof a.name === 'string' ? a.name.toLowerCase() : '';
        const firstName = nameLower.split(/\s+/)[0] ?? '';
        // Only match on reasonable word boundaries; skip very short first names
        if (firstName.length < 3)
            continue;
        // Escape both alternatives so names/slugs containing regex
        // metacharacters ("C++", "Dr.", "(Sales)") neither throw nor act as
        // patterns, and drop an empty slug, which would otherwise produce an
        // empty alternative that matches every message.
        const alternatives = [firstName, a.slug]
            .filter(s => typeof s === 'string' && s.length > 0)
            .map(escapeForRegExp);
        const wordRe = new RegExp(`\\b(${alternatives.join('|')})\\b`, 'i');
        if (wordRe.test(userMessage)) {
            logger.debug({ slug: a.slug, trigger: 'explicit-mention' }, 'Fast-path routing decision');
            return {
                targetAgent: a.slug,
                confidence: 1.0,
                reasoning: `User explicitly addressed ${a.name} by name.`,
            };
        }
    }
    // LLM classifier for everything else.
    const prompt = buildPrompt(userMessage, agents);
    let raw;
    try {
        raw = await gateway.handleCronJob('route-classify', prompt, 1, // tier 1
        3, // maxTurns — classifier doesn't need tools
        'haiku');
    }
    catch (err) {
        logger.warn({ err }, 'Route classifier call failed');
        return null;
    }
    // Guard before touching string methods: a null/undefined/object reply must
    // degrade to "no decision" rather than throw while building the log line.
    if (typeof raw !== 'string') {
        logger.warn({ rawType: typeof raw }, 'Route classifier returned a non-string response');
        return null;
    }
    const decision = parseResponse(raw);
    if (!decision) {
        logger.warn({ rawHead: raw.slice(0, 200) }, 'Route classifier returned unparseable response');
        return null;
    }
    // Validate target exists in the roster; if not, treat as Clementine.
    const allSlugs = new Set(agents.map(a => a.slug));
    allSlugs.add('clementine');
    if (!allSlugs.has(decision.targetAgent)) {
        logger.warn({ decision }, 'Classifier returned unknown agent — treating as clementine');
        decision.targetAgent = 'clementine';
        decision.confidence = Math.min(decision.confidence, 0.3);
    }
    return decision;
}
197
+ //# sourceMappingURL=route-classifier.js.map
@@ -611,63 +611,108 @@ export class SelfImproveLoop {
611
611
  async hypothesize(metrics, history) {
612
612
  // Read targeted triggers (written by cron scheduler when jobs fail repeatedly)
613
613
  let targetedTriggers = '';
614
+ const triggerBullets = [];
615
+ // Source 1: explicit triggers written by the cron scheduler at 3+
616
+ // consecutive errors (legacy path — we still honor and drain).
614
617
  const triggersDir = path.join(SELF_IMPROVE_DIR, 'triggers');
615
618
  if (existsSync(triggersDir)) {
616
619
  const triggerFiles = readdirSync(triggersDir).filter(f => f.endsWith('.json'));
617
- if (triggerFiles.length > 0) {
618
- const triggers = triggerFiles.slice(0, 3).map(f => {
619
- try {
620
- const t = JSON.parse(readFileSync(path.join(triggersDir, f), 'utf-8'));
621
- // Clean up trigger after reading
622
- unlinkSync(path.join(triggersDir, f));
623
- return t;
624
- }
625
- catch {
626
- return null;
627
- }
628
- }).filter(Boolean);
629
- if (triggers.length > 0) {
630
- targetedTriggers = `\n\n## PRIORITY: Failing Jobs Needing Attention\n` +
631
- `These jobs have been failing repeatedly and need prompt/config fixes:\n` +
632
- triggers.map((t) => `- **${t.jobName}**: ${t.consecutiveErrors} consecutive errors. Recent: ${(t.recentErrors ?? []).join('; ')}`).join('\n') +
633
- `\n\nFocus your improvement hypothesis on fixing these jobs first.\n`;
620
+ const triggers = triggerFiles.slice(0, 3).map(f => {
621
+ try {
622
+ const t = JSON.parse(readFileSync(path.join(triggersDir, f), 'utf-8'));
623
+ unlinkSync(path.join(triggersDir, f));
624
+ return t;
625
+ }
626
+ catch {
627
+ return null;
634
628
  }
629
+ }).filter(Boolean);
630
+ for (const t of triggers) {
631
+ triggerBullets.push(`- **${t.jobName}**: ${t.consecutiveErrors} consecutive errors. Recent: ${(t.recentErrors ?? []).join('; ')}`);
635
632
  }
636
633
  }
634
+ // Source 2: broken-jobs from the failure monitor. These are jobs the
635
+ // user hasn't applied a fix for yet — real, current gaps the hypothesizer
636
+ // should target. Complements the diversity constraint: even if the area
637
+ // has been over-targeted historically, a specific broken job is a fresh
638
+ // concrete signal.
639
+ try {
640
+ const { computeBrokenJobs } = await import('../gateway/failure-monitor.js');
641
+ const broken = computeBrokenJobs();
642
+ for (const b of broken.slice(0, 3)) {
643
+ const diagHint = b.diagnosis
644
+ ? ` Diagnosis: ${b.diagnosis.rootCause.slice(0, 120)}`
645
+ : '';
646
+ triggerBullets.push(`- **${b.jobName}**: ${b.errorCount48h}/${b.totalRuns48h} failed in 48h${b.circuitBreakerEngagedAt ? ' (breaker engaged)' : ''}.${diagHint}`);
647
+ }
648
+ }
649
+ catch { /* failure-monitor module optional */ }
650
+ if (triggerBullets.length > 0) {
651
+ targetedTriggers = `\n\n## PRIORITY: Failing Jobs Needing Attention\n` +
652
+ `These jobs have been failing recently and need prompt/config fixes:\n` +
653
+ triggerBullets.join('\n') +
654
+ `\n\nFocus your improvement hypothesis on fixing these jobs first.\n`;
655
+ }
637
656
  // Format experiment history for the prompt
638
657
  const historyText = history.slice(-20).map(e => `#${e.iteration} | ${e.area} | "${e.hypothesis.slice(0, 60)}" | ${(e.score * 10).toFixed(1)}/10 ${e.accepted ? '✅' : '❌'}`).join('\n') || '(no prior experiments)';
639
- // Enforce diversity: count recent proposals per area:target AND per area
658
+ // Enforce diversity: count recent proposals per area:target AND per area.
659
+ // A pair is only "over-targeted" if its MOST RECENT attempt was within
660
+ // the last 30 days — otherwise it's fair game to retry with fresh data.
661
+ // Stops the saturation state where after ~60 experiments the loop has
662
+ // blocked every area:target pair permanently and produces no new
663
+ // hypotheses (the Apr 11-19 plateau).
664
+ const DIVERSITY_WINDOW_MS = 30 * 24 * 60 * 60 * 1000;
665
+ const diversityCutoff = Date.now() - DIVERSITY_WINDOW_MS;
640
666
  const recentTargets = new Map();
641
667
  const recentAreas = new Map();
642
- for (const e of history.slice(-10)) {
668
+ for (const e of history.slice(-50)) {
643
669
  const key = `${e.area}:${e.target}`;
644
- recentTargets.set(key, (recentTargets.get(key) ?? 0) + 1);
645
- recentAreas.set(e.area, (recentAreas.get(e.area) ?? 0) + 1);
670
+ const ts = Date.parse(e.startedAt);
671
+ const tsMs = Number.isFinite(ts) ? ts : 0;
672
+ const cur = recentTargets.get(key);
673
+ recentTargets.set(key, {
674
+ count: (cur?.count ?? 0) + 1,
675
+ newestMs: Math.max(cur?.newestMs ?? 0, tsMs),
676
+ });
677
+ const curA = recentAreas.get(e.area);
678
+ recentAreas.set(e.area, {
679
+ count: (curA?.count ?? 0) + 1,
680
+ newestMs: Math.max(curA?.newestMs ?? 0, tsMs),
681
+ });
646
682
  }
647
683
  for (const p of this.getPendingChanges()) {
648
684
  const key = `${p.area}:${p.target}`;
649
- recentTargets.set(key, (recentTargets.get(key) ?? 0) + 1);
650
- recentAreas.set(p.area, (recentAreas.get(p.area) ?? 0) + 1);
685
+ const now = Date.now();
686
+ const cur = recentTargets.get(key);
687
+ recentTargets.set(key, {
688
+ count: (cur?.count ?? 0) + 1,
689
+ newestMs: Math.max(cur?.newestMs ?? 0, now),
690
+ });
691
+ const curA = recentAreas.get(p.area);
692
+ recentAreas.set(p.area, {
693
+ count: (curA?.count ?? 0) + 1,
694
+ newestMs: Math.max(curA?.newestMs ?? 0, now),
695
+ });
651
696
  }
652
- // Block area:target pairs with >= 2 recent proposals
697
+ // Block only when both (a) count is high enough AND (b) the last attempt
698
+ // was within the diversity window.
653
699
  const overTargeted = [...recentTargets.entries()]
654
- .filter(([, count]) => count >= 2)
700
+ .filter(([, v]) => v.count >= 2 && v.newestMs > diversityCutoff)
655
701
  .map(([key]) => key);
656
- // Block entire areas with >= 3 recent proposals
657
702
  const overTargetedAreas = [...recentAreas.entries()]
658
- .filter(([, count]) => count >= 3)
703
+ .filter(([, v]) => v.count >= 3 && v.newestMs > diversityCutoff)
659
704
  .map(([area]) => area);
660
705
  // Build area coverage stats to nudge the LLM toward unexplored areas
661
706
  const allAreas = this.config.areas;
662
707
  const areaCoverage = allAreas.map(area => {
663
- const count = recentAreas.get(area) ?? 0;
708
+ const count = recentAreas.get(area)?.count ?? 0;
664
709
  return `- ${area}: ${count} recent proposals`;
665
710
  }).join('\n');
666
711
  const diversityConstraint = `\n\n## AREA COVERAGE (target under-explored areas)\n${areaCoverage}\n` +
667
712
  (overTargeted.length > 0 || overTargetedAreas.length > 0
668
713
  ? `\n## DIVERSITY CONSTRAINT\n` +
669
714
  (overTargetedAreas.length > 0
670
- ? `These AREAS have been over-targeted and MUST NOT be chosen:\n${overTargetedAreas.map(a => `- ${a} (${recentAreas.get(a)} proposals)`).join('\n')}\n`
715
+ ? `These AREAS have been over-targeted and MUST NOT be chosen:\n${overTargetedAreas.map(a => `- ${a} (${recentAreas.get(a)?.count ?? 0} proposals)`).join('\n')}\n`
671
716
  : '') +
672
717
  (overTargeted.length > 0
673
718
  ? `These specific targets MUST NOT be re-targeted:\n${overTargeted.map(t => `- ${t}`).join('\n')}\n`