clementine-agent 1.0.21 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/complexity-classifier.d.ts +30 -0
- package/dist/agent/complexity-classifier.js +153 -0
- package/dist/agent/insight-engine.js +42 -0
- package/dist/agent/route-classifier.d.ts +52 -0
- package/dist/agent/route-classifier.js +197 -0
- package/dist/agent/self-improve.js +74 -29
- package/dist/cli/dashboard.js +56 -1
- package/dist/gateway/claim-tracker.d.ts +8 -0
- package/dist/gateway/claim-tracker.js +145 -1
- package/dist/gateway/failure-monitor.js +108 -5
- package/dist/gateway/heartbeat-scheduler.js +16 -3
- package/dist/gateway/outcome-grader.d.ts +41 -0
- package/dist/gateway/outcome-grader.js +173 -0
- package/dist/gateway/router.d.ts +19 -0
- package/dist/gateway/router.js +144 -2
- package/dist/memory/store.js +11 -0
- package/package.json +1 -1
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Clementine TypeScript — Lightweight complexity classifier.
|
|
3
|
+
*
|
|
4
|
+
* Deterministic regex + length heuristics that decide whether a user
|
|
5
|
+
* message is "complex" enough to warrant planning-before-acting. No
|
|
6
|
+
* LLM call — gate is cheap enough to run on every message.
|
|
7
|
+
*
|
|
8
|
+
* When complex, the gateway injects a "plan-first" system-prompt
|
|
9
|
+
* directive so the agent proposes a numbered plan and waits for
|
|
10
|
+
* confirmation before diving in. Not perfect — the LLM still decides
|
|
11
|
+
* what "plan" means — but much more consistent than a generic
|
|
12
|
+
* SOUL.md directive that the model ignores half the time.
|
|
13
|
+
*/
|
|
14
|
+
/**
 * Outcome of the complexity gate for one user message.
 */
export interface ComplexityVerdict {
    /** True when the message should receive the plan-first directive. */
    complex: boolean;
    /** Short label naming the rule that settled the verdict. */
    reason: string;
    /** Labels of the individual heuristics that fired; may be empty. */
    signals: Array<string>;
}
|
|
19
|
+
/**
 * Run the heuristic complexity gate over a single user message.
 *
 * Synchronous and side-effect free: no model call and no I/O.
 *
 * @param text - The raw user message.
 * @returns The verdict plus the heuristics that fired.
 */
export declare function classifyComplexity(text: string): ComplexityVerdict;
|
|
23
|
+
/**
 * Produce the "plan before acting" system-prompt directive.
 *
 * Intended to be prepended to the normal system prompt for the one query
 * that was classified as complex. The text is deliberately terse, since
 * short meta-instructions are easier for the model to follow.
 *
 * @returns The directive as a newline-separated string.
 */
export declare function planFirstDirective(): string;
|
|
30
|
+
//# sourceMappingURL=complexity-classifier.d.ts.map
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Clementine TypeScript — Lightweight complexity classifier.
|
|
3
|
+
*
|
|
4
|
+
* Deterministic regex + length heuristics that decide whether a user
|
|
5
|
+
* message is "complex" enough to warrant planning-before-acting. No
|
|
6
|
+
* LLM call — gate is cheap enough to run on every message.
|
|
7
|
+
*
|
|
8
|
+
* When complex, the gateway injects a "plan-first" system-prompt
|
|
9
|
+
* directive so the agent proposes a numbered plan and waits for
|
|
10
|
+
* confirmation before diving in. Not perfect — the LLM still decides
|
|
11
|
+
* what "plan" means — but much more consistent than a generic
|
|
12
|
+
* SOUL.md directive that the model ignores half the time.
|
|
13
|
+
*/
|
|
14
|
+
/**
 * Verbs that indicate the user wants something DONE rather than answered.
 * Several of them in one message is treated as a strong complexity signal.
 * Entries containing a space ("set up", "tear down") are matched with
 * flexible whitespace by the counting helper.
 */
const ACTION_VERBS = [
    // CRUD / lifecycle
    'send', 'create', 'run', 'schedule', 'update', 'delete', 'add', 'remove',
    // Authoring / shipping
    'draft', 'write', 'post', 'publish', 'deploy', 'build', 'edit', 'move',
    // Organizing / communication
    'rename', 'archive', 'restore', 'assign', 'delegate', 'email', 'message',
    'invite', 'book', 'cancel', 'notify', 'alert', 'set up', 'tear down',
    // Workflow decisions
    'process', 'review', 'approve', 'reject',
    // Data gathering / analysis
    'extract', 'fetch', 'pull', 'gather', 'compile', 'summarize', 'analyze',
    // Data movement
    'generate', 'produce', 'export', 'import', 'upload', 'download', 'sync',
];
|
|
28
|
+
/**
|
|
29
|
+
* Chain markers — "do X and then Y" explicitly encode a multi-step
|
|
30
|
+
* task. A single occurrence in a DO-type message is a clear signal.
|
|
31
|
+
*/
|
|
32
|
+
const CHAIN_MARKERS = [
|
|
33
|
+
/\band\s+then\b/i,
|
|
34
|
+
/,\s+then\b/i, // "X, then Y"
|
|
35
|
+
/\bfirst\b[\s\S]{0,80}\bthen\b/i,
|
|
36
|
+
/\bafter\s+(that|which)\b/i,
|
|
37
|
+
/\bonce\s+(that|you)\b.*,/i,
|
|
38
|
+
/\bnext\b.*,/i,
|
|
39
|
+
];
|
|
40
|
+
/**
|
|
41
|
+
* Phrasings that explicitly ask for plan-first behavior. Triggers
|
|
42
|
+
* regardless of other heuristics.
|
|
43
|
+
*/
|
|
44
|
+
const EXPLICIT_PLAN_ASKS = [
|
|
45
|
+
/\bpropose\s+a\s+plan\b/i,
|
|
46
|
+
/\bwhat\s+(would|'d)\s+be\s+your\s+approach\b/i,
|
|
47
|
+
/\bplan\s+(this|it)\s+out\b/i,
|
|
48
|
+
/\blay\s+out\s+(a|the)\s+plan\b/i,
|
|
49
|
+
/\bwalk\s+me\s+through\s+(what|how)\b/i,
|
|
50
|
+
];
|
|
51
|
+
/**
 * Count whole-word occurrences of every ACTION_VERBS entry in `text`.
 * Matching is case-insensitive (the text is lower-cased first) and any
 * whitespace inside a multi-word verb matches one or more whitespace chars.
 */
function countActionVerbs(text) {
    const haystack = text.toLowerCase();
    return ACTION_VERBS.reduce((total, verb) => {
        const pattern = new RegExp(`\\b${verb.replace(/\s+/g, '\\s+')}\\b`, 'g');
        const hits = haystack.match(pattern);
        return hits ? total + hits.length : total;
    }, 0);
}
|
|
62
|
+
/**
 * Rough count of distinct "things" a message refers to: quoted strings,
 * @mentions, and a comma-separated list of capitalized names. Meant to
 * catch cases like "email John, Sarah, and Mike"; it is a heuristic, not
 * a parser.
 *
 * @param text - The message to scan.
 * @returns Number of entity-like spans found. A detected name list always
 *          contributes exactly 3, whatever its real length.
 */
function countEntities(text) {
    let count = 0;
    // Double-quoted strings, 2-60 characters long.
    count += (text.match(/"[^"]{2,60}"/g) ?? []).length;
    // Single-quoted strings. The lookarounds reject apostrophes that sit
    // inside or at the end of a word (don't, it's, users'), which would
    // otherwise pair up across a sentence and count as a quoted entity.
    count += (text.match(/(?<![A-Za-z0-9])'[^']{2,60}'(?![A-Za-z0-9])/g) ?? []).length;
    // @mentions
    count += (text.match(/@\w+/g) ?? []).length;
    // Comma-separated name lists (e.g., "John, Sarah, and Mike")
    const listMatch = text.match(/(?:[A-Z][a-z]{1,20},\s+){2,}(?:and\s+)?[A-Z][a-z]{1,20}/);
    if (listMatch)
        count += 3;
    return count;
}
|
|
80
|
+
/**
|
|
81
|
+
* Classify complexity. Pure function — no LLM, no I/O.
|
|
82
|
+
*/
|
|
83
|
+
export function classifyComplexity(text) {
|
|
84
|
+
if (!text || typeof text !== 'string')
|
|
85
|
+
return { complex: false, reason: 'empty', signals: [] };
|
|
86
|
+
const trimmed = text.trim();
|
|
87
|
+
// Skip commands and very short messages
|
|
88
|
+
if (trimmed.length < 30)
|
|
89
|
+
return { complex: false, reason: 'too short', signals: [] };
|
|
90
|
+
if (trimmed.startsWith('!') || trimmed.startsWith('/'))
|
|
91
|
+
return { complex: false, reason: 'command', signals: [] };
|
|
92
|
+
const signals = [];
|
|
93
|
+
// Signal 1: explicit ask for plan-first
|
|
94
|
+
for (const re of EXPLICIT_PLAN_ASKS) {
|
|
95
|
+
if (re.test(trimmed)) {
|
|
96
|
+
return { complex: true, reason: 'user explicitly asked for a plan', signals: ['explicit-plan-ask'] };
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
// Signal 2: multiple action verbs
|
|
100
|
+
const verbs = countActionVerbs(trimmed);
|
|
101
|
+
if (verbs >= 3)
|
|
102
|
+
signals.push(`${verbs} action verbs`);
|
|
103
|
+
// Signal 3: chain markers
|
|
104
|
+
for (const re of CHAIN_MARKERS) {
|
|
105
|
+
if (re.test(trimmed)) {
|
|
106
|
+
signals.push('chain marker');
|
|
107
|
+
break;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
// Signal 4: multiple entities
|
|
111
|
+
const entities = countEntities(trimmed);
|
|
112
|
+
if (entities >= 3)
|
|
113
|
+
signals.push(`${entities} entities`);
|
|
114
|
+
// Signal 5: long message with at least one action verb (big scope, not just a question)
|
|
115
|
+
if (trimmed.length > 400 && verbs >= 1)
|
|
116
|
+
signals.push('long + action');
|
|
117
|
+
// Gate: at least 2 signals fire, OR a single high-confidence signal
|
|
118
|
+
// (chain markers, explicit-plan-ask, or 3+ action verbs).
|
|
119
|
+
const highConfidenceSingles = [
|
|
120
|
+
verbs >= 3,
|
|
121
|
+
signals.includes('chain marker'),
|
|
122
|
+
];
|
|
123
|
+
if (highConfidenceSingles.some(Boolean)) {
|
|
124
|
+
return { complex: true, reason: 'strong single signal', signals };
|
|
125
|
+
}
|
|
126
|
+
if (signals.length >= 2) {
|
|
127
|
+
return { complex: true, reason: 'multiple signals', signals };
|
|
128
|
+
}
|
|
129
|
+
return { complex: false, reason: 'below threshold', signals };
|
|
130
|
+
}
|
|
131
|
+
/**
 * Assemble the "plan before acting" directive injected when a message is
 * classified as complex. The text is a heading, a numbered protocol, the
 * follow-up rules, and an escape hatch for requests that only look
 * multi-step. Sections are separated by one blank line.
 *
 * @returns The directive as a single newline-joined string.
 */
export function planFirstDirective() {
    const protocol = [
        'This request has multiple steps. Before doing any of them:',
        '1. Write a numbered plan (3-7 steps, one line each).',
        '2. Call out anything that needs my decision — which contact, which template, which timing.',
        '3. End with: "Reply **go** to start, or tell me what to change."',
        '4. STOP. Do NOT start executing the plan in this turn.',
    ];
    const followUp = [
        'When I reply "go" (or equivalent) in the next message, proceed with the plan.',
        'If I edit the plan, revise and ask again.',
    ];
    const sections = [
        '## PLAN BEFORE ACTING',
        protocol.join('\n'),
        followUp.join('\n'),
        'SKIP this protocol only if the request is actually a single step disguised as multiple (e.g., "send an email to Aaron about X and cc Sarah" is one email, not two).',
    ];
    return sections.join('\n\n');
}
|
|
153
|
+
//# sourceMappingURL=complexity-classifier.js.map
|
|
@@ -168,6 +168,48 @@ export function gatherInsightSignals(gateway) {
|
|
|
168
168
|
}
|
|
169
169
|
}
|
|
170
170
|
catch { /* non-fatal */ }
|
|
171
|
+
// 5. Broken jobs from the failure monitor. Any currently-flagged job
|
|
172
|
+
// with a diagnosis is a real, actionable signal the owner should
|
|
173
|
+
// see proactively rather than stumble across in the dashboard.
|
|
174
|
+
try {
|
|
175
|
+
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
176
|
+
const fm = require('../gateway/failure-monitor.js');
|
|
177
|
+
const broken = fm.computeBrokenJobs();
|
|
178
|
+
for (const b of broken.slice(0, 3)) {
|
|
179
|
+
const hint = b.diagnosis?.rootCause
|
|
180
|
+
? ` — ${b.diagnosis.rootCause.slice(0, 120)}`
|
|
181
|
+
: '';
|
|
182
|
+
signals.push(`Broken cron job "${b.jobName}": ${b.errorCount48h}/${b.totalRuns48h} failures${hint}`);
|
|
183
|
+
if (b.diagnosis?.proposedFix?.autoApply) {
|
|
184
|
+
signals.push(`One-click fix available for "${b.jobName}" — ${b.diagnosis.proposedFix.details.slice(0, 100)}`);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
catch { /* failure-monitor may not be loadable; fine */ }
|
|
189
|
+
// 6. Claim tracker — failed claims in the last N hours erode trust.
|
|
190
|
+
// Surface them so the owner sees "Clementine said she'd do X; she
|
|
191
|
+
// didn't" instead of silently swallowing the miss.
|
|
192
|
+
try {
|
|
193
|
+
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
194
|
+
const { MemoryStore } = require('../memory/store.js');
|
|
195
|
+
const { MEMORY_DB_PATH, VAULT_DIR } = require('../config.js');
|
|
196
|
+
if (existsSync(MEMORY_DB_PATH)) {
|
|
197
|
+
const store = new MemoryStore(MEMORY_DB_PATH, VAULT_DIR);
|
|
198
|
+
store.initialize();
|
|
199
|
+
const db = store.conn;
|
|
200
|
+
try {
|
|
201
|
+
const rows = db.prepare(`SELECT subject, claim_type FROM claims
|
|
202
|
+
WHERE status = 'failed' AND verified_at >= datetime('now', '-6 hours')
|
|
203
|
+
ORDER BY verified_at DESC LIMIT 3`).all();
|
|
204
|
+
for (const r of rows) {
|
|
205
|
+
signals.push(`Failed claim: "${r.subject}" (${r.claim_type}) — I promised and didn't deliver`);
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
catch { /* table may not exist */ }
|
|
209
|
+
store.close();
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
catch { /* non-fatal */ }
|
|
171
213
|
return signals;
|
|
172
214
|
}
|
|
173
215
|
/**
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Clementine TypeScript — Team-routing classifier.
|
|
3
|
+
*
|
|
4
|
+
* Decides whether a user message addressed to Clementine should be
|
|
5
|
+
* delegated to a specialist agent (Ross, Sasha, Nora, etc.) or handled
|
|
6
|
+
* by Clementine herself.
|
|
7
|
+
*
|
|
8
|
+
* CRITICAL safety rail: this classifier is ONLY invoked when the user
|
|
9
|
+
* is talking TO Clementine. Direct-to-agent messages (agent bot DMs,
|
|
10
|
+
* agent-scoped channels) bypass routing entirely — the session-key
|
|
11
|
+
* ownership check in gateway/router.ts enforces this before calling
|
|
12
|
+
* classifyRoute. Routing never crosses the boundary between different
|
|
13
|
+
* agent bots.
|
|
14
|
+
*
|
|
15
|
+
* Returns structured decision: {targetAgent, confidence, reasoning}.
|
|
16
|
+
* Caller decides what to do with confidence (auto-delegate, soft-suggest,
|
|
17
|
+
* or stay with Clementine).
|
|
18
|
+
*/
|
|
19
|
+
import type { AgentProfile } from '../types.js';
|
|
20
|
+
import type { Gateway } from '../gateway/router.js';
|
|
21
|
+
/**
 * Routing verdict for one user message.
 */
export interface RouteDecision {
    /** Slug of the agent that should handle the message ("clementine" = stay put). */
    targetAgent: string;
    /** Classifier confidence, from 0 to 1. */
    confidence: number;
    /** One short sentence explaining the choice. */
    reasoning: string;
}
|
|
26
|
+
/**
 * Tell whether a session key belongs to a conversation with Clementine
 * herself, and is therefore allowed to be routed to a specialist.
 *
 * Routable keys:
 *  - `discord:user:{ownerId}` — owner's DM with the main bot
 *  - `discord:channel:{channelId}:{ownerId}` — main channel, no agent slug
 *    embedded in the key
 *  - `slack:user:{userId}`, `slack:dm:{userId}`, and `slack:channel:*`
 *    without an agent slug
 *  - `telegram:user:*` and `telegram:{numericId}`
 *  - `dashboard:*` — web dashboard chat
 *  - `cli:*` — local CLI chat
 *
 * Never routable:
 *  - `discord:agent:{slug}:*` — direct-to-agent DM
 *  - `discord:member:*`, `discord:member-dm:*` — member channels/DMs
 *  - `discord:channel:{channelId}:{slug}:{userId}` / the same Slack form
 *    (five or more parts, with a known agent slug in the fourth position)
 *  - `slack:agent:*`
 *  - anything else, e.g. `team:*` (inter-agent traffic uses the team bus)
 *
 * @param sessionKey - Colon-separated session key.
 * @param ownerAgentSlugs - Slugs of the owner's agents, used to spot
 *        agent-scoped channel keys.
 */
export declare function isRoutable(sessionKey: string, ownerAgentSlugs: Set<string>): boolean;
|
|
47
|
+
/**
 * Pick the agent that should handle a user message.
 *
 * @param userMessage - The incoming message text.
 * @param agents - Available agent profiles.
 * @param gateway - Gateway used to run the classifier call.
 * @returns The routing decision, or `null` when classification fails, in
 *          which case the caller should let Clementine handle the message.
 */
export declare function classifyRoute(userMessage: string, agents: AgentProfile[], gateway: Gateway): Promise<RouteDecision | null>;
|
|
52
|
+
//# sourceMappingURL=route-classifier.d.ts.map
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Clementine TypeScript — Team-routing classifier.
|
|
3
|
+
*
|
|
4
|
+
* Decides whether a user message addressed to Clementine should be
|
|
5
|
+
* delegated to a specialist agent (Ross, Sasha, Nora, etc.) or handled
|
|
6
|
+
* by Clementine herself.
|
|
7
|
+
*
|
|
8
|
+
* CRITICAL safety rail: this classifier is ONLY invoked when the user
|
|
9
|
+
* is talking TO Clementine. Direct-to-agent messages (agent bot DMs,
|
|
10
|
+
* agent-scoped channels) bypass routing entirely — the session-key
|
|
11
|
+
* ownership check in gateway/router.ts enforces this before calling
|
|
12
|
+
* classifyRoute. Routing never crosses the boundary between different
|
|
13
|
+
* agent bots.
|
|
14
|
+
*
|
|
15
|
+
* Returns structured decision: {targetAgent, confidence, reasoning}.
|
|
16
|
+
* Caller decides what to do with confidence (auto-delegate, soft-suggest,
|
|
17
|
+
* or stay with Clementine).
|
|
18
|
+
*/
|
|
19
|
+
import pino from 'pino';
|
|
20
|
+
const logger = pino({ name: 'clementine.route-classifier' });
|
|
21
|
+
/**
 * Tell whether a session key belongs to a conversation with Clementine
 * herself, and is therefore allowed to be routed to a specialist.
 *
 * Routable keys:
 *  - `discord:user:*` and `discord:channel:*` without an agent slug
 *  - `slack:user:*`, `slack:dm:*`, `slack:channel:*` without an agent slug
 *  - `telegram:user:*` and `telegram:{numericId}`
 *  - `dashboard:*`, `cli:*`
 *
 * Never routable:
 *  - `discord:agent:*`, `discord:member:*`, `discord:member-dm:*`
 *  - `slack:agent:*`
 *  - `{discord|slack}:channel:{channelId}:{slug}:{userId}` — five or more
 *    parts with a known agent slug in the fourth position
 *  - every other prefix (`team:*`, `cron:*`, ...)
 *
 * @param sessionKey - Colon-separated session key; empty means not routable.
 * @param ownerAgentSlugs - Set of agent slugs used to recognize
 *        agent-scoped channel keys.
 * @returns True when routing may run for this session.
 */
export function isRoutable(sessionKey, ownerAgentSlugs) {
    if (!sessionKey)
        return false;
    const segments = sessionKey.split(':');
    const [platform, kind] = segments;
    // Channel key of 5+ parts whose fourth part names one of the agents.
    const isAgentChannel = () => kind === 'channel'
        && segments.length >= 5
        && ownerAgentSlugs.has(segments[3] ?? '');
    switch (platform) {
        case 'discord':
            // Agent-bot DMs and member sessions are always agent-scoped.
            if (['agent', 'member', 'member-dm'].includes(kind) || isAgentChannel())
                return false;
            return ['user', 'channel'].includes(kind);
        case 'slack':
            if (kind === 'agent' || isAgentChannel())
                return false;
            return ['user', 'dm', 'channel'].includes(kind);
        case 'telegram':
            return kind === 'user' || /^\d+$/.test(kind ?? '');
        case 'dashboard':
        case 'cli':
            return true;
        default:
            // team:*, cron:*, heartbeat-triggered, etc. — no routing
            return false;
    }
}
|
|
76
|
+
/** Build the agent roster string for the classifier prompt. */
|
|
77
|
+
function formatAgentRoster(agents) {
|
|
78
|
+
const lines = [];
|
|
79
|
+
// Clementine is always an option — the "stay with me" target
|
|
80
|
+
lines.push('- **clementine**: generalist assistant, calendar/inbox/planning, meta questions, small talk, anything not clearly a specialist task');
|
|
81
|
+
for (const a of agents) {
|
|
82
|
+
if (a.slug === 'clementine')
|
|
83
|
+
continue;
|
|
84
|
+
// Use name + description; truncate to keep the prompt tight
|
|
85
|
+
const desc = (a.description ?? '').slice(0, 200).replace(/\s+/g, ' ').trim();
|
|
86
|
+
lines.push(`- **${a.slug}** (${a.name}): ${desc}`);
|
|
87
|
+
}
|
|
88
|
+
return lines.join('\n');
|
|
89
|
+
}
|
|
90
|
+
function buildPrompt(userMessage, agents) {
|
|
91
|
+
return [
|
|
92
|
+
'You are Clementine\'s team dispatcher. Decide which team member should handle an incoming user message.',
|
|
93
|
+
'',
|
|
94
|
+
'## The team:',
|
|
95
|
+
formatAgentRoster(agents),
|
|
96
|
+
'',
|
|
97
|
+
'## The message:',
|
|
98
|
+
userMessage.slice(0, 1500),
|
|
99
|
+
'',
|
|
100
|
+
'## Decision rules',
|
|
101
|
+
'',
|
|
102
|
+
'- Default to **clementine** (the generalist) unless the request clearly matches a specialist agent\'s domain.',
|
|
103
|
+
'- Match on DOMAIN, not keywords. "Help me think about our outbound strategy" is strategic → Clementine. "Send a follow-up to Aaron about the Scorpion audit" is operational outbound → the SDR agent.',
|
|
104
|
+
'- If the user explicitly names an agent ("have Ross do X"), pick that agent at confidence 1.0.',
|
|
105
|
+
'- If the request is meta ("what agents do I have", "how did Ross do this week") → clementine.',
|
|
106
|
+
'- Small talk, greetings, casual chat → clementine.',
|
|
107
|
+
'- Ambiguous or multi-domain requests → clementine with lower confidence (she can delegate herself).',
|
|
108
|
+
'',
|
|
109
|
+
'## Confidence scale',
|
|
110
|
+
'- 0.9-1.0: Explicit address of a specific agent, or a textbook specialist task (e.g., "send a follow-up" → SDR)',
|
|
111
|
+
'- 0.7-0.9: Clear specialist domain but implicit (e.g., "draft a LinkedIn message" → SDR, "write a content brief" → CMO agent)',
|
|
112
|
+
'- 0.4-0.7: Plausibly specialist but could go to Clementine',
|
|
113
|
+
'- <0.4: Generalist task or ambiguous — clementine',
|
|
114
|
+
'',
|
|
115
|
+
'## Output schema (JSON only, no fences):',
|
|
116
|
+
'{',
|
|
117
|
+
' "targetAgent": "slug (use \\"clementine\\" if no specialist match)",',
|
|
118
|
+
' "confidence": 0.0-1.0,',
|
|
119
|
+
' "reasoning": "one short sentence — what signal drove the choice"',
|
|
120
|
+
'}',
|
|
121
|
+
].join('\n');
|
|
122
|
+
}
|
|
123
|
+
function parseResponse(raw) {
|
|
124
|
+
try {
|
|
125
|
+
const match = raw.match(/\{[\s\S]*\}/);
|
|
126
|
+
if (!match)
|
|
127
|
+
return null;
|
|
128
|
+
const parsed = JSON.parse(match[0]);
|
|
129
|
+
if (typeof parsed.targetAgent !== 'string')
|
|
130
|
+
return null;
|
|
131
|
+
const confidence = typeof parsed.confidence === 'number'
|
|
132
|
+
? Math.max(0, Math.min(1, parsed.confidence))
|
|
133
|
+
: 0;
|
|
134
|
+
return {
|
|
135
|
+
targetAgent: parsed.targetAgent.trim().toLowerCase(),
|
|
136
|
+
confidence,
|
|
137
|
+
reasoning: typeof parsed.reasoning === 'string' ? parsed.reasoning.slice(0, 200) : '',
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
catch {
|
|
141
|
+
return null;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
/**
|
|
145
|
+
* Classify a user message. Returns null if the call fails — caller
|
|
146
|
+
* should fall back to Clementine handling.
|
|
147
|
+
*/
|
|
148
|
+
export async function classifyRoute(userMessage, agents, gateway) {
|
|
149
|
+
// Only classify when there's at least one non-clementine agent available.
|
|
150
|
+
const specialists = agents.filter(a => a.slug !== 'clementine');
|
|
151
|
+
if (specialists.length === 0)
|
|
152
|
+
return null;
|
|
153
|
+
// Fast path: explicit slug mention anywhere in the message.
|
|
154
|
+
for (const a of specialists) {
|
|
155
|
+
const nameLower = a.name.toLowerCase();
|
|
156
|
+
const firstName = nameLower.split(/\s+/)[0];
|
|
157
|
+
// Only match on reasonable word boundaries; skip one-letter firsts
|
|
158
|
+
if (firstName.length < 3)
|
|
159
|
+
continue;
|
|
160
|
+
const wordRe = new RegExp(`\\b(${firstName}|${a.slug})\\b`, 'i');
|
|
161
|
+
if (wordRe.test(userMessage)) {
|
|
162
|
+
logger.debug({ slug: a.slug, trigger: 'explicit-mention' }, 'Fast-path routing decision');
|
|
163
|
+
return {
|
|
164
|
+
targetAgent: a.slug,
|
|
165
|
+
confidence: 1.0,
|
|
166
|
+
reasoning: `User explicitly addressed ${a.name} by name.`,
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
// LLM classifier for everything else.
|
|
171
|
+
const prompt = buildPrompt(userMessage, agents);
|
|
172
|
+
let raw;
|
|
173
|
+
try {
|
|
174
|
+
raw = await gateway.handleCronJob('route-classify', prompt, 1, // tier 1
|
|
175
|
+
3, // maxTurns — classifier doesn't need tools
|
|
176
|
+
'haiku');
|
|
177
|
+
}
|
|
178
|
+
catch (err) {
|
|
179
|
+
logger.warn({ err }, 'Route classifier call failed');
|
|
180
|
+
return null;
|
|
181
|
+
}
|
|
182
|
+
const decision = parseResponse(raw);
|
|
183
|
+
if (!decision) {
|
|
184
|
+
logger.warn({ rawHead: raw.slice(0, 200) }, 'Route classifier returned unparseable response');
|
|
185
|
+
return null;
|
|
186
|
+
}
|
|
187
|
+
// Validate target exists in the roster; if not, treat as Clementine.
|
|
188
|
+
const allSlugs = new Set(agents.map(a => a.slug));
|
|
189
|
+
allSlugs.add('clementine');
|
|
190
|
+
if (!allSlugs.has(decision.targetAgent)) {
|
|
191
|
+
logger.warn({ decision }, 'Classifier returned unknown agent — treating as clementine');
|
|
192
|
+
decision.targetAgent = 'clementine';
|
|
193
|
+
decision.confidence = Math.min(decision.confidence, 0.3);
|
|
194
|
+
}
|
|
195
|
+
return decision;
|
|
196
|
+
}
|
|
197
|
+
//# sourceMappingURL=route-classifier.js.map
|
|
@@ -611,63 +611,108 @@ export class SelfImproveLoop {
|
|
|
611
611
|
async hypothesize(metrics, history) {
|
|
612
612
|
// Read targeted triggers (written by cron scheduler when jobs fail repeatedly)
|
|
613
613
|
let targetedTriggers = '';
|
|
614
|
+
const triggerBullets = [];
|
|
615
|
+
// Source 1: explicit triggers written by the cron scheduler at 3+
|
|
616
|
+
// consecutive errors (legacy path — we still honor and drain).
|
|
614
617
|
const triggersDir = path.join(SELF_IMPROVE_DIR, 'triggers');
|
|
615
618
|
if (existsSync(triggersDir)) {
|
|
616
619
|
const triggerFiles = readdirSync(triggersDir).filter(f => f.endsWith('.json'));
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
catch {
|
|
626
|
-
return null;
|
|
627
|
-
}
|
|
628
|
-
}).filter(Boolean);
|
|
629
|
-
if (triggers.length > 0) {
|
|
630
|
-
targetedTriggers = `\n\n## PRIORITY: Failing Jobs Needing Attention\n` +
|
|
631
|
-
`These jobs have been failing repeatedly and need prompt/config fixes:\n` +
|
|
632
|
-
triggers.map((t) => `- **${t.jobName}**: ${t.consecutiveErrors} consecutive errors. Recent: ${(t.recentErrors ?? []).join('; ')}`).join('\n') +
|
|
633
|
-
`\n\nFocus your improvement hypothesis on fixing these jobs first.\n`;
|
|
620
|
+
const triggers = triggerFiles.slice(0, 3).map(f => {
|
|
621
|
+
try {
|
|
622
|
+
const t = JSON.parse(readFileSync(path.join(triggersDir, f), 'utf-8'));
|
|
623
|
+
unlinkSync(path.join(triggersDir, f));
|
|
624
|
+
return t;
|
|
625
|
+
}
|
|
626
|
+
catch {
|
|
627
|
+
return null;
|
|
634
628
|
}
|
|
629
|
+
}).filter(Boolean);
|
|
630
|
+
for (const t of triggers) {
|
|
631
|
+
triggerBullets.push(`- **${t.jobName}**: ${t.consecutiveErrors} consecutive errors. Recent: ${(t.recentErrors ?? []).join('; ')}`);
|
|
635
632
|
}
|
|
636
633
|
}
|
|
634
|
+
// Source 2: broken-jobs from the failure monitor. These are jobs the
|
|
635
|
+
// user hasn't applied a fix for yet — real, current gaps the hypothesizer
|
|
636
|
+
// should target. Complements the diversity constraint: even if the area
|
|
637
|
+
// has been over-targeted historically, a specific broken job is a fresh
|
|
638
|
+
// concrete signal.
|
|
639
|
+
try {
|
|
640
|
+
const { computeBrokenJobs } = await import('../gateway/failure-monitor.js');
|
|
641
|
+
const broken = computeBrokenJobs();
|
|
642
|
+
for (const b of broken.slice(0, 3)) {
|
|
643
|
+
const diagHint = b.diagnosis
|
|
644
|
+
? ` Diagnosis: ${b.diagnosis.rootCause.slice(0, 120)}`
|
|
645
|
+
: '';
|
|
646
|
+
triggerBullets.push(`- **${b.jobName}**: ${b.errorCount48h}/${b.totalRuns48h} failed in 48h${b.circuitBreakerEngagedAt ? ' (breaker engaged)' : ''}.${diagHint}`);
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
catch { /* failure-monitor module optional */ }
|
|
650
|
+
if (triggerBullets.length > 0) {
|
|
651
|
+
targetedTriggers = `\n\n## PRIORITY: Failing Jobs Needing Attention\n` +
|
|
652
|
+
`These jobs have been failing recently and need prompt/config fixes:\n` +
|
|
653
|
+
triggerBullets.join('\n') +
|
|
654
|
+
`\n\nFocus your improvement hypothesis on fixing these jobs first.\n`;
|
|
655
|
+
}
|
|
637
656
|
// Format experiment history for the prompt
|
|
638
657
|
const historyText = history.slice(-20).map(e => `#${e.iteration} | ${e.area} | "${e.hypothesis.slice(0, 60)}" | ${(e.score * 10).toFixed(1)}/10 ${e.accepted ? '✅' : '❌'}`).join('\n') || '(no prior experiments)';
|
|
639
|
-
// Enforce diversity: count recent proposals per area:target AND per area
|
|
658
|
+
// Enforce diversity: count recent proposals per area:target AND per area.
|
|
659
|
+
// A pair is only "over-targeted" if its MOST RECENT attempt was within
|
|
660
|
+
// the last 30 days — otherwise it's fair game to retry with fresh data.
|
|
661
|
+
// Stops the saturation state where after ~60 experiments the loop has
|
|
662
|
+
// blocked every area:target pair permanently and produces no new
|
|
663
|
+
// hypotheses (the Apr 11-19 plateau).
|
|
664
|
+
const DIVERSITY_WINDOW_MS = 30 * 24 * 60 * 60 * 1000;
|
|
665
|
+
const diversityCutoff = Date.now() - DIVERSITY_WINDOW_MS;
|
|
640
666
|
const recentTargets = new Map();
|
|
641
667
|
const recentAreas = new Map();
|
|
642
|
-
for (const e of history.slice(-
|
|
668
|
+
for (const e of history.slice(-50)) {
|
|
643
669
|
const key = `${e.area}:${e.target}`;
|
|
644
|
-
|
|
645
|
-
|
|
670
|
+
const ts = Date.parse(e.startedAt);
|
|
671
|
+
const tsMs = Number.isFinite(ts) ? ts : 0;
|
|
672
|
+
const cur = recentTargets.get(key);
|
|
673
|
+
recentTargets.set(key, {
|
|
674
|
+
count: (cur?.count ?? 0) + 1,
|
|
675
|
+
newestMs: Math.max(cur?.newestMs ?? 0, tsMs),
|
|
676
|
+
});
|
|
677
|
+
const curA = recentAreas.get(e.area);
|
|
678
|
+
recentAreas.set(e.area, {
|
|
679
|
+
count: (curA?.count ?? 0) + 1,
|
|
680
|
+
newestMs: Math.max(curA?.newestMs ?? 0, tsMs),
|
|
681
|
+
});
|
|
646
682
|
}
|
|
647
683
|
for (const p of this.getPendingChanges()) {
|
|
648
684
|
const key = `${p.area}:${p.target}`;
|
|
649
|
-
|
|
650
|
-
|
|
685
|
+
const now = Date.now();
|
|
686
|
+
const cur = recentTargets.get(key);
|
|
687
|
+
recentTargets.set(key, {
|
|
688
|
+
count: (cur?.count ?? 0) + 1,
|
|
689
|
+
newestMs: Math.max(cur?.newestMs ?? 0, now),
|
|
690
|
+
});
|
|
691
|
+
const curA = recentAreas.get(p.area);
|
|
692
|
+
recentAreas.set(p.area, {
|
|
693
|
+
count: (curA?.count ?? 0) + 1,
|
|
694
|
+
newestMs: Math.max(curA?.newestMs ?? 0, now),
|
|
695
|
+
});
|
|
651
696
|
}
|
|
652
|
-
// Block
|
|
697
|
+
// Block only when both (a) count is high enough AND (b) the last attempt
|
|
698
|
+
// was within the diversity window.
|
|
653
699
|
const overTargeted = [...recentTargets.entries()]
|
|
654
|
-
.filter(([,
|
|
700
|
+
.filter(([, v]) => v.count >= 2 && v.newestMs > diversityCutoff)
|
|
655
701
|
.map(([key]) => key);
|
|
656
|
-
// Block entire areas with >= 3 recent proposals
|
|
657
702
|
const overTargetedAreas = [...recentAreas.entries()]
|
|
658
|
-
.filter(([,
|
|
703
|
+
.filter(([, v]) => v.count >= 3 && v.newestMs > diversityCutoff)
|
|
659
704
|
.map(([area]) => area);
|
|
660
705
|
// Build area coverage stats to nudge the LLM toward unexplored areas
|
|
661
706
|
const allAreas = this.config.areas;
|
|
662
707
|
const areaCoverage = allAreas.map(area => {
|
|
663
|
-
const count = recentAreas.get(area) ?? 0;
|
|
708
|
+
const count = recentAreas.get(area)?.count ?? 0;
|
|
664
709
|
return `- ${area}: ${count} recent proposals`;
|
|
665
710
|
}).join('\n');
|
|
666
711
|
const diversityConstraint = `\n\n## AREA COVERAGE (target under-explored areas)\n${areaCoverage}\n` +
|
|
667
712
|
(overTargeted.length > 0 || overTargetedAreas.length > 0
|
|
668
713
|
? `\n## DIVERSITY CONSTRAINT\n` +
|
|
669
714
|
(overTargetedAreas.length > 0
|
|
670
|
-
? `These AREAS have been over-targeted and MUST NOT be chosen:\n${overTargetedAreas.map(a => `- ${a} (${recentAreas.get(a)} proposals)`).join('\n')}\n`
|
|
715
|
+
? `These AREAS have been over-targeted and MUST NOT be chosen:\n${overTargetedAreas.map(a => `- ${a} (${recentAreas.get(a)?.count ?? 0} proposals)`).join('\n')}\n`
|
|
671
716
|
: '') +
|
|
672
717
|
(overTargeted.length > 0
|
|
673
718
|
? `These specific targets MUST NOT be re-targeted:\n${overTargeted.map(t => `- ${t}`).join('\n')}\n`
|