npm - clementine-agent - Versions diffs - 1.0.22 → 1.0.24 - Mend

clementine-agent 1.0.22 → 1.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/dist/agent/complexity-classifier.d.ts +30 -0
package/dist/agent/complexity-classifier.js +153 -0
package/dist/agent/insight-engine.js +42 -0
package/dist/agent/route-classifier.d.ts +52 -0
package/dist/agent/route-classifier.js +203 -0
package/dist/cli/dashboard.js +56 -1
package/dist/gateway/claim-tracker.js +28 -3
package/dist/gateway/failure-diagnostics.js +4 -1
package/dist/gateway/failure-monitor.js +6 -1
package/dist/gateway/fix-verification.js +4 -1
package/dist/gateway/router.d.ts +19 -0
package/dist/gateway/router.js +144 -2
package/package.json +1 -1

package/dist/agent/complexity-classifier.d.ts ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * Clementine TypeScript — Lightweight complexity classifier.
+ *
+ * Deterministic regex + length heuristics that decide whether a user
+ * message is "complex" enough to warrant planning-before-acting. No
+ * LLM call — gate is cheap enough to run on every message.
+ *
+ * When complex, the gateway injects a "plan-first" system-prompt
+ * directive so the agent proposes a numbered plan and waits for
+ * confirmation before diving in. Not perfect — the LLM still decides
+ * what "plan" means — but much more consistent than a generic
+ * SOUL.md directive that the model ignores half the time.
+ */
+export interface ComplexityVerdict {
+    /** True when the message was judged complex enough to warrant planning before acting. */
+    complex: boolean;
+    /** Short label for how the verdict was reached (e.g. "too short", "command", "multiple signals"). */
+    reason: string;
+    /** Human-readable labels of the heuristics that fired (e.g. "chain marker"); may be empty. */
+    signals: string[];
+}
+/**
+ * Classify complexity. Pure function — no LLM, no I/O.
+ */
+export declare function classifyComplexity(text: string): ComplexityVerdict;
+/**
+ * Build a system-prompt directive to inject when a complex message is
+ * detected. Prepended to Clementine's normal system prompt for this
+ * single query only. Short + declarative — meta-instructions are
+ * easier for the model to follow when they're terse.
+ */
+export declare function planFirstDirective(): string;
+//# sourceMappingURL=complexity-classifier.d.ts.map

package/dist/agent/complexity-classifier.js ADDED Viewed

@@ -0,0 +1,153 @@
+/**
+ * Clementine TypeScript — Lightweight complexity classifier.
+ *
+ * Deterministic regex + length heuristics that decide whether a user
+ * message is "complex" enough to warrant planning-before-acting. No
+ * LLM call — gate is cheap enough to run on every message.
+ *
+ * When complex, the gateway injects a "plan-first" system-prompt
+ * directive so the agent proposes a numbered plan and waits for
+ * confirmation before diving in. Not perfect — the LLM still decides
+ * what "plan" means — but much more consistent than a generic
+ * SOUL.md directive that the model ignores half the time.
+ */
+/**
+ * Action verbs that signal the user is asking Clementine to DO things
+ * (as opposed to asking questions or making small talk). Multiple
+ * action verbs in one message is a strong complexity signal.
+ *
+ * Entries are matched as whole words against the lower-cased message, in
+ * exactly the form listed here (no inflections). Multi-word entries such
+ * as "set up" allow any run of whitespace between the words.
+ *
+ * NOTE(review): several entries ('email', 'message', 'post', 'process',
+ * 'review', 'book', 'alert') are also common nouns, so a phrase like
+ * "send a message by email" counts as three verbs — confirm this is intended.
+ */
+const ACTION_VERBS = [
+    'send', 'create', 'run', 'schedule', 'update', 'delete', 'add', 'remove',
+    'draft', 'write', 'post', 'publish', 'deploy', 'build', 'edit', 'move',
+    'rename', 'archive', 'restore', 'assign', 'delegate', 'email', 'message',
+    'invite', 'book', 'cancel', 'notify', 'alert', 'set up', 'tear down',
+    'process', 'review', 'approve', 'reject',
+    'extract', 'fetch', 'pull', 'gather', 'compile', 'summarize', 'analyze',
+    'generate', 'produce', 'export', 'import', 'upload', 'download', 'sync',
+];
+/**
+ * Chain markers — "do X and then Y" explicitly encode a multi-step
+ * task. A single occurrence in a DO-type message is a clear signal.
+ *
+ * All patterns are case-insensitive. Patterns that use `.` only look
+ * within a single line, because `.` does not match a newline.
+ */
+const CHAIN_MARKERS = [
+    /\band\s+then\b/i, // "X and then Y"
+    /,\s+then\b/i, // "X, then Y"
+    /\bfirst\b[\s\S]{0,80}\bthen\b/i, // "first ... then" with at most 80 characters in between
+    /\bafter\s+(that|which)\b/i, // "after that" / "after which"
+    /\bonce\s+(that|you)\b.*,/i, // "once that ..." / "once you ..." followed by a comma on the same line
+    /\bnext\b.*,/i, // NOTE(review): any "next" followed by a comma later on the line (e.g. "next week, ...") — looks broad; confirm
+];
+/**
+ * Phrasings that explicitly ask for plan-first behavior. A match makes
+ * classifyComplexity return `complex: true` immediately, without
+ * evaluating the verb / chain-marker / entity signals.
+ *
+ * The empty-input, under-30-character and command (`!` or `/` prefix)
+ * guards in classifyComplexity run before these patterns are tested, so
+ * a short message such as "propose a plan" is NOT classified as complex.
+ */
+const EXPLICIT_PLAN_ASKS = [
+    /\bpropose\s+a\s+plan\b/i,
+    /\bwhat\s+(would|'d)\s+be\s+your\s+approach\b/i,
+    /\bplan\s+(this|it)\s+out\b/i,
+    /\blay\s+out\s+(a|the)\s+plan\b/i,
+    /\bwalk\s+me\s+through\s+(what|how)\b/i,
+];
+/**
+ * Total number of whole-word ACTION_VERBS occurrences in `text`.
+ * Matching is done on the lower-cased message; every occurrence counts,
+ * so a verb that appears twice contributes two.
+ */
+function countActionVerbs(text) {
+    const haystack = text.toLowerCase();
+    return ACTION_VERBS.reduce((total, verb) => {
+        // Multi-word verbs ("set up") tolerate any run of whitespace between words.
+        const pattern = new RegExp(`\\b${verb.replace(/\s+/g, '\\s+')}\\b`, 'g');
+        const hits = haystack.match(pattern);
+        return hits ? total + hits.length : total;
+    }, 0);
+}
+/**
+ * Rough entity count — quoted strings, @mentions, and comma-separated
+ * lists of capitalized names. Not perfect; designed to catch cases like
+ * "email John, Sarah, and Mike".
+ *
+ * @param text Raw user message.
+ * @returns Number of entity-like spans found. A name list contributes a
+ *   flat 3, regardless of how many names it actually contains.
+ */
+function countEntities(text) {
+    let count = 0;
+    // Double-quoted strings
+    count += (text.match(/"[^"]{2,60}"/g) ?? []).length;
+    // Single-quoted strings. Both quotes must sit on a word edge so that
+    // apostrophes in contractions and possessives ("don't ... it's") are
+    // not mistaken for the delimiters of a quoted span.
+    count += (text.match(/(?<!\w)'[^']{2,60}'(?!\w)/g) ?? []).length;
+    // @mentions
+    count += (text.match(/@\w+/g) ?? []).length;
+    // Comma-separated name lists (e.g., "John, Sarah, and Mike")
+    const listMatch = text.match(/(?:[A-Z][a-z]{1,20},\s+){2,}(?:and\s+)?[A-Z][a-z]{1,20}/);
+    if (listMatch)
+        count += 3;
+    return count;
+}
+/**
+ * Decide whether a message is complex enough to plan before acting.
+ * Pure function — no LLM, no I/O.
+ *
+ * Evaluation order: empty input, messages under 30 characters and
+ * commands (`!` / `/` prefix) are rejected first; an explicit request
+ * for a plan wins next; otherwise the verb, chain-marker, entity and
+ * length signals are collected and gated.
+ *
+ * @param text Raw user message.
+ * @returns Verdict with the labels of every signal that fired.
+ */
+export function classifyComplexity(text) {
+    const verdict = (complex, reason, signals = []) => ({ complex, reason, signals });
+    if (!text || typeof text !== 'string')
+        return verdict(false, 'empty');
+    const message = text.trim();
+    // Very short messages and commands never get the plan-first treatment.
+    if (message.length < 30)
+        return verdict(false, 'too short');
+    if (message.startsWith('!') || message.startsWith('/'))
+        return verdict(false, 'command');
+    // An explicit ask short-circuits every other heuristic.
+    if (EXPLICIT_PLAN_ASKS.some(re => re.test(message)))
+        return verdict(true, 'user explicitly asked for a plan', ['explicit-plan-ask']);
+    const signals = [];
+    // Three or more action verbs.
+    const verbCount = countActionVerbs(message);
+    const manyVerbs = verbCount >= 3;
+    if (manyVerbs)
+        signals.push(`${verbCount} action verbs`);
+    // Any chain marker ("and then", "first ... then", ...).
+    const chained = CHAIN_MARKERS.some(re => re.test(message));
+    if (chained)
+        signals.push('chain marker');
+    // Three or more entities.
+    const entityCount = countEntities(message);
+    if (entityCount >= 3)
+        signals.push(`${entityCount} entities`);
+    // Long message that contains at least one action verb.
+    if (message.length > 400 && verbCount >= 1)
+        signals.push('long + action');
+    // Gate: one high-confidence signal is enough; otherwise two signals are needed.
+    if (manyVerbs || chained)
+        return verdict(true, 'strong single signal', signals);
+    if (signals.length >= 2)
+        return verdict(true, 'multiple signals', signals);
+    return verdict(false, 'below threshold', signals);
+}
+/**
+ * System-prompt directive injected when a complex message is detected.
+ * It is prepended to Clementine's normal system prompt for that single
+ * query only, and is kept short and declarative because terse
+ * meta-instructions are easier for the model to follow.
+ *
+ * @returns The directive as one newline-joined markdown string.
+ */
+export function planFirstDirective() {
+    const heading = '## PLAN BEFORE ACTING';
+    const protocol = [
+        'This request has multiple steps. Before doing any of them:',
+        '1. Write a numbered plan (3-7 steps, one line each).',
+        '2. Call out anything that needs my decision — which contact, which template, which timing.',
+        '3. End with: "Reply **go** to start, or tell me what to change."',
+        '4. STOP. Do NOT start executing the plan in this turn.',
+    ];
+    const followUp = [
+        'When I reply "go" (or equivalent) in the next message, proceed with the plan.',
+        'If I edit the plan, revise and ask again.',
+    ];
+    const escapeHatch = 'SKIP this protocol only if the request is actually a single step disguised as multiple (e.g., "send an email to Aaron about X and cc Sarah" is one email, not two).';
+    // Sections are separated by a single blank line.
+    return [heading, '', ...protocol, '', ...followUp, '', escapeHatch].join('\n');
+}
+//# sourceMappingURL=complexity-classifier.js.map

package/dist/agent/insight-engine.js CHANGED Viewed

@@ -168,6 +168,48 @@ export function gatherInsightSignals(gateway) {
         }
     }
     catch { /* non-fatal */ }
+    // 5. Broken jobs from the failure monitor. Any currently-flagged job
+    //    with a diagnosis is a real, actionable signal the owner should
+    //    see proactively rather than stumble across in the dashboard.
+    try {
+        // eslint-disable-next-line @typescript-eslint/no-require-imports
+        const fm = require('../gateway/failure-monitor.js');
+        const broken = fm.computeBrokenJobs();
+        for (const b of broken.slice(0, 3)) {
+            const hint = b.diagnosis?.rootCause
+                ? ` — ${b.diagnosis.rootCause.slice(0, 120)}`
+                : '';
+            signals.push(`Broken cron job "${b.jobName}": ${b.errorCount48h}/${b.totalRuns48h} failures${hint}`);
+            if (b.diagnosis?.proposedFix?.autoApply) {
+                signals.push(`One-click fix available for "${b.jobName}" — ${b.diagnosis.proposedFix.details.slice(0, 100)}`);
+            }
+        }
+    }
+    catch { /* failure-monitor may not be loadable; fine */ }
+    // 6. Claim tracker — failed claims in the last N hours erode trust.
+    //    Surface them so the owner sees "Clementine said she'd do X; she
+    //    didn't" instead of silently swallowing the miss.
+    try {
+        // eslint-disable-next-line @typescript-eslint/no-require-imports
+        const { MemoryStore } = require('../memory/store.js');
+        const { MEMORY_DB_PATH, VAULT_DIR } = require('../config.js');
+        if (existsSync(MEMORY_DB_PATH)) {
+            const store = new MemoryStore(MEMORY_DB_PATH, VAULT_DIR);
+            store.initialize();
+            const db = store.conn;
+            try {
+                const rows = db.prepare(`SELECT subject, claim_type FROM claims
+           WHERE status = 'failed' AND verified_at >= datetime('now', '-6 hours')
+           ORDER BY verified_at DESC LIMIT 3`).all();
+                for (const r of rows) {
+                    signals.push(`Failed claim: "${r.subject}" (${r.claim_type}) — I promised and didn't deliver`);
+                }
+            }
+            catch { /* table may not exist */ }
+            store.close();
+        }
+    }
+    catch { /* non-fatal */ }
     return signals;
 }
 /**

package/dist/agent/route-classifier.d.ts ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * Clementine TypeScript — Team-routing classifier.
+ *
+ * Decides whether a user message addressed to Clementine should be
+ * delegated to a specialist agent (Ross, Sasha, Nora, etc.) or handled
+ * by Clementine herself.
+ *
+ * CRITICAL safety rail: this classifier is ONLY invoked when the user
+ * is talking TO Clementine. Direct-to-agent messages (agent bot DMs,
+ * agent-scoped channels) bypass routing entirely — the session-key
+ * ownership check in gateway/router.ts enforces this before calling
+ * classifyRoute. Routing never crosses the boundary between different
+ * agent bots.
+ *
+ * Returns structured decision: {targetAgent, confidence, reasoning}.
+ * Caller decides what to do with confidence (auto-delegate, soft-suggest,
+ * or stay with Clementine).
+ */
+import type { AgentProfile } from '../types.js';
+import type { Gateway } from '../gateway/router.js';
+export interface RouteDecision {
+    /** Slug of the agent that should handle the message; "clementine" when no specialist matches. */
+    targetAgent: string;
+    /** Classifier confidence in the range 0–1. */
+    confidence: number;
+    /** One short sentence describing the signal that drove the choice. */
+    reasoning: string;
+}
+/**
+ * Session keys eligible for routing. Any key NOT in this set is
+ * considered agent-scoped or system-scoped and never routes.
+ *
+ * - `discord:user:{ownerId}` — main bot DM with owner
+ * - `discord:channel:{channelId}:{ownerId}` — owner's main channel
+ *   (where Clementine's main bot is posted, without an agent slug
+ *   embedded in the key)
+ * - `slack:user:{userId}` / `slack:dm:{userId}` — Slack DM/owner channel
+ * - `dashboard:*` — web dashboard chat
+ * - `cli:*` — local CLI chat
+ *
+ * Rejected prefixes (routing NEVER fires):
+ * - `discord:agent:{slug}:*` — direct-to-agent DM
+ * - `discord:member:*`, `discord:member-dm:*` — member channels/DMs
+ * - Any `discord:channel:{channelId}:{slug}:{userId}` with an agent slug
+ *   embedded (5-part form, where position 3 is an agent slug)
+ * - `slack:agent:*`, `slack:channel:*:{slug}:*`
+ * - `team:*` — inter-agent messages travel via team-bus, never route
+ */
+export declare function isRoutable(sessionKey: string, _ownerAgentSlugs: Set<string>): boolean;
+/**
+ * Classify a user message. Returns null if the call fails — caller
+ * should fall back to Clementine handling.
+ */
+export declare function classifyRoute(userMessage: string, agents: AgentProfile[], gateway: Gateway): Promise<RouteDecision | null>;
+//# sourceMappingURL=route-classifier.d.ts.map

package/dist/agent/route-classifier.js ADDED Viewed

@@ -0,0 +1,203 @@
+/**
+ * Clementine TypeScript — Team-routing classifier.
+ *
+ * Decides whether a user message addressed to Clementine should be
+ * delegated to a specialist agent (Ross, Sasha, Nora, etc.) or handled
+ * by Clementine herself.
+ *
+ * CRITICAL safety rail: this classifier is ONLY invoked when the user
+ * is talking TO Clementine. Direct-to-agent messages (agent bot DMs,
+ * agent-scoped channels) bypass routing entirely — the session-key
+ * ownership check in gateway/router.ts enforces this before calling
+ * classifyRoute. Routing never crosses the boundary between different
+ * agent bots.
+ *
+ * Returns structured decision: {targetAgent, confidence, reasoning}.
+ * Caller decides what to do with confidence (auto-delegate, soft-suggest,
+ * or stay with Clementine).
+ */
+import pino from 'pino';
+const logger = pino({ name: 'clementine.route-classifier' });
+/**
+ * Whether a session key belongs to a session in which routing may fire.
+ * The decision is purely structural: it looks only at the
+ * colon-separated shape of the key.
+ *
+ * Routable:
+ * - `discord:user:*` — main bot DM with owner
+ * - `discord:channel:{channelId}:{userId}` — exactly four parts, i.e. no
+ *   agent slug embedded in the key
+ * - `slack:user:*`, `slack:dm:*`, and the four-part
+ *   `slack:channel:{channelId}:{userId}`
+ * - `telegram:user:*`, or `telegram:{digits}`
+ * - `dashboard:*` — web dashboard chat
+ * - `cli:*` — local CLI chat
+ *
+ * Never routable:
+ * - `discord:agent:*`, `discord:member:*`, `discord:member-dm:*`
+ * - `slack:agent:*`
+ * - any `discord:channel:*` / `slack:channel:*` key that does not have
+ *   exactly four parts (five or more parts carry an agent slug)
+ * - every other prefix (`team:*`, `cron:*`, ...)
+ *
+ * `_ownerAgentSlugs` is accepted for future use and is not consulted:
+ * a roster can be stale during agent-hire/rename events, so the key
+ * shape is treated as the safer source of truth.
+ */
+export function isRoutable(sessionKey, _ownerAgentSlugs) {
+    if (!sessionKey)
+        return false;
+    const segments = sessionKey.split(':');
+    const [platform, kind] = segments;
+    // A channel key is owner-facing only in its bare four-part form.
+    const isPlainChannel = kind === 'channel' && segments.length === 4;
+    switch (platform) {
+        case 'discord':
+            return kind === 'user' || isPlainChannel;
+        case 'slack':
+            return kind === 'user' || kind === 'dm' || isPlainChannel;
+        case 'telegram':
+            return kind === 'user' || /^\d+$/.test(kind ?? '');
+        case 'dashboard':
+        case 'cli':
+            return true;
+        default:
+            // team:*, cron:*, heartbeat-triggered, etc. — no routing
+            return false;
+    }
+}
+/**
+ * Render the roster section of the classifier prompt: one markdown
+ * bullet per agent, with Clementine always listed first as the
+ * "stay with me" option. Roster entries whose slug is "clementine" are
+ * skipped so she is never listed twice.
+ */
+function formatAgentRoster(agents) {
+    const clementineLine = '- **clementine**: generalist assistant, calendar/inbox/planning, meta questions, small talk, anything not clearly a specialist task';
+    const specialistLines = agents
+        .filter(agent => agent.slug !== 'clementine')
+        .map(agent => {
+            // Cap the description and collapse whitespace to keep the prompt tight.
+            const summary = (agent.description ?? '').slice(0, 200).replace(/\s+/g, ' ').trim();
+            return `- **${agent.slug}** (${agent.name}): ${summary}`;
+        });
+    return [clementineLine, ...specialistLines].join('\n');
+}
+/**
+ * Assemble the dispatcher prompt sent to the LLM classifier: the team
+ * roster, the user message, the decision rules, the confidence scale and
+ * the JSON output schema, joined with newlines.
+ */
+function buildPrompt(userMessage, agents) {
+    return [
+        'You are Clementine\'s team dispatcher. Decide which team member should handle an incoming user message.',
+        '',
+        '## The team:',
+        formatAgentRoster(agents),
+        '',
+        '## The message:',
+        // Only the first 1500 characters of the message are included.
+        userMessage.slice(0, 1500),
+        '',
+        '## Decision rules',
+        '',
+        '- Default to **clementine** (the generalist) unless the request clearly matches a specialist agent\'s domain.',
+        '- Match on DOMAIN, not keywords. "Help me think about our outbound strategy" is strategic → Clementine. "Send a follow-up to Aaron about the Scorpion audit" is operational outbound → the SDR agent.',
+        '- If the user explicitly names an agent ("have Ross do X"), pick that agent at confidence 1.0.',
+        '- If the request is meta ("what agents do I have", "how did Ross do this week") → clementine.',
+        '- Small talk, greetings, casual chat → clementine.',
+        '- Ambiguous or multi-domain requests → clementine with lower confidence (she can delegate herself).',
+        '',
+        '## Confidence scale',
+        '- 0.9-1.0: Explicit address of a specific agent, or a textbook specialist task (e.g., "send a follow-up" → SDR)',
+        '- 0.7-0.9: Clear specialist domain but implicit (e.g., "draft a LinkedIn message" → SDR, "write a content brief" → CMO agent)',
+        '- 0.4-0.7: Plausibly specialist but could go to Clementine',
+        '- <0.4: Generalist task or ambiguous — clementine',
+        '',
+        '## Output schema (JSON only, no fences):',
+        '{',
+        '  "targetAgent": "slug (use \\"clementine\\" if no specialist match)",',
+        '  "confidence": 0.0-1.0,',
+        '  "reasoning": "one short sentence — what signal drove the choice"',
+        '}',
+    ].join('\n');
+}
+/**
+ * Extract a routing decision from the classifier's raw reply.
+ *
+ * Takes the span from the first `{` to the last `}` and parses it as
+ * JSON. The slug is trimmed and lower-cased, the confidence is clamped
+ * to 0–1 (0 when it is not a number), and the reasoning is capped at
+ * 200 characters (empty when it is not a string).
+ *
+ * @returns The decision, or null when no JSON object can be parsed or
+ *   `targetAgent` is not a string.
+ */
+function parseResponse(raw) {
+    try {
+        const [jsonText] = raw.match(/\{[\s\S]*\}/) ?? [];
+        if (!jsonText)
+            return null;
+        const { targetAgent, confidence, reasoning } = JSON.parse(jsonText);
+        if (typeof targetAgent !== 'string')
+            return null;
+        let clamped = 0;
+        if (typeof confidence === 'number')
+            clamped = Math.max(0, Math.min(1, confidence));
+        let why = '';
+        if (typeof reasoning === 'string')
+            why = reasoning.slice(0, 200);
+        return {
+            targetAgent: targetAgent.trim().toLowerCase(),
+            confidence: clamped,
+            reasoning: why,
+        };
+    }
+    catch {
+        // Non-string input or malformed JSON both count as "no decision".
+        return null;
+    }
+}
+/**
+ * Classify a user message into a routing decision.
+ *
+ * Evaluation order:
+ *  1. No specialist (non-"clementine") agent in the roster → null.
+ *  2. Fast path: the message contains a specialist's first name or slug
+ *     as a whole word → that agent at confidence 1.0, without an LLM call.
+ *  3. Otherwise the LLM classifier is asked, and its answer is validated
+ *     against the roster; an unknown slug is rewritten to "clementine"
+ *     with confidence capped at 0.3.
+ *
+ * NOTE(review): the fast path fires on ANY whole-word occurrence, so a
+ * meta question like "how did Ross do this week" is routed to Ross even
+ * though the prompt's own decision rules send meta questions to
+ * Clementine. Confirm whether that is intended.
+ *
+ * @param userMessage Raw user message.
+ * @param agents Full agent roster; entries with slug "clementine" are not specialists.
+ * @param gateway Gateway whose `handleCronJob` runs the classifier prompt.
+ * @returns The decision, or null when the classifier call fails or its
+ *   reply cannot be parsed — the caller should fall back to Clementine.
+ */
+export async function classifyRoute(userMessage, agents, gateway) {
+    // Only classify when there's at least one non-clementine agent available.
+    const specialists = agents.filter(a => a.slug !== 'clementine');
+    if (specialists.length === 0)
+        return null;
+    // Names and slugs are data, not patterns: escape regex metacharacters so
+    // an agent called e.g. "C++ Bot" or "a.b" can neither throw nor over-match.
+    const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+    // Fast path: explicit name/slug mention anywhere in the message.
+    for (const a of specialists) {
+        const firstName = String(a.name ?? '').trim().toLowerCase().split(/\s+/)[0];
+        const slug = String(a.slug ?? '');
+        const candidates = [];
+        // Very short first names ("Al") are too ambiguous to match on...
+        if (firstName.length >= 3)
+            candidates.push(escapeRegExp(firstName));
+        // ...but the slug is still a valid explicit address for that agent.
+        if (slug.length > 0)
+            candidates.push(escapeRegExp(slug));
+        if (candidates.length === 0)
+            continue;
+        const wordRe = new RegExp(`\\b(?:${candidates.join('|')})\\b`, 'i');
+        if (wordRe.test(userMessage)) {
+            logger.debug({ slug: a.slug, trigger: 'explicit-mention' }, 'Fast-path routing decision');
+            return {
+                targetAgent: a.slug,
+                confidence: 1.0,
+                reasoning: `User explicitly addressed ${a.name} by name.`,
+            };
+        }
+    }
+    // LLM classifier for everything else.
+    const prompt = buildPrompt(userMessage, agents);
+    let raw;
+    try {
+        raw = await gateway.handleCronJob('route-classify', prompt, 1, // tier 1
+        3, // maxTurns — classifier doesn't need tools
+        'haiku');
+    }
+    catch (err) {
+        logger.warn({ err }, 'Route classifier call failed');
+        return null;
+    }
+    const decision = parseResponse(raw);
+    if (!decision) {
+        // `raw` may not be a string here (that is one reason parsing fails),
+        // so coerce before slicing instead of throwing out of the classifier.
+        logger.warn({ rawHead: String(raw ?? '').slice(0, 200) }, 'Route classifier returned unparseable response');
+        return null;
+    }
+    // Validate target exists in the roster; if not, treat as Clementine.
+    const allSlugs = new Set(agents.map(a => a.slug));
+    allSlugs.add('clementine');
+    if (!allSlugs.has(decision.targetAgent)) {
+        logger.warn({ decision }, 'Classifier returned unknown agent — treating as clementine');
+        decision.targetAgent = 'clementine';
+        decision.confidence = Math.min(decision.confidence, 0.3);
+    }
+    return decision;
+}
+//# sourceMappingURL=route-classifier.js.map

package/dist/cli/dashboard.js CHANGED Viewed

@@ -2075,6 +2075,16 @@ export async function cmdDashboard(opts) {
             res.status(500).json({ error: String(err) });
         }
     });
+    // ── Team routing audit ──────────────────────────────────────────
+    app.get('/api/routing-audit', async (_req, res) => {
+        try {
+            const { getRecentRouteDecisions } = await import('../gateway/router.js');
+            res.json({ decisions: getRecentRouteDecisions(50) });
+        }
+        catch (err) {
+            res.status(500).json({ error: String(err) });
+        }
+    });
     // ── Claims + trust score ────────────────────────────────────────
     app.get('/api/claims', async (req, res) => {
         try {
@@ -9417,6 +9427,13 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
         <div class="card-header">Recent claims</div>
         <div class="card-body" id="panel-claims"><div class="empty-state">Loading...</div></div>
       </div>
+      <div class="card" style="margin-top:16px">
+        <div class="card-header" style="display:flex;align-items:center;justify-content:space-between">
+          <span>Team routing decisions</span>
+          <span style="font-size:11px;color:var(--text-muted)">Only owner-facing Clementine sessions are classified &mdash; agent-bot DMs bypass routing entirely.</span>
+        </div>
+        <div class="card-body" id="panel-routing-audit"><div class="empty-state">Loading...</div></div>
+      </div>
     </div>
     <!-- ═══ Logs Page ═══ -->
@@ -10458,7 +10475,7 @@ function navigateTo(page, opts) {
     document.getElementById('builder-input').focus();
   }
   if (page === 'automations') { refreshCron(); refreshTimers(); refreshSelfImprove(); refreshSkills(); refreshBrokenJobs(); }
-  if (page === 'claims') { refreshClaims(); }
+  if (page === 'claims') { refreshClaims(); refreshRoutingAudit(); }
   if (page === 'intelligence') { refreshMemory(); }
   if (page === 'settings') { refreshSettings(); refreshRemoteAccess(); refreshSalesforce(); refreshClaudeIntegrations(); refreshMcpServers(); }
   if (page === 'logs') refreshLogs();
@@ -16401,6 +16418,44 @@ async function refreshClaims(filter) {
   }
 }
+/**
+ * Fetch recent routing decisions from /api/routing-audit and render them
+ * into the panel-routing-audit card. Shows an empty-state message when
+ * there are no decisions and an error message when the request fails.
+ * Does nothing when the panel element is not on the page.
+ */
+async function refreshRoutingAudit() {
+  var container = document.getElementById('panel-routing-audit');
+  if (!container) return;
+  try {
+    var r = await apiFetch('/api/routing-audit');
+    // A failed request must surface as an error, not as "no decisions yet".
+    if (r.ok === false) throw new Error('routing audit request failed');
+    var d = await r.json();
+    var decisions = d.decisions || [];
+    if (decisions.length === 0) {
+      container.innerHTML = '<div class="empty-state">No routing decisions yet. Send Clementine a message that could be delegated and it will show up here.</div>';
+      return;
+    }
+    var actionColor = {
+      'auto-delegated': '#22c55e',
+      'soft-suggested': '#f59e0b',
+      'stayed-with-clementine': '#6b7280',
+    };
+    var html = '<div style="display:flex;flex-direction:column;gap:6px;font-size:12px">';
+    for (var de of decisions) {
+      var color = actionColor[de.action] || '#6b7280';
+      var confPct = Math.round((de.confidence || 0) * 100);
+      // One entry without a snippet must not blank the whole panel.
+      var snippet = String(de.messageSnippet || '').slice(0, 200);
+      html += '<div style="padding:8px 10px;border:1px solid var(--border);border-left:3px solid ' + color + ';border-radius:4px;background:var(--bg-secondary)">'
+        + '<div style="display:flex;align-items:center;gap:8px;flex-wrap:wrap">'
+        + '<span style="font-size:10px;padding:1px 6px;background:' + color + '22;color:' + color + ';border-radius:3px">' + esc(de.action) + '</span>'
+        + '<span style="font-size:11px"><strong>' + esc(de.targetAgent) + '</strong> @ ' + confPct + '%</span>'
+        + '<span style="font-size:10px;color:var(--text-muted)">' + timeAgo(de.timestamp) + '</span>'
+        + '<span style="font-size:10px;color:var(--text-muted);margin-left:auto">' + esc(de.sessionKey) + '</span>'
+        + '</div>'
+        + '<div style="font-size:11px;color:var(--text-secondary);margin-top:4px">\u201c' + esc(snippet) + '\u201d</div>'
+        + '<div style="font-size:10px;color:var(--text-muted);margin-top:2px;font-style:italic">' + esc(de.reasoning) + '</div>'
+        + '</div>';
+    }
+    html += '</div>';
+    container.innerHTML = html;
+  } catch (e) {
+    container.innerHTML = '<div class="empty-state" style="color:var(--red)">Failed to load routing audit</div>';
+  }
+}
 async function markClaim(id, status) {
   var endpoint = status === 'verified' ? 'mark-verified' : status === 'failed' ? 'mark-failed' : 'dismiss';
   try {

package/dist/gateway/claim-tracker.js CHANGED Viewed

@@ -123,13 +123,24 @@ const PATTERNS = [
  * Bounded to prevent memory growth — oldest entries are evicted.
  */
 const MAX_PENDING_LLM = 20;
+const PENDING_LLM_TTL_MS = 6 * 60 * 60 * 1000; // 6h — after that a claim is stale anyway
 const pendingLLMExtraction = [];
+/**
+ * Drop entries older than PENDING_LLM_TTL_MS from the front of
+ * pendingLLMExtraction. Scanning stops at the first entry that is still
+ * fresh, so only a leading run of expired entries is removed.
+ *
+ * @param now Current time in epoch milliseconds (defaults to Date.now()).
+ */
+function pruneExpiredPending(now = Date.now()) {
+    let expiredCount = 0;
+    for (const entry of pendingLLMExtraction) {
+        if (now - entry.queuedAt <= PENDING_LLM_TTL_MS)
+            break;
+        expiredCount += 1;
+    }
+    if (expiredCount > 0)
+        pendingLLMExtraction.splice(0, expiredCount);
+}
 function enqueueForLLM(text, sessionKey, agentSlug) {
+    const now = Date.now();
+    pruneExpiredPending(now);
     // De-dup by text hash within the queue — don't re-enqueue the same DM.
     const hash = sha1(text);
-    if (pendingLLMExtraction.some(e => sha1(e.text) === hash))
+    if (pendingLLMExtraction.some(e => e.hash === hash))
         return;
-    pendingLLMExtraction.push({ text, sessionKey, agentSlug, queuedAt: Date.now() });
+    pendingLLMExtraction.push({ text, hash, sessionKey, agentSlug, queuedAt: now });
     while (pendingLLMExtraction.length > MAX_PENDING_LLM)
         pendingLLMExtraction.shift();
 }
@@ -208,11 +219,16 @@ export function extractClaims(text, sessionKey, agentSlug) {
  * the next sweep.
  */
 export async function drainLLMFallback(gateway, maxPerSweep = 3) {
+    pruneExpiredPending();
     let drained = 0;
-    const batch = pendingLLMExtraction.splice(0, Math.min(maxPerSweep, pendingLLMExtraction.length));
+    // Peek — don't remove yet. We only splice on successful processing so a
+    // transient LLM failure doesn't silently drop the candidate.
+    const batch = pendingLLMExtraction.slice(0, Math.min(maxPerSweep, pendingLLMExtraction.length));
+    const toRemove = new Set();
     for (const item of batch) {
         try {
             const claims = await llmExtractClaims(item.text, gateway);
+            toRemove.add(item.hash); // success (or "no claims" — not worth re-trying)
             if (claims.length === 0)
                 continue;
             const toRecord = claims.map(c => ({
@@ -229,9 +245,18 @@ export async function drainLLMFallback(gateway, maxPerSweep = 3) {
             drained += claims.length;
         }
         catch (err) {
+            // Don't add to toRemove — leave in queue for next sweep. TTL eventually
+            // evicts permanently-failing entries.
             logger.debug({ err }, 'LLM fallback extraction failed for one DM');
         }
     }
+    // Remove successfully-processed entries in one pass
+    if (toRemove.size > 0) {
+        for (let i = pendingLLMExtraction.length - 1; i >= 0; i--) {
+            if (toRemove.has(pendingLLMExtraction[i].hash))
+                pendingLLMExtraction.splice(i, 1);
+        }
+    }
     return drained;
 }
 async function llmExtractClaims(text, gateway) {

package/dist/gateway/failure-diagnostics.js CHANGED Viewed

@@ -52,7 +52,10 @@ function loadCache() {
 function saveCache(cache) {
     try {
         mkdirSync(path.dirname(CACHE_FILE), { recursive: true });
-        writeFileSync(CACHE_FILE, JSON.stringify(cache, null, 2));
+        const tmp = CACHE_FILE + '.tmp';
+        writeFileSync(tmp, JSON.stringify(cache, null, 2));
+        const { renameSync } = require('node:fs');
+        renameSync(tmp, CACHE_FILE);
     }
     catch (err) {
         logger.warn({ err }, 'Failed to persist diagnostic cache');

package/dist/gateway/failure-monitor.js CHANGED Viewed

@@ -50,7 +50,12 @@ function loadState() {
 function saveState(state) {
     try {
         mkdirSync(path.dirname(STATE_FILE), { recursive: true });
-        writeFileSync(STATE_FILE, JSON.stringify(state, null, 2));
+        // Atomic write — write to temp file then rename. Prevents partial
+        // writes from corrupting the state if the process is killed mid-write.
+        const tmp = STATE_FILE + '.tmp';
+        writeFileSync(tmp, JSON.stringify(state, null, 2));
+        const { renameSync } = require('node:fs');
+        renameSync(tmp, STATE_FILE);
     }
     catch (err) {
         logger.warn({ err }, 'Failed to persist failure-monitor state');

package/dist/gateway/fix-verification.js CHANGED Viewed

@@ -29,7 +29,10 @@ function loadState() {
 function saveState(state) {
     try {
         mkdirSync(path.dirname(STATE_FILE), { recursive: true });
-        writeFileSync(STATE_FILE, JSON.stringify(state, null, 2));
+        const tmp = STATE_FILE + '.tmp';
+        writeFileSync(tmp, JSON.stringify(state, null, 2));
+        const { renameSync } = require('node:fs');
+        renameSync(tmp, STATE_FILE);
     }
     catch (err) {
         logger.warn({ err }, 'Failed to persist fix-verification state');

package/dist/gateway/router.d.ts CHANGED Viewed

@@ -53,6 +53,14 @@ export declare class Gateway {
      * Falls back to pushing rawResult directly if the agent call fails.
      */
     private _deliverDeepResult;
+    /**
+     * Path of the routing audit log: `routing-audit.jsonl` joined onto BASE_DIR.
+     */
+    static routeAuditLogPath(): string;
+    /**
+     * For Clementine-owned sessions, classify whether the message should be
+     * delegated to a specialist agent. Returns null when routing isn't
+     * eligible; { delegated: true, ackMessage } when auto-delegated;
+     * { delegated: false, softSuggest } when only suggesting.
+     */
+    private _maybeRouteToSpecialist;
     private _agentManager?;
     private _teamRouter?;
     private _teamBus?;
@@ -215,4 +223,15 @@ export declare class Gateway {
     /** Extract a procedural skill from a successful cron execution (fire-and-forget). */
     extractCronSkill(jobName: string, prompt: string, output: string, durationMs: number, agentSlug?: string): Promise<void>;
 }
+/** One recorded routing decision, as returned by getRecentRouteDecisions. */
+interface RouteAuditEntry {
+    /** When the decision was logged, as an ISO-8601 string. */
+    timestamp: string;
+    /** Key of the session the message arrived on. */
+    sessionKey: string;
+    /** The user message, truncated to its first 300 characters. */
+    messageSnippet: string;
+    /** Slug of the agent the classifier picked; 'clementine' means no hand-off. */
+    targetAgent: string;
+    /** Classifier confidence: >= 0.8 auto-delegates, >= 0.5 soft-suggests. */
+    confidence: number;
+    /** The classifier's stated reason for the choice. */
+    reasoning: string;
+    /** Outcome label derived from targetAgent and confidence when the entry is logged. */
+    action: 'auto-delegated' | 'soft-suggested' | 'stayed-with-clementine';
+}
+/**
+ * Most recent routing decisions held in memory, newest first.
+ *
+ * @param limit Maximum number of entries to return; the implementation defaults to 50.
+ * @returns Entries from the in-memory audit buffer; no disk read is involved.
+ */
+export declare function getRecentRouteDecisions(limit?: number): RouteAuditEntry[];
+export {};
 //# sourceMappingURL=router.d.ts.map

package/dist/gateway/router.js CHANGED Viewed

@@ -205,6 +205,67 @@ export class Gateway {
             }
         }
     }
+    /**
+     * Path of the routing audit log: `routing-audit.jsonl` joined onto BASE_DIR.
+     */
+    static routeAuditLogPath() {
+        return path.join(BASE_DIR, 'routing-audit.jsonl');
+    }
+    /**
+     * For Clementine-owned sessions, classify whether the message should be
+     * delegated to a specialist agent. Returns null when routing isn't
+     * eligible; { delegated: true, ackMessage } when auto-delegated;
+     * { delegated: false, softSuggest } when only suggesting.
+     *
+     * Any exception raised inside is logged at debug level and turned into
+     * a null result, so a routing failure never blocks the normal reply.
+     *
+     * @param sessionKey Key of the session the message arrived on.
+     * @param text       The user's message; passed to the classifier and, on
+     *                   delegation, to the target agent unchanged.
+     * @param onText     Optional callback used to push the acknowledgement
+     *                   to the user right away; a rejection from it is ignored.
+     */
+    async _maybeRouteToSpecialist(sessionKey, text, onText) {
+        try {
+            const { isRoutable, classifyRoute } = await import('../agent/route-classifier.js');
+            // Fetch team roster and build the set of agent slugs for the routing gate
+            const agentMgr = this.getAgentManager();
+            const agents = agentMgr.listAll();
+            const ownerAgentSlugs = new Set(agents.filter(a => a.slug !== 'clementine').map(a => a.slug));
+            if (!isRoutable(sessionKey, ownerAgentSlugs))
+                return null;
+            if (ownerAgentSlugs.size === 0)
+                return null; // no team to route to
+            const decision = await classifyRoute(text, agents, this);
+            if (!decision)
+                return null;
+            // Audit every classifier decision, including the ones that end up
+            // staying with Clementine or naming an agent that is not in the roster.
+            logRouteDecision({ sessionKey, message: text, decision });
+            if (decision.targetAgent === 'clementine')
+                return null;
+            const targetProfile = agents.find(a => a.slug === decision.targetAgent);
+            if (!targetProfile)
+                return null;
+            // Auto-delegate at high confidence
+            if (decision.confidence >= 0.8) {
+                // Fire the team task in the background; ack immediately.
+                const ackMessage = `Routing this to **${targetProfile.name}** (${decision.reasoning.toLowerCase()}). I'll post their response back here when done.`;
+                onText?.(ackMessage).catch(() => { });
+                // Deliberately not awaited: the specialist's answer (or its error)
+                // is posted back to the same session through the dispatcher later.
+                this.handleTeamTask('Clementine', 'clementine', text, targetProfile)
+                    .then(response => {
+                    // An empty response is dropped; nothing is posted in that case.
+                    if (!response)
+                        return;
+                    const delivery = `**${targetProfile.name}**: ${response}`;
+                    return this._dispatcher?.send(delivery, { sessionKey });
+                })
+                    .catch(err => {
+                    logger.warn({ err, target: decision.targetAgent }, 'Delegated task failed');
+                    void this._dispatcher?.send(`**${targetProfile.name}** hit an error handling that: ${String(err).slice(0, 200)}`, { sessionKey });
+                });
+                return { delegated: true, ackMessage };
+            }
+            // Soft-suggest at medium confidence
+            if (decision.confidence >= 0.5) {
+                return {
+                    delegated: false,
+                    softSuggest: `[Routing suggestion: This looks like it could be ${targetProfile.name}'s domain (${decision.reasoning}). If you want to delegate, reply "send to ${targetProfile.name}" or address them directly. Otherwise I'll handle it.]`,
+                };
+            }
+            return null; // low confidence — stay with Clementine silently
+        }
+        catch (err) {
+            logger.debug({ err, sessionKey }, 'Team routing attempt failed (non-fatal)');
+            return null;
+        }
+    }
     // Team system (lazy-initialized)
     _agentManager;
     _teamRouter;
@@ -676,6 +737,48 @@ export class Gateway {
                 // Use per-message override, then session default, then global default
                 const sess = this.sessions.get(sessionKey);
                 const effectiveModel = model ?? sess?.model;
+                // ── Team routing (Clementine-owned sessions only) ──────────────
+                // If the user is talking TO Clementine (her main bot DM, owner
+                // channel, dashboard, or CLI) and hasn't locked the session to a
+                // specific agent profile, classify whether the message should go
+                // to a specialist. Direct-to-agent-bot sessions bypass this entirely.
+                // Small-talk and meta queries stay with Clementine by default.
+                const routingResult = !isInternalMsg && !sess?.profile && !text.startsWith('!')
+                    ? await this._maybeRouteToSpecialist(sessionKey, text, onText)
+                    : null;
+                if (routingResult?.delegated) {
+                    return routingResult.ackMessage;
+                }
+                // Soft-suggest mode: pass annotation through to Clementine's reply
+                if (routingResult?.softSuggest) {
+                    securityAnnotation = (securityAnnotation
+                        ? securityAnnotation + '\n\n'
+                        : '') + routingResult.softSuggest;
+                }
+                // ── Pre-flight planning for complex asks ───────────────────────
+                // For interactive sessions only (owner DMs, dashboard, CLI), a
+                // cheap deterministic heuristic flags complex multi-step requests.
+                // When it fires, we prepend a directive to the text that tells
+                // the agent to propose a plan + stop, rather than executing
+                // directly. Not a hard stop — on the user's "go" reply the
+                // agent proceeds from the plan it proposed.
+                let enrichedText = text;
+                const isInteractive = isOwnerDm
+                    || sessionKey.startsWith('dashboard:')
+                    || sessionKey.startsWith('cli:');
+                if (isInteractive && !isInternalMsg && !text.startsWith('!')) {
+                    try {
+                        const { classifyComplexity, planFirstDirective } = await import('../agent/complexity-classifier.js');
+                        const verdict = classifyComplexity(text);
+                        if (verdict.complex) {
+                            logger.info({ sessionKey, signals: verdict.signals, reason: verdict.reason }, 'Pre-flight planning directive injected');
+                            enrichedText = `${planFirstDirective()}\n\n---\n\n${text}`;
+                        }
+                    }
+                    catch (err) {
+                        logger.debug({ err }, 'Complexity classifier failed (non-fatal)');
+                    }
+                }
                 // ── Deep mode control ──────────────────────────────────────────
                 if (sess?.deepTask) {
                     const lower = text.toLowerCase().trim();
@@ -796,7 +899,7 @@ export class Gateway {
                 // If the previous query on this session was interrupted by this
                 // incoming message, fold the partial output in so the agent can pivot
                 // smoothly instead of re-planning from scratch.
-                let chatPrompt = text;
+                let chatPrompt = enrichedText;
                 const interrupt = sessState.pendingInterrupt;
                 if (interrupt && interrupt.partial.trim()) {
                     delete sessState.pendingInterrupt;
@@ -804,7 +907,7 @@ export class Gateway {
                     chatPrompt =
                         `[You were mid-response when the user sent a new message — they chose not to wait. ` +
                             `Here's what you had said so far (may be mid-sentence):\n---\n${partialPreview}\n---\n` +
-                            `New message from user:]\n\n${text}`;
+                            `New message from user:]\n\n${enrichedText}`;
                     logger.info({ sessionKey, partialLen: interrupt.partial.length }, 'Folding interrupted partial into new prompt');
                 }
                 else if (interrupt) {
@@ -1380,4 +1483,43 @@ export class Gateway {
         }
     }
 }
+/**
+ * In-memory buffer of recent routing decisions, capped at 200 entries by
+ * logRouteDecision. The dashboard endpoint reads from this without
+ * hitting disk. Each entry is also appended to routing-audit.jsonl, but
+ * nothing in this section reads that file back, so the buffer starts
+ * empty on every boot (TODO: replay the file at startup if the history
+ * needs to survive restarts).
+ */
+const _routeAuditBuffer = [];
+function logRouteDecision(opts) {
+    const action = opts.decision.targetAgent === 'clementine'
+        ? 'stayed-with-clementine'
+        : opts.decision.confidence >= 0.8
+            ? 'auto-delegated'
+            : opts.decision.confidence >= 0.5
+                ? 'soft-suggested'
+                : 'stayed-with-clementine';
+    const entry = {
+        timestamp: new Date().toISOString(),
+        sessionKey: opts.sessionKey,
+        messageSnippet: opts.message.slice(0, 300),
+        targetAgent: opts.decision.targetAgent,
+        confidence: opts.decision.confidence,
+        reasoning: opts.decision.reasoning,
+        action,
+    };
+    _routeAuditBuffer.push(entry);
+    while (_routeAuditBuffer.length > 200)
+        _routeAuditBuffer.shift();
+    try {
+        // eslint-disable-next-line @typescript-eslint/no-require-imports
+        const { appendFileSync } = require('node:fs');
+        appendFileSync(Gateway.routeAuditLogPath(), JSON.stringify(entry) + '\n');
+    }
+    catch (err) {
+        logger.debug({ err }, 'Route audit log write failed (non-fatal)');
+    }
+}
+export function getRecentRouteDecisions(limit = 50) {
+    return _routeAuditBuffer.slice(-limit).reverse();
+}
 //# sourceMappingURL=router.js.map

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.0.22",
+  "version": "1.0.24",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",