npm - clementine-agent - Versions diffs - 1.1.6 → 1.1.8 - Mend

clementine-agent 1.1.6 → 1.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/agent/assistant.d.ts +1 -1
package/dist/agent/assistant.js +2 -2
package/dist/agent/insight-engine.d.ts +13 -1
package/dist/agent/insight-engine.js +110 -1
package/dist/gateway/fix-applier.js +21 -0
package/dist/gateway/fix-verification.d.ts +43 -3
package/dist/gateway/fix-verification.js +115 -9
package/dist/gateway/router.d.ts +1 -1
package/dist/gateway/router.js +2 -2
package/dist/memory/store.js +11 -0
package/package.json +1 -1

package/dist/agent/assistant.d.ts CHANGED Viewed

@@ -271,7 +271,7 @@ export declare class PersonalAssistant {
      * so follow-up conversation has context.
      */
     injectContext(sessionKey: string, userText: string, assistantText: string): void;
-    getRecentActivity(sinceIso: string): Array<{
+    getRecentActivity(sinceIso: string, maxEntries?: number): Array<{
         sessionKey: string;
         role: string;
         content: string;

package/dist/agent/assistant.js CHANGED Viewed

@@ -4828,11 +4828,11 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
             }
         }
     }
-    getRecentActivity(sinceIso) {
+    getRecentActivity(sinceIso, maxEntries) {
         if (!this.memoryStore)
             return [];
         try {
-            return this.memoryStore.getRecentActivity(sinceIso);
+            return this.memoryStore.getRecentActivity(sinceIso, maxEntries);
         }
         catch {
             return [];

package/dist/agent/insight-engine.d.ts CHANGED Viewed

@@ -47,13 +47,25 @@ export declare function maybeIncreaseCooldown(state: InsightState): void;
  * Returns structured event summaries that can be passed to an LLM for urgency rating.
  */
 export declare function gatherInsightSignals(gateway: {
-    getRecentActivity: (since: string) => Array<{
+    getRecentActivity: (since: string, maxEntries?: number) => Array<{
         sessionKey: string;
         role: string;
         content: string;
         createdAt: string;
     }>;
 }): string[];
+/**
+ * Scan transcript activity for user messages that open with a correction
+ * marker (for example "no", "actually", "that's wrong", "I meant").
+ * Entries whose role is not 'user' are ignored.
+ *
+ * @param activity Transcript entries to scan.
+ * @returns A single "Conversation friction" summary when three or more user
+ *          messages match, otherwise an empty array.
+ */
+export declare function detectFrustrationSignals(activity: Array<{
+    sessionKey: string;
+    role: string;
+    content: string;
+    createdAt: string;
+}>): string[];
+/**
+ * Find keywords that recur in user messages across sessions. A keyword is
+ * reported when it appears in at least three distinct sessions and is not
+ * in the stopword list; only the two most widely spread keywords are
+ * returned.
+ *
+ * @param activity Transcript entries to scan; only role 'user' is read.
+ * @returns Up to two "Recurring topic" summaries, widest spread first.
+ */
+export declare function detectRepeatedTopics(activity: Array<{
+    sessionKey: string;
+    role: string;
+    content: string;
+    createdAt: string;
+}>): string[];
 /**
  * Build a prompt for urgency rating (to be sent to a lightweight LLM).
  * Returns null if there are no signals worth evaluating.

package/dist/agent/insight-engine.js CHANGED Viewed

@@ -189,7 +189,27 @@ export function gatherInsightSignals(gateway) {
     catch (err) {
         logger.debug({ err }, 'Failed to pull broken-jobs signals');
     }
-    // 6. Claim tracker — failed claims in the last N hours erode trust.
+    // 6. Conversational signals derived from recent transcripts.
+    //    Surfaces patterns IN the conversation itself, not just system events:
+    //    user frustration markers, repeating topics, etc. These are early
+    //    warning signs that the agent's responses may be off-track.
+    try {
+        const since24h = new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString();
+        const since7d = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
+        // 24h frustration scan — 50 entries plenty to count corrections in a day.
+        const recent = gateway.getRecentActivity(since24h, 50);
+        for (const s of detectFrustrationSignals(recent))
+            signals.push(s);
+        // 7d repeat-topic scan — pull more entries since topics span sessions.
+        // Cap at 200 to keep keyword extraction cheap.
+        const week = gateway.getRecentActivity(since7d, 200);
+        for (const s of detectRepeatedTopics(week))
+            signals.push(s);
+    }
+    catch (err) {
+        logger.debug({ err }, 'Failed to pull conversational signals');
+    }
+    // 7. Claim tracker — failed claims in the last N hours erode trust.
     //    Surface them so the owner sees "Clementine said she'd do X; she
     //    didn't" instead of silently swallowing the miss.
     try {
@@ -214,6 +234,95 @@ export function gatherInsightSignals(gateway) {
     }
     return signals;
 }
+// ── Conversational signal detectors ─────────────────────────────────
+//
+// Pure functions over recent transcript activity. Exported so the insight
+// dashboard / debug commands can run them independently of the full
+// gatherInsightSignals path.
+/**
+ * Markers that suggest the user is correcting or frustrated with the
+ * agent's last response. Tuned to start-of-message tokens since
+ * mid-message "no" or "actually" is often just normal narrative.
+ */
+const CORRECTION_PATTERNS = [
+    /^(no|nope|not\b)/i,
+    /^(actually|wait)\b/i,
+    /^(that['’]?s| that is) (wrong|not|incorrect|backwards|opposite)/i,
+    /^I (meant|said|wanted|asked)\b/i,
+    /^you (didn['’]?t|misunderstood|got it wrong|missed)/i,
+    /^(stop|cancel|undo|nevermind|never mind)\b/i,
+];
+export function detectFrustrationSignals(activity) {
+    const signals = [];
+    let count = 0;
+    const sessionsAffected = new Set();
+    for (const entry of activity) {
+        if (entry.role !== 'user')
+            continue;
+        const trimmed = entry.content.trim();
+        for (const re of CORRECTION_PATTERNS) {
+            if (re.test(trimmed)) {
+                count++;
+                sessionsAffected.add(entry.sessionKey);
+                break;
+            }
+        }
+    }
+    if (count >= 3) {
+        signals.push(`Conversation friction: ${count} user correction(s) across ${sessionsAffected.size} session(s) in the last 24h — recent agent responses may be off-track`);
+    }
+    return signals;
+}
+/**
+ * Words too generic to count as a topic — would otherwise dominate the
+ * "recurring topic" signal with noise like "thanks", "okay", "please".
+ *
+ * Notes for maintainers:
+ *  - detectRepeatedTopics only extracts tokens of five or more characters,
+ *    so the shorter entries here ('down', 'each', 'from', …) never take
+ *    part in a lookup; they are harmless.
+ *  - 'doing' and 'much' are listed twice; the Set collapses duplicates.
+ *  - 'don’t' is spelled with a typographic apostrophe (U+2019); the
+ *    straight-apostrophe spelling is not listed.
+ */
+const TOPIC_STOPWORDS = new Set([
+    'about', 'after', 'again', 'against', 'because', 'before', 'being', 'between',
+    'could', 'doing', 'don’t', 'down', 'during', 'each', 'from', 'further',
+    'going', 'gonna', 'have', 'having', 'here', 'into', 'just', 'know', 'like',
+    'maybe', 'might', 'more', 'most', 'much', 'need', 'okay', 'only', 'other',
+    'over', 'please', 'really', 'said', 'same', 'some', 'still', 'such', 'than',
+    'that', 'them', 'then', 'there', 'these', 'they', 'thing', 'think', 'this',
+    'those', 'through', 'thanks', 'time', 'told', 'under', 'until', 'using', 'very',
+    'want', 'wanted', 'wants', 'were', 'what', 'when', 'where', 'which', 'while',
+    'will', 'with', 'would', 'your', 'yours', 'yeah', 'yes',
+    'tonight', 'today', 'tomorrow', 'morning', 'evening', 'session', 'work',
+    'doing', 'made', 'make', 'making', 'sure', 'right', 'wrong', 'good', 'bad',
+    'much', 'many', 'lots',
+]);
+export function detectRepeatedTopics(activity) {
+    // Build a (keyword → set of session IDs) map. A keyword that shows up in
+    // 3+ DISTINCT sessions across the window is "recurring" — could be an
+    // unresolved thread, a project the user is grinding on, or a question
+    // they've asked multiple ways.
+    const sessionsForKeyword = new Map();
+    for (const entry of activity) {
+        if (entry.role !== 'user')
+            continue;
+        const text = entry.content.toLowerCase();
+        // Word extraction: 5+ chars, alpha-only (no numbers/punctuation).
+        const matches = text.match(/[a-z][a-z’]{4,15}/g) ?? [];
+        const seenInThisMessage = new Set();
+        for (const w of matches) {
+            if (TOPIC_STOPWORDS.has(w))
+                continue;
+            if (seenInThisMessage.has(w))
+                continue; // dedupe within a single message
+            seenInThisMessage.add(w);
+            if (!sessionsForKeyword.has(w))
+                sessionsForKeyword.set(w, new Set());
+            sessionsForKeyword.get(w).add(entry.sessionKey);
+        }
+    }
+    // Rank by session-spread; surface the top 2 to avoid flooding insight
+    // notifications with too many topic mentions.
+    const ranked = [...sessionsForKeyword.entries()]
+        .filter(([, sessions]) => sessions.size >= 3)
+        .sort((a, b) => b[1].size - a[1].size)
+        .slice(0, 2);
+    return ranked.map(([keyword, sessions]) => `Recurring topic "${keyword}" came up across ${sessions.size} sessions this week — possible ongoing thread`);
+}
 /**
  * Build a prompt for urgency rating (to be sent to a lightweight LLM).
  * Returns null if there are no signals worth evaluating.

package/dist/gateway/fix-applier.js CHANGED Viewed

@@ -358,6 +358,18 @@ function applyAdvisorRuleFix(jobName, autoApply, opts) {
     }
     appendAudit({ kind: 'advisor-rule', jobName, file: targetPath, ruleId: autoApply.ruleId, diff });
     logger.info({ jobName, ruleId: autoApply.ruleId, file: targetPath }, 'Applied advisor-rule fix');
+    // Phase 8.1 — record this autoApply for verification. The next
+    // AUTOAPPLY_VERDICT_WINDOW non-skipped runs decide whether the rule
+    // helped; if not, fix-verification auto-reverts (deletes the file).
+    // Lazy import to avoid circular dependency (fix-verification imports
+    // failure-monitor which transitively touches the cron path).
+    import('./fix-verification.js').then(({ recordAutoApplyForVerification }) => {
+        recordAutoApplyForVerification(jobName, {
+            kind: 'advisor-rule',
+            file: targetPath,
+            ruleId: autoApply.ruleId,
+        });
+    }).catch(err => logger.warn({ err, jobName }, 'Failed to record autoApply for verification'));
     return {
         ok: true,
         message: `Wrote advisor rule ${autoApply.ruleId} (hot-reloads on next eval)`,
@@ -408,6 +420,15 @@ function applyPromptOverrideFix(jobName, autoApply, opts) {
         diff,
     });
     logger.info({ jobName, scope: autoApply.scope, scopeKey: autoApply.scopeKey, file: targetPath }, 'Applied prompt-override fix');
+    // Phase 8.1 — same multi-run verification flow as advisor-rule.
+    import('./fix-verification.js').then(({ recordAutoApplyForVerification }) => {
+        recordAutoApplyForVerification(jobName, {
+            kind: 'prompt-override',
+            file: targetPath,
+            scope: autoApply.scope,
+            scopeKey: autoApply.scopeKey,
+        });
+    }).catch(err => logger.warn({ err, jobName }, 'Failed to record autoApply for verification'));
     return {
         ok: true,
         message: `Wrote prompt override ${autoApply.scope}${autoApply.scopeKey ? `:${autoApply.scopeKey}` : ''}`,

package/dist/gateway/fix-verification.d.ts CHANGED Viewed

@@ -13,6 +13,29 @@ interface PendingVerification {
     recordedAt: string;
     preFailureCount: number;
     preLastError: string | null;
+    /** Used by Phase 8.1 — when set, the verifier is also responsible for
+     * deleting this artifact if the fix doesn't help. Existing CRON.md edits
+     * leave this unset (they're hand-edits, not auto-applies, so we never
+     * revert them automatically). */
+    autoApply?: AutoApplyTracker;
+    /** Run-by-run history accumulated since the fix was applied. Single-run
+     * verdicts (the original CRON.md flow) only need the first entry; multi-
+     * run autoApply verifications need the accumulated sample. */
+    postRunOutcomes?: Array<'ok' | 'error' | 'retried'>;
+}
+/**
+ * Tracks an autoApply that's currently being verified. When the verdict
+ * window closes with zero successful runs, the verifier's
+ * revertAutoApply() deletes `file` to undo the fix; the remaining fields
+ * are used to label the verdict message and log entries.
+ */
+export interface AutoApplyTracker {
+    kind: 'advisor-rule' | 'prompt-override';
+    /** Absolute path of the file the apply wrote. */
+    file: string;
+    /** advisor-rule only: the rule's id, used by the loader's hot-reload. */
+    ruleId?: string;
+    /** prompt-override only: scope label for the verdict message. */
+    scope?: 'global' | 'agent' | 'job';
+    scopeKey?: string;
 }
 /**
  * Compare an old and new jobs list and record verifications for any job that:
@@ -25,15 +48,32 @@ interface PendingVerification {
  * pending verification" rather than waiting for a run that will never come.
  */
 export declare function recordEditsForFailingJobs(oldJobs: CronJobDefinition[], newJobs: CronJobDefinition[]): void;
+/**
+ * Phase 8.1 — record a pending verification for an autoApply (advisor-rule
+ * or prompt-override) so the verifier can roll the fix back if the next
+ * AUTOAPPLY_VERDICT_WINDOW runs don't show improvement.
+ *
+ * Called from fix-applier.applyFix on success. Idempotent: if a previous
+ * verification for the same job is still pending, the new tracker overwrites
+ * it (the most-recent fix is the one we're verifying).
+ *
+ * @param jobName Job whose upcoming runs will be sampled; also the key the
+ *                pending entry is stored under.
+ * @param tracker Describes the file the fix wrote, so it can be deleted if
+ *                the verdict is negative.
+ */
+export declare function recordAutoApplyForVerification(jobName: string, tracker: AutoApplyTracker): void;
 /**
  * After a cron run completes, check whether we were waiting on a fix
- * verification for this job. If so, send the owner a verdict and clear it.
+ * verification for this job. Two flows:
+ *
+ *   1. Hand-edit (CRON.md) — verdict on the FIRST non-skipped run. Original
+ *      Phase 7 behavior, preserved.
+ *   2. AutoApply (advisor-rule / prompt-override) — accumulate up to
+ *      AUTOAPPLY_VERDICT_WINDOW outcomes, then decide. If 0 successes,
+ *      revert the file. Either way, DM the verdict.
  *
- * Skipped runs (circuit breaker, pre-check exit, etc.) don't carry signal
- * and shouldn't count as a verdict either way.
+ * Skipped runs don't carry signal and don't advance the window in either flow.
  */
 export declare function checkAndDeliverVerification(entry: CronRunEntry, send: (text: string) => Promise<unknown>): Promise<void>;
 /** Read-only accessor for dashboards or debugging. */
 export declare function listPendingVerifications(): PendingVerification[];
+/** Test helper — clear all state by persisting an empty pending map. */
+export declare function _resetVerificationState(): void;
 export {};
 //# sourceMappingURL=fix-verification.d.ts.map

package/dist/gateway/fix-verification.js CHANGED Viewed

@@ -15,6 +15,13 @@ import { BASE_DIR } from '../config.js';
 import { computeBrokenJobs } from './failure-monitor.js';
 const logger = pino({ name: 'clementine.fix-verification' });
 const STATE_FILE = path.join(BASE_DIR, 'cron', 'fix-verifications.json');
+/**
+ * Number of post-fix runs we accumulate before deciding an autoApply
+ * verdict. Single sample is too noisy; ten is too patient. Three is
+ * a tight window: 0/3 successes after a "fix" is overwhelming evidence
+ * the fix didn't help.
+ */
+const AUTOAPPLY_VERDICT_WINDOW = 3;
 function loadState() {
     try {
         if (!existsSync(STATE_FILE))
@@ -109,12 +116,62 @@ export function recordEditsForFailingJobs(oldJobs, newJobs) {
     if (mutated)
         saveState(state);
 }
+/**
+ * Phase 8.1 — record a pending verification for an autoApply (advisor-rule
+ * or prompt-override) so the verifier can roll the fix back if the next
+ * AUTOAPPLY_VERDICT_WINDOW runs don't show improvement.
+ *
+ * Called from fix-applier.applyFix on success. Idempotent: if a previous
+ * verification for the same job is still pending, the new tracker overwrites
+ * it (the most-recent fix is the one we're verifying).
+ */
+export function recordAutoApplyForVerification(jobName, tracker) {
+    const state = loadState();
+    const broken = computeBrokenJobs();
+    const b = broken.find(x => x.jobName === jobName);
+    state.pending[jobName] = {
+        jobName,
+        recordedAt: new Date().toISOString(),
+        preFailureCount: b?.errorCount48h ?? 0,
+        preLastError: b?.lastErrors[0] ?? null,
+        autoApply: tracker,
+        postRunOutcomes: [],
+    };
+    saveState(state);
+    logger.info({ job: jobName, kind: tracker.kind, file: tracker.file }, 'Recorded autoApply for verification — will track next runs');
+}
+/**
+ * Undo an autoApply by deleting the file the apply wrote. Best-effort:
+ * a missing file is not an error (might have been hand-deleted). Returns
+ * true if a file was actually removed.
+ */
+function revertAutoApply(tracker) {
+    try {
+        if (existsSync(tracker.file)) {
+            // Use unlinkSync from fs — kept dynamic to avoid a top-of-file import
+            // we don't otherwise need.
+            const { unlinkSync } = require('node:fs');
+            unlinkSync(tracker.file);
+            logger.warn({ file: tracker.file, kind: tracker.kind }, 'Reverted autoApply — fix did not help');
+            return true;
+        }
+    }
+    catch (err) {
+        logger.warn({ err, file: tracker.file }, 'Failed to delete autoApply file during revert');
+    }
+    return false;
+}
 /**
  * After a cron run completes, check whether we were waiting on a fix
- * verification for this job. If so, send the owner a verdict and clear it.
+ * verification for this job. Two flows:
+ *
+ *   1. Hand-edit (CRON.md) — verdict on the FIRST non-skipped run. Original
+ *      Phase 7 behavior, preserved.
+ *   2. AutoApply (advisor-rule / prompt-override) — accumulate up to
+ *      AUTOAPPLY_VERDICT_WINDOW outcomes, then decide. If 0 successes,
+ *      revert the file. Either way, DM the verdict.
  *
- * Skipped runs (circuit breaker, pre-check exit, etc.) don't carry signal
- * and shouldn't count as a verdict either way.
+ * Skipped runs don't carry signal and don't advance the window in either flow.
  */
 export async function checkAndDeliverVerification(entry, send) {
     if (entry.status === 'skipped')
@@ -123,15 +180,60 @@ export async function checkAndDeliverVerification(entry, send) {
     const pending = state.pending[entry.jobName];
     if (!pending)
         return;
+    // Hand-edit flow — single-run verdict, unchanged.
+    if (!pending.autoApply) {
+        delete state.pending[entry.jobName];
+        saveState(state);
+        const ok = entry.status === 'ok';
+        const verdict = ok ? '✅ succeeded' : '⚠️ still failing';
+        const ageMin = Math.max(1, Math.round((Date.now() - Date.parse(pending.recordedAt)) / 60000));
+        const detail = ok ? '' : `\nError: ${(entry.error ?? 'unknown').split('\n')[0].slice(0, 200)}`;
+        const msg = `**[Fix verification]** \`${entry.jobName}\` ${verdict} on its first run after edit (${ageMin}m later).${detail}`;
+        try {
+            await send(msg);
+        }
+        catch (err) {
+            logger.warn({ err, job: entry.jobName }, 'Failed to send fix verification DM');
+        }
+        return;
+    }
+    // AutoApply flow — accumulate the sample first.
+    const outcomes = pending.postRunOutcomes ?? [];
+    outcomes.push(entry.status);
+    pending.postRunOutcomes = outcomes;
+    if (outcomes.length < AUTOAPPLY_VERDICT_WINDOW) {
+        // Not enough sample yet — persist accumulated state, wait for more runs.
+        saveState(state);
+        return;
+    }
+    // Decision time.
     delete state.pending[entry.jobName];
     saveState(state);
-    const ok = entry.status === 'ok';
-    const verdict = ok ? '✅ succeeded' : '⚠️ still failing';
+    const successes = outcomes.filter(o => o === 'ok').length;
     const ageMin = Math.max(1, Math.round((Date.now() - Date.parse(pending.recordedAt)) / 60000));
-    const detail = ok
-        ? ''
-        : `\nError: ${(entry.error ?? 'unknown').split('\n')[0].slice(0, 200)}`;
-    const msg = `**[Fix verification]** \`${entry.jobName}\` ${verdict} on its first run after edit (${ageMin}m later).${detail}`;
+    const tracker = pending.autoApply;
+    const scopeLabel = tracker.scope
+        ? `${tracker.kind}:${tracker.scope}${tracker.scopeKey ? `:${tracker.scopeKey}` : ''}`
+        : `${tracker.kind}${tracker.ruleId ? `:${tracker.ruleId}` : ''}`;
+    if (successes === 0) {
+        // Fix didn't help — revert and notify.
+        const reverted = revertAutoApply(tracker);
+        const msg = `**[Fix verification — REVERTED]** \`${entry.jobName}\`: ` +
+            `auto-applied ${scopeLabel} did not help (0/${outcomes.length} runs succeeded over ${ageMin}m). ` +
+            (reverted ? `Reverted ${path.basename(tracker.file)}.` : `Tried to revert but file was already gone.`);
+        try {
+            await send(msg);
+        }
+        catch (err) {
+            logger.warn({ err, job: entry.jobName }, 'Failed to send fix-revert DM');
+        }
+        logger.warn({ job: entry.jobName, scopeLabel, reverted }, 'Auto-reverted ineffective autoApply');
+        return;
+    }
+    const verdict = successes === outcomes.length
+        ? `✅ verified — ${successes}/${outcomes.length} runs succeeded`
+        : `⚠️ partial — ${successes}/${outcomes.length} runs succeeded`;
+    const msg = `**[Fix verification]** \`${entry.jobName}\`: auto-applied ${scopeLabel} ${verdict} over ${ageMin}m.`;
     try {
         await send(msg);
     }
@@ -143,4 +245,8 @@ export async function checkAndDeliverVerification(entry, send) {
 export function listPendingVerifications() {
     return Object.values(loadState().pending);
 }
+/** Test helper — clear all state. */
+export function _resetVerificationState() {
+    saveState({ pending: {} });
+}
 //# sourceMappingURL=fix-verification.js.map

package/dist/gateway/router.d.ts CHANGED Viewed

@@ -170,7 +170,7 @@ export declare class Gateway {
      * Get recent transcript activity across all sessions.
      * Used by heartbeat to know what happened since the last check.
      */
-    getRecentActivity(sinceIso: string): Array<{
+    getRecentActivity(sinceIso: string, maxEntries?: number): Array<{
         sessionKey: string;
         role: string;
         content: string;

package/dist/gateway/router.js CHANGED Viewed

@@ -1447,8 +1447,8 @@ export class Gateway {
      * Get recent transcript activity across all sessions.
      * Used by heartbeat to know what happened since the last check.
      */
-    getRecentActivity(sinceIso) {
-        return this.assistant.getRecentActivity(sinceIso);
+    getRecentActivity(sinceIso, maxEntries) {
+        return this.assistant.getRecentActivity(sinceIso, maxEntries);
     }
     /**
      * Search memory (FTS5) for context relevant to a query.

package/dist/memory/store.js CHANGED Viewed

@@ -1094,6 +1094,7 @@ export class MemoryStore {
         }
         const rows = this.conn.prepare(sql).all(...params);
         const scored = [];
+        const nowMs = Date.now();
         for (const row of rows) {
             try {
                 const vec = embeddingsModule.deserializeEmbedding(row.embedding);
@@ -1109,6 +1110,16 @@ export class MemoryStore {
                 // Soft isolation: apply boost (only when not strict)
                 if (!strict && agentSlug && row.agent_slug === agentSlug)
                     score *= 1.4;
+                // Temporal decay — same policy as FTS scoring (Phase 9d). Without
+                // this, vector and FTS rankings disagree on freshness: FTS prefers
+                // recent at equal relevance but vector treats all timestamps
+                // equally, so MMR rerank surfaces stale matches when vector wins.
+                // Same 30-day half-life, same 0.4 floor — see store.ts FTS path
+                // for design rationale.
+                if (row.updated_at) {
+                    const daysOld = Math.max(0, (nowMs - new Date(row.updated_at).getTime()) / 86_400_000);
+                    score *= Math.max(0.4, temporalDecay(daysOld, 30));
+                }
                 scored.push({
                     sourceFile: row.source_file,
                     section: row.section,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.1.6",
+  "version": "1.1.8",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",