npm - clementine-agent - Versions diffs - 1.0.91 → 1.0.93 - Mend

clementine-agent 1.0.91 → 1.0.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/agent/self-improve-loop.d.ts +102 -0
package/dist/agent/self-improve-loop.js +347 -0
package/dist/index.js +7 -0
package/package.json +1 -1

package/dist/agent/self-improve-loop.d.ts ADDED Viewed

@@ -0,0 +1,102 @@
+/**
+ * Clementine TypeScript — Self-improve loop (autonomous fix consumer).
+ *
+ * Closes the gap between "we noticed a job is failing" and "we did
+ * something about it." Periodically scans
+ * ~/.clementine/self-improve/triggers/*.json (written by cron-scheduler
+ * when consecutiveErrors >= 3), classifies the failure pattern from
+ * recentErrors, and either:
+ *
+ *   - Auto-applies a safe cron-config fix (mode, max_hours, max_turns)
+ *     and DMs the OWNING agent via their bot
+ *   - Writes a proposal to self-improve/pending-changes/ and DMs the
+ *     owning agent the diagnosis (full audit-inbox button approval is
+ *     a separate Phase 8b ship)
+ *
+ * After processing, the trigger file is removed. The existing
+ * fix-verification system (cron-scheduler.ts) records preFailureCount
+ * when a job's config changes and tracks whether the next run succeeds.
+ *
+ * Routing rule: notifications go to the owning agent's DM via their
+ * own bot using `dispatcher.send(text, { agentSlug })`. Unowned crons
+ * (no agentSlug) → Clementine's main bot DMs the owner.
+ *
+ * Idempotent: re-applying the same fix to an already-fixed job is a
+ * no-op; the trigger gets removed regardless.
+ */
+/**
+ * Shape of a trigger file picked up from the triggers directory. Per the
+ * file header, cron-scheduler writes one when a job reaches
+ * consecutiveErrors >= 3.
+ */
+export interface TriggerFile {
+    /** Job name; matched against the `name` of each job in CRON.md's frontmatter. */
+    jobName: string;
+    /** Length of the failure streak; quoted in the notification text. */
+    consecutiveErrors: number;
+    /** Error messages that are pattern-matched to choose a fix recipe. */
+    recentErrors: string[];
+    /** When the trigger was written — presumably an ISO-8601 timestamp; confirm against cron-scheduler. */
+    triggeredAt: string;
+}
+/**
+ * How the loop treats a classified failure:
+ *   - 'safe-cron-config': the recipe's `apply` edit is written to CRON.md
+ *     automatically and the owning agent is notified.
+ *   - 'noop': the trigger is cleared with no edit and no notification.
+ *   - 'risky' / 'unknown': a proposal file is written to the pending-changes
+ *     directory and the owning agent is notified.
+ */
+export type FixCategory = 'safe-cron-config' | 'risky' | 'noop' | 'unknown';
+/** Outcome of classifying a trigger's errors: what to do and how to describe it. */
+export interface FixRecipe {
+    /** Decides which handling path the loop takes for this trigger. */
+    category: FixCategory;
+    /** Description of what this fix does, for DMs. */
+    description: string;
+    /**
+     * For safe-cron-config: a function that mutates the job's frontmatter
+     * entry in-place. Returns true if any change was made (false = idempotent
+     * no-op because the fix is already applied). CRON.md is only rewritten
+     * when this returns true.
+     */
+    apply?: (job: Record<string, unknown>) => boolean;
+}
+/**
+ * Minimal notification channel the loop depends on.
+ *
+ * The loop calls `send(text, { agentSlug })` when the failing job has an
+ * owning agent other than 'clementine', and `send(text, {})` otherwise.
+ * A rejected promise is caught by the loop and logged at debug level, so a
+ * failed notification never blocks trigger processing.
+ */
+export interface SelfImproveDispatcher {
+    send(text: string, context?: {
+        /** Slug of the agent whose bot should deliver the message. */
+        agentSlug?: string;
+    }): Promise<{
+        /** Delivery outcome reported by the dispatcher implementation. */
+        delivered: boolean;
+        /** Error details reported by the dispatcher — presumably keyed by channel name; confirm against the implementation. */
+        channelErrors: Record<string, string>;
+    }>;
+}
+/** Optional overrides accepted by the SelfImproveLoop constructor. */
+export interface SelfImproveLoopOptions {
+    /**
+     * Override the fallback safety-net tick interval. The loop is primarily
+     * event-driven; this is just a backstop. Used by tests to disable the
+     * timer entirely (set to 0 or a very large number).
+     */
+    tickMs?: number;
+    /**
+     * Directory scanned for `*.json` trigger files. Defaults to
+     * `self-improve/triggers` under the configured base directory.
+     * Override for tests.
+     */
+    triggersDir?: string;
+    /**
+     * Directory that receives proposal files for risky/unknown failures.
+     * Defaults to `self-improve/pending-changes` under the configured base
+     * directory. Override for tests.
+     */
+    pendingDir?: string;
+    /**
+     * Path of the CRON.md file whose frontmatter holds the job list.
+     * Defaults to `CRON.md` in the configured system directory. Override
+     * for tests.
+     */
+    cronPath?: string;
+    /**
+     * Disable the fs.watch event-driven path. Tests use this so they can
+     * call tick() directly without racing the watcher.
+     */
+    disableWatch?: boolean;
+}
+/**
+ * Pick a fix recipe for a failing job from its recent error messages.
+ *
+ * The messages are joined with newlines, only the first 4000 characters are
+ * inspected, and the first known pattern that matches decides the recipe.
+ * When nothing matches, the result has category 'unknown'.
+ */
+export declare function classifyFailure(recentErrors: string[]): FixRecipe;
+/**
+ * Consumer of self-improve trigger files: classifies each failure, applies
+ * safe cron-config fixes or files a proposal, notifies the owning agent,
+ * and removes the trigger once handled.
+ */
+export declare class SelfImproveLoop {
+    private readonly tickMs;
+    private readonly triggersDir;
+    private readonly pendingDir;
+    private readonly cronPath;
+    private readonly dispatcher;
+    private readonly watchEnabled;
+    private timer;
+    private watcher;
+    private debounceTimer;
+    private running;
+    private ticking;
+    /**
+     * @param dispatcher Channel used to notify the owning agent.
+     * @param opts Optional overrides for the interval, directories and watcher.
+     */
+    constructor(dispatcher: SelfImproveDispatcher, opts?: SelfImproveLoopOptions);
+    /**
+     * Start the loop: runs one tick immediately, begins watching the
+     * triggers directory (unless disabled) and arms the fallback interval.
+     * Calling it while already running does nothing.
+     */
+    start(): void;
+    /**
+     * Stop the loop: clears the fallback interval, closes the watcher and
+     * cancels any pending debounced tick. Calling it while stopped does nothing.
+     */
+    stop(): void;
+    /** Coalesce a burst of fs.watch events (multiple triggers landing in
+     * quick succession) into a single tick. */
+    private scheduleDebouncedTick;
+    /**
+     * Process all pending triggers. Public so tests + manual invocations
+     * (e.g., a `clementine self-improve tick` CLI command) can call it.
+     *
+     * Resolves with all-zero counts when another tick is still in progress.
+     * `processed` counts triggers that were handled and removed; `applied`,
+     * `pending` and `noop` break that down into auto-applied fixes, proposals
+     * written, and triggers cleared without a change.
+     */
+    tick(): Promise<{
+        processed: number;
+        applied: number;
+        pending: number;
+        noop: number;
+    }>;
+    /** Handle a single parsed trigger and update the running counts. */
+    private processOne;
+    /** Send a notification through the dispatcher; failures are logged, never thrown. */
+    private notifyAgent;
+}
+//# sourceMappingURL=self-improve-loop.d.ts.map

package/dist/agent/self-improve-loop.js ADDED Viewed

@@ -0,0 +1,347 @@
+/**
+ * Clementine TypeScript — Self-improve loop (autonomous fix consumer).
+ *
+ * Closes the gap between "we noticed a job is failing" and "we did
+ * something about it." Periodically scans
+ * ~/.clementine/self-improve/triggers/*.json (written by cron-scheduler
+ * when consecutiveErrors >= 3), classifies the failure pattern from
+ * recentErrors, and either:
+ *
+ *   - Auto-applies a safe cron-config fix (mode, max_hours, max_turns)
+ *     and DMs the OWNING agent via their bot
+ *   - Writes a proposal to self-improve/pending-changes/ and DMs the
+ *     owning agent the diagnosis (full audit-inbox button approval is
+ *     a separate Phase 8b ship)
+ *
+ * After processing, the trigger file is removed. The existing
+ * fix-verification system (cron-scheduler.ts) records preFailureCount
+ * when a job's config changes and tracks whether the next run succeeds.
+ *
+ * Routing rule: notifications go to the owning agent's DM via their
+ * own bot using `dispatcher.send(text, { agentSlug })`. Unowned crons
+ * (no agentSlug) → Clementine's main bot DMs the owner.
+ *
+ * Idempotent: re-applying the same fix to an already-fixed job is a
+ * no-op; the trigger gets removed regardless.
+ */
+import { existsSync, mkdirSync, readdirSync, readFileSync, unlinkSync, watch, writeFileSync, } from 'node:fs';
+import path from 'node:path';
+import matter from 'gray-matter';
+import pino from 'pino';
+import { BASE_DIR, SYSTEM_DIR } from '../config.js';
+// Module-scoped pino logger; every record carries this component name.
+const logger = pino({ name: 'clementine.self-improve-loop' });
+/**
+ * Fallback tick interval. The loop is primarily event-driven via fs.watch
+ * on the triggers directory — this is just a slow safety net for cases
+ * where fs.watch dropped an event (rare but possible) or where the
+ * daemon booted with triggers already in place from before fs.watch was
+ * registered. 1h is plenty: the upstream cron scheduler runs at most
+ * once per minute, and a job needs 3+ consecutive errors to even produce
+ * a trigger, so the situation is already hours-stale by the time we see
+ * a trigger.
+ */
+const FALLBACK_TICK_MS = 60 * 60 * 1000;
+/** Coalesce a burst of fs.watch events into a single tick. */
+const WATCH_DEBOUNCE_MS = 2000;
+/** Default directory scanned for trigger files (used when opts.triggersDir is not given). */
+const TRIGGERS_DIR = path.join(BASE_DIR, 'self-improve', 'triggers');
+/** Default directory that receives proposal files (used when opts.pendingDir is not given). */
+const PENDING_CHANGES_DIR = path.join(BASE_DIR, 'self-improve', 'pending-changes');
+/** Default location of the CRON.md job file (used when opts.cronPath is not given). */
+const CRON_PATH = path.join(SYSTEM_DIR, 'CRON.md');
+// ── Pattern recognition ──────────────────────────────────────────────
+const PATTERNS = [
+    {
+        // "Reached maximum number of turns (8)"
+        match: /Reached maximum number of turns/i,
+        recipe: () => ({
+            category: 'safe-cron-config',
+            description: 'Hit max-turns ceiling repeatedly. Switching to unleashed mode (multi-phase) so the job can complete its workflow.',
+            apply: (job) => {
+                let changed = false;
+                if (job.mode !== 'unleashed') {
+                    job.mode = 'unleashed';
+                    changed = true;
+                }
+                if (typeof job.max_hours !== 'number' || job.max_hours < 1) {
+                    job.max_hours = 1;
+                    changed = true;
+                }
+                return changed;
+            },
+        }),
+    },
+    {
+        // "Autocompact is thrashing: the context refilled to the limit within 3 turns"
+        match: /Autocompact is thrashing/i,
+        recipe: () => ({
+            category: 'safe-cron-config',
+            description: 'Context window blowing up mid-run. Switching to unleashed mode so each phase starts with a fresh context.',
+            apply: (job) => {
+                let changed = false;
+                if (job.mode !== 'unleashed') {
+                    job.mode = 'unleashed';
+                    changed = true;
+                }
+                if (typeof job.max_hours !== 'number' || job.max_hours < 1) {
+                    job.max_hours = 1;
+                    changed = true;
+                }
+                return changed;
+            },
+        }),
+    },
+    {
+        // Already-fixed-in-code patterns
+        match: /This model does not support user-configurable task budgets|Budget exceeded for cron job/i,
+        recipe: () => ({
+            category: 'noop',
+            description: 'Old taskBudget rejection — already addressed in v1.0.90 (taskBudget no longer passed to SDK). Trigger cleared without action.',
+        }),
+    },
+];
+export function classifyFailure(recentErrors) {
+    const blob = recentErrors.join('\n').slice(0, 4000);
+    for (const { match, recipe } of PATTERNS) {
+        if (match.test(blob))
+            return recipe();
+    }
+    return {
+        category: 'unknown',
+        description: 'Unrecognized failure pattern. Owner needs to inspect the trigger file.',
+    };
+}
+function loadCronJob(jobName, cronPath) {
+    if (!existsSync(cronPath))
+        return null;
+    const raw = readFileSync(cronPath, 'utf-8');
+    const parsed = matter(raw);
+    const jobs = (parsed.data.jobs ?? []);
+    const job = jobs.find((j) => String(j.name ?? '') === jobName);
+    if (!job)
+        return null;
+    const agentSlug = typeof job.agentSlug === 'string' ? job.agentSlug : (typeof job.agent_slug === 'string' ? job.agent_slug : undefined);
+    return { agentSlug, job, raw, parsed };
+}
+/**
+ * Apply the recipe's mutator to the job's frontmatter and write CRON.md
+ * back atomically. Returns true if a change was actually written.
+ */
+function applyCronEdit(jobName, recipe, cronPath) {
+    if (!recipe.apply)
+        return false;
+    const lookup = loadCronJob(jobName, cronPath);
+    if (!lookup) {
+        logger.warn({ jobName }, 'Job not found in CRON.md — cannot apply fix');
+        return false;
+    }
+    const changed = recipe.apply(lookup.job);
+    if (!changed)
+        return false;
+    // Re-stringify with the existing content body preserved.
+    const updated = matter.stringify(lookup.parsed.content, lookup.parsed.data);
+    writeFileSync(cronPath, updated);
+    return true;
+}
+function writePendingChange(record, dir) {
+    mkdirSync(dir, { recursive: true });
+    const file = path.join(dir, `${record.id}.json`);
+    writeFileSync(file, JSON.stringify(record, null, 2));
+    return file;
+}
+// ── Main loop ────────────────────────────────────────────────────────
+export class SelfImproveLoop {
+    tickMs;
+    triggersDir;
+    pendingDir;
+    cronPath;
+    dispatcher;
+    watchEnabled;
+    timer = null;
+    watcher = null;
+    debounceTimer = null;
+    running = false;
+    ticking = false;
+    constructor(dispatcher, opts = {}) {
+        this.dispatcher = dispatcher;
+        this.tickMs = opts.tickMs ?? FALLBACK_TICK_MS;
+        this.triggersDir = opts.triggersDir ?? TRIGGERS_DIR;
+        this.pendingDir = opts.pendingDir ?? PENDING_CHANGES_DIR;
+        this.cronPath = opts.cronPath ?? CRON_PATH;
+        this.watchEnabled = opts.disableWatch !== true;
+    }
+    start() {
+        if (this.running)
+            return;
+        this.running = true;
+        // Run immediately so any backlog from the prior daemon gets handled
+        // without a long wait.
+        this.tick().catch((err) => logger.error({ err }, 'Initial self-improve tick failed'));
+        // Event-driven primary path: watch the triggers dir. cron-scheduler
+        // writes a file when a job hits consErrors >= 3; we react within
+        // ~2 seconds (debounce window) instead of polling every 10 minutes
+        // for a directory that's empty 99% of the time.
+        if (this.watchEnabled) {
+            try {
+                mkdirSync(this.triggersDir, { recursive: true });
+                this.watcher = watch(this.triggersDir, (eventType, filename) => {
+                    if (eventType !== 'rename' || !filename || !filename.endsWith('.json'))
+                        return;
+                    this.scheduleDebouncedTick();
+                });
+            }
+            catch (err) {
+                logger.warn({ err, dir: this.triggersDir }, 'Failed to watch triggers dir — falling back to polling only');
+            }
+        }
+        // Slow fallback safety net — covers fs.watch event drops + boot-with-backlog.
+        this.timer = setInterval(() => {
+            this.tick().catch((err) => logger.error({ err }, 'Self-improve fallback tick failed'));
+        }, this.tickMs);
+        logger.info({ fallbackTickMs: this.tickMs, watchEnabled: this.watchEnabled && this.watcher !== null }, 'Self-improve loop started');
+    }
+    stop() {
+        if (!this.running)
+            return;
+        this.running = false;
+        if (this.timer) {
+            clearInterval(this.timer);
+            this.timer = null;
+        }
+        if (this.watcher) {
+            try {
+                this.watcher.close();
+            }
+            catch { /* ignore */ }
+            this.watcher = null;
+        }
+        if (this.debounceTimer) {
+            clearTimeout(this.debounceTimer);
+            this.debounceTimer = null;
+        }
+        logger.info('Self-improve loop stopped');
+    }
+    /** Coalesce a burst of fs.watch events (multiple triggers landing in
+     * quick succession) into a single tick. */
+    scheduleDebouncedTick() {
+        if (this.debounceTimer)
+            clearTimeout(this.debounceTimer);
+        this.debounceTimer = setTimeout(() => {
+            this.debounceTimer = null;
+            this.tick().catch((err) => logger.error({ err }, 'Self-improve event-driven tick failed'));
+        }, WATCH_DEBOUNCE_MS);
+    }
+    /**
+     * Process all pending triggers. Public so tests + manual invocations
+     * (e.g., a `clementine self-improve tick` CLI command) can call it.
+     */
+    async tick() {
+        if (this.ticking)
+            return { processed: 0, applied: 0, pending: 0, noop: 0 };
+        this.ticking = true;
+        const counts = { processed: 0, applied: 0, pending: 0, noop: 0 };
+        try {
+            if (!existsSync(this.triggersDir))
+                return counts;
+            let files;
+            try {
+                files = readdirSync(this.triggersDir).filter((f) => f.endsWith('.json'));
+            }
+            catch {
+                return counts;
+            }
+            for (const file of files) {
+                const triggerPath = path.join(this.triggersDir, file);
+                let trigger;
+                try {
+                    trigger = JSON.parse(readFileSync(triggerPath, 'utf-8'));
+                }
+                catch (err) {
+                    logger.warn({ err, file }, 'Failed to parse trigger — removing');
+                    try {
+                        unlinkSync(triggerPath);
+                    }
+                    catch { /* ignore */ }
+                    continue;
+                }
+                try {
+                    await this.processOne(trigger, counts);
+                }
+                catch (err) {
+                    logger.warn({ err, jobName: trigger.jobName }, 'Failed to process trigger — leaving in place for next tick');
+                    continue;
+                }
+                // Successfully handled — remove the trigger
+                try {
+                    unlinkSync(triggerPath);
+                }
+                catch { /* ignore */ }
+                counts.processed++;
+            }
+        }
+        finally {
+            this.ticking = false;
+        }
+        if (counts.processed > 0) {
+            logger.info(counts, 'Self-improve loop: processed triggers');
+        }
+        return counts;
+    }
+    async processOne(trigger, counts) {
+        const recipe = classifyFailure(trigger.recentErrors);
+        const lookup = loadCronJob(trigger.jobName, this.cronPath);
+        const agentSlug = lookup?.agentSlug;
+        if (recipe.category === 'safe-cron-config') {
+            const applied = applyCronEdit(trigger.jobName, recipe, this.cronPath);
+            if (applied) {
+                counts.applied++;
+                await this.notifyAgent(agentSlug, [
+                    `🔧 **Auto-fixed** \`${trigger.jobName}\` after ${trigger.consecutiveErrors} consecutive failures.`,
+                    '',
+                    recipe.description,
+                    '',
+                    'I\'ll watch the next run to confirm it lands cleanly.',
+                ].join('\n'));
+            }
+            else {
+                counts.noop++;
+                logger.info({ jobName: trigger.jobName }, 'Fix recipe applied is already in place — trigger removed without further action');
+            }
+            return;
+        }
+        if (recipe.category === 'noop') {
+            counts.noop++;
+            logger.info({ jobName: trigger.jobName, reason: recipe.description }, 'Self-improve: no-op');
+            return;
+        }
+        // risky | unknown → write proposal + DM agent
+        const id = `proposal-${Date.now()}-${trigger.jobName.replace(/[^a-z0-9-]/gi, '_')}`;
+        const record = {
+            id,
+            jobName: trigger.jobName,
+            ...(agentSlug ? { agentSlug } : {}),
+            category: recipe.category,
+            description: recipe.description,
+            recentErrors: trigger.recentErrors,
+            consecutiveErrors: trigger.consecutiveErrors,
+            proposedAt: new Date().toISOString(),
+        };
+        const file = writePendingChange(record, this.pendingDir);
+        counts.pending++;
+        await this.notifyAgent(agentSlug, [
+            `⚠️ **${trigger.jobName}** has failed ${trigger.consecutiveErrors} times in a row.`,
+            '',
+            recipe.description,
+            '',
+            `Proposal saved to \`${file}\`. Review when convenient.`,
+            '',
+            '_(approve flow via #audit-inbox buttons coming in P8b)_',
+        ].join('\n'));
+    }
+    async notifyAgent(agentSlug, message) {
+        try {
+            await this.dispatcher.send(message, agentSlug && agentSlug !== 'clementine' ? { agentSlug } : {});
+        }
+        catch (err) {
+            logger.debug({ err, agentSlug }, 'Failed to dispatch self-improve notification (non-fatal)');
+        }
+    }
+}
+//# sourceMappingURL=self-improve-loop.js.map

package/dist/index.js CHANGED Viewed

@@ -665,6 +665,11 @@ async function asyncMain() {
     // agent's profile and routes output to their Discord channel.
     const { AgentHeartbeatManager } = await import('./gateway/agent-heartbeat-manager.js');
     const agentHeartbeats = new AgentHeartbeatManager(gateway.getAgentManager(), gateway);
+    // Self-improve loop — closes the gap between "trigger written" and
+    // "fix applied." Watches self-improve/triggers/ (hourly fallback scan), classifies
+    // failures, auto-applies safe cron-config fixes, escalates risky ones.
+    const { SelfImproveLoop } = await import('./agent/self-improve-loop.js');
+    const selfImproveLoop = new SelfImproveLoop(dispatcher);
     // ── Build channel tasks ──────────────────────────────────────────
     const channelTasks = [];
     const activeChannels = [];
@@ -762,6 +767,7 @@ async function asyncMain() {
     heartbeat.start();
     cronScheduler.start();
     agentHeartbeats.start();
+    selfImproveLoop.start();
     // Background-task hygiene: any task left in 'running' is from a prior
     // process. Mark them aborted so the lifecycle is honest. (P6b will add
     // resumability; for now fail-fast is clearer than silently re-running.)
@@ -966,6 +972,7 @@ async function asyncMain() {
     heartbeat.stop();
     cronScheduler.stop();
     agentHeartbeats.stop();
+    selfImproveLoop.stop();
     // ── Self-restart (enhanced with health check + rollback) ────────
     if (restartRequested) {
         // Clear our PID file BEFORE spawning the child, so ensureSingleton()

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.0.91",
+  "version": "1.0.93",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",