npm - clementine-agent - Versions diffs - 1.18.180 → 1.18.183 - Mend

clementine-agent 1.18.180 → 1.18.183

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/agent/assistant.js +2 -0
package/dist/agent/background-tasks.d.ts +20 -0
package/dist/agent/background-tasks.js +43 -0
package/dist/agent/run-skill.js +34 -28
package/dist/cli/dashboard.js +128 -5
package/dist/gateway/cron-scheduler.d.ts +13 -0
package/dist/gateway/cron-scheduler.js +65 -5
package/dist/index.js +14 -0
package/dist/lib/billing-buckets.d.ts +52 -0
package/dist/lib/billing-buckets.js +108 -0
package/dist/types.d.ts +1 -0
package/package.json +1 -1

package/dist/agent/assistant.js CHANGED Viewed

@@ -1208,6 +1208,8 @@ Obsidian vault with YAML frontmatter, [[wikilinks]], #tags.
 **Remembering:** Durable facts → memory_write(action="update_memory"). Daily context → note_take / memory_write(action="append_daily"). New person → note_create. New task → task_add.
 Save important facts immediately; a background agent also extracts after each exchange.
+**Recalling — REQUIRED behavior:** When the user references past work you don't have in immediate context — a URL, a deployment, a file you created, a task or background job you ran, a person/project/domain name you don't have inline — call \`memory_search\` (or \`transcript_search\` for chat history) BEFORE asking the user to provide it and BEFORE replying that you have no record. Saying "I don't see any record of that" without having searched is a memory failure, not an honest answer. Background tasks, cron runs, deployments, and prior chat turns are all in the SQLite memory store with dense embeddings — semantic search will surface them even when the wording doesn't match exactly.
 ## Self-Configuration (never tell ${owner} to edit a config file)
 Clementine is self-configuring. Every credential, every integration, every tool permission can be set by calling a tool — no hand-editing.

package/dist/agent/background-tasks.d.ts CHANGED Viewed

@@ -64,6 +64,26 @@ export declare function resumeBackgroundTask(id: string, opts?: BackgroundTaskOp
 export declare function interruptStaleRunningTasks(opts?: BackgroundTaskOptions): number;
 /** Backward-compatible export for callers/tests using the old name. */
 export declare const abortStaleRunningTasks: typeof interruptStaleRunningTasks;
+/**
+ * Find background tasks whose lifecycle messages were never mirrored into
+ * the originating chat session's memory — typically because they completed
+ * before 1.18.180 wired the mirror, or because the daemon was down when the
+ * delivery would have fired. Returns terminal-state tasks (done / failed /
+ * interrupted / aborted) that:
+ *   - have a sessionKey (so we know where to mirror them)
+ *   - lack a `mirroredAt` flag (haven't been mirrored yet)
+ *   - completed within the recency window (default: last 7 days)
+ *
+ * Caller (typically the cron-scheduler on daemon start) is responsible for
+ * doing the actual mirror via gateway.injectContext and then stamping each
+ * task with `markBackgroundTaskMirrored(id)`. Keeping the injection out of
+ * this module avoids a dependency cycle on the gateway.
+ *
+ * The recency check only excludes tasks that carry a parseable timestamp
+ * older than the window; a task with no usable timestamp is still returned.
+ *
+ * @param opts Background-task options, plus an optional `sinceMs` recency
+ *   window in milliseconds, measured back from the current time.
+ * @returns The matching tasks, grouped by status in the order done, failed,
+ *   interrupted, aborted.
+ */
+export declare function findUnmirroredDeliveries(opts?: BackgroundTaskOptions & {
+    sinceMs?: number;
+}): BackgroundTask[];
+/**
+ * Stamp `mirroredAt` so future boots don't re-mirror the same delivery.
+ * Does nothing when no task can be loaded for `id`.
+ *
+ * @param id Identifier of the task to stamp.
+ * @param opts Background-task options used to locate the task.
+ */
+export declare function markBackgroundTaskMirrored(id: string, opts?: BackgroundTaskOptions): void;
 /** Delete a task file. Callers should avoid deleting active tasks. */
 export declare function deleteBackgroundTask(id: string, opts?: BackgroundTaskOptions): void;
 /** Backward-compatible test helper alias. */

package/dist/agent/background-tasks.js CHANGED Viewed

@@ -205,6 +205,49 @@ export function interruptStaleRunningTasks(opts) {
 }
 /** Backward-compatible export for callers/tests using the old name. */
 export const abortStaleRunningTasks = interruptStaleRunningTasks;
+/**
+ * Find background tasks whose lifecycle messages were never mirrored into
+ * the originating chat session's memory — typically because they completed
+ * before 1.18.180 wired the mirror, or because the daemon was down when the
+ * delivery would have fired. Returns terminal-state tasks (done / failed /
+ * interrupted / aborted) that:
+ *   - have a sessionKey (so we know where to mirror them)
+ *   - lack a `mirroredAt` flag (haven't been mirrored yet)
+ *   - completed within the recency window (default: last 7 days)
+ *
+ * Caller (typically the cron-scheduler on daemon start) is responsible for
+ * doing the actual mirror via gateway.injectContext and then stamping each
+ * task with `markBackgroundTaskMirrored(id)`. Keeping the injection out of
+ * this module avoids a dependency cycle on the gateway.
+ *
+ * The recency check only excludes tasks that carry a parseable timestamp
+ * older than the window; a task with no usable timestamp is still returned.
+ *
+ * @param opts Background-task options, plus an optional `sinceMs` recency
+ *   window in milliseconds, measured back from the current time. The same
+ *   object is forwarded unchanged to `listBackgroundTasks`.
+ * @returns The matching tasks, grouped by status in the order done, failed,
+ *   interrupted, aborted.
+ */
+export function findUnmirroredDeliveries(opts) {
+    const sinceMs = opts?.sinceMs ?? 7 * 24 * 60 * 60_000; // default window: 7 days, in milliseconds
+    const cutoff = Date.now() - sinceMs; // epoch-ms boundary; older stamps fall outside the window
+    const terminal = ['done', 'failed', 'interrupted', 'aborted'];
+    const out = [];
+    for (const status of terminal) {
+        for (const task of listBackgroundTasks({ status }, opts)) { // one listing per terminal status
+            if (task.mirroredAt)
+                continue; // already mirrored on a prior boot
+            if (!task.sessionKey)
+                continue; // no chat to mirror back to
+            const stampIso = task.completedAt ?? task.interruptedAt ?? task.createdAt; // first timestamp that is set, in this order
+            const stamp = Date.parse(stampIso ?? ''); // NaN when nothing is set or the value does not parse
+            if (Number.isFinite(stamp) && stamp < cutoff)
+                continue; // outside the window; NaN stamps are NOT skipped, so such tasks are kept
+            out.push(task);
+        }
+    }
+    return out;
+}
+/**
+ * Stamp `mirroredAt` so future boots don't re-mirror the same delivery.
+ *
+ * Loads the task, sets `mirroredAt` to the current time as an ISO string,
+ * and writes the whole task object back through `safeWrite` at
+ * `pathFor(id, opts)`. Does nothing when no task can be loaded for `id`.
+ *
+ * @param id Identifier of the task to stamp.
+ * @param opts Options forwarded to `loadBackgroundTask` and `pathFor`.
+ */
+export function markBackgroundTaskMirrored(id, opts) {
+    const task = loadBackgroundTask(id, opts);
+    if (!task)
+        return; // nothing loaded for this id, so there is nothing to stamp
+    task.mirroredAt = new Date().toISOString();
+    safeWrite(pathFor(id, opts), task); // persists the full task object, not just the new field
+}
 /** Delete a task file. Callers should avoid deleting active tasks. */
 export function deleteBackgroundTask(id, opts) {
     try {

package/dist/agent/run-skill.js CHANGED Viewed

@@ -219,18 +219,26 @@ function shouldAutoDelegate(skill, source) {
     return true;
 }
 /**
- * Resolve the model string to use for an autonomous run. The 1M-context
- * variant gives the worker subagent 5× the room of the standard 200K
- * window — enough headroom that compaction is rare and the
- * "refetch-after-compact" loop pattern (seen in the 2026-05-11
- * imessage-triage failures) never occurs in practice.
+ * Resolve the model string to use for an autonomous run.
  *
- * The actual 1M routing is gated by the user's plan (see
- * config.ts:usesOneMillionContext) and the model family — Haiku doesn't
- * support 1M, and Sonnet 1M needs the [1m] suffix. We return the full
- * Sonnet model ID with [1m] appended; downstream
- * normalizeClaudeSdkOptionsForOneMillionContext strips it back off when
- * the plan doesn't support it.
+ * **Default: plain Sonnet (200K).** Sonnet `[1m]` is the "Extra Usage
+ * path" on Anthropic's billing — it is NOT covered by Max/Team/Enterprise
+ * subscriptions, regardless of `CLEMENTINE_1M_CONTEXT_MODE` (the mode
+ * flag only governs Opus long-context, which Max does cover). Defaulting
+ * autonomous work to Sonnet [1m] silently routes cron, scheduled-skill,
+ * heartbeat, and team-task runs onto a separate metered bill — surprising
+ * on Max plans where the standard Sonnet meter stays quiet but weekly
+ * usage climbs.
+ *
+ * Compaction risk on the 200K window is mitigated by the auto-delegating
+ * wrapper (1.18.173): the worker subagent runs in an isolated context
+ * containing only the skill body + its own tool turns, so even
+ * data-heavy procedures comfortably fit.
+ *
+ * Skills that genuinely need 1M (rare — verify the workload first) opt
+ * in explicitly via frontmatter `clementine.limits.model:
+ * claude-sonnet-4-6[1m]` (Extra Usage) or `claude-opus-4-7[1m]` (covered
+ * by Max). Callers may also override per-invocation via `options.model`.
  */
 function resolveAutonomousModel(explicitModel, skillModel) {
     // Caller's explicit model wins.
@@ -239,22 +247,17 @@ function resolveAutonomousModel(explicitModel, skillModel) {
     // Skill-declared model wins next.
     if (skillModel)
         return skillModel;
-    // Default: Sonnet [1m]. The normalizer will strip [1m] if the user's
-    // plan doesn't include it, falling back to standard Sonnet — still
-    // works, just with less headroom.
-    const base = MODELS.sonnet;
-    if (!base)
-        return undefined;
-    if (/\[1m\]/i.test(base))
-        return base;
-    return `${base}[1m]`;
+    // Default: plain Sonnet (no [1m]). Stays on the standard Sonnet meter
+    // covered by Max plans; no Extra Usage exposure.
+    return MODELS.sonnet;
 }
 /**
  * Build the AgentDefinition for the `skill-worker` subagent that
  * executes this skill in an isolated context. The subagent's system
  * prompt is the skill body; its tools are the skill's computed
- * allowlist; its model is the same 1M-context model the parent uses
- * (the worker is where the real data flows — the parent stays tiny).
+ * allowlist; its model is whatever resolveAutonomousModel returned —
+ * by default plain Sonnet 200K, which the isolated worker context
+ * comfortably fits without compaction.
  *
  * `description` is what the SDK shows the parent for routing decisions.
  * Since the parent is `forceSubagent`'d to this worker, the description
@@ -275,8 +278,9 @@ function buildSkillWorkerAgent(skill, renderedProcedure, effectiveTools, model,
             `## Procedure\n\n${renderedProcedure}`,
         tools: effectiveTools,
         // SDK accepts 'sonnet' / 'opus' / 'haiku' tier aliases OR full model
-        // IDs. We pass the full ID with [1m] when present; the SDK strips
-        // [1m] internally for plans that don't support it.
+        // IDs. Default is plain Sonnet (200K); when a skill or caller opts
+        // into a [1m] variant explicitly, we pass it through and the SDK
+        // strips [1m] internally for plans that don't support it.
         ...(model ? { model } : {}),
         effort: 'medium',
         maxTurns: workerMaxTurns,
@@ -368,10 +372,12 @@ export async function runSkill(name, options = {}) {
         ...(skill.layout === 'folder' ? [path.dirname(skill.filePath)] : []),
     ];
     const mutatingSkill = effectiveTools.some((t) => t === 'Write' || t === 'Edit' || t === 'Bash' || /__(write|edit|update|create|delete|send|post|patch|set)/i.test(t));
-    // 1.18.173: resolve the effective model. Autonomous runs default to
-    // Sonnet [1m] (1M context window) so the worker subagent has 5× the
-    // room of a standard 200K-window model. resolveAutonomousModel honors
-    // explicit overrides + skill-declared limits.model first.
+    // 1.18.182: resolve the effective model. Autonomous runs default to
+    // plain Sonnet (200K) — covered by the standard Sonnet meter on Max,
+    // no Extra Usage exposure. Worker-subagent isolation (1.18.173) keeps
+    // the 200K window comfortably under compaction even for heavy skills.
+    // resolveAutonomousModel honors explicit overrides + skill-declared
+    // limits.model first, so a skill that genuinely needs 1M can opt in.
     const skillModel = skill.frontmatter?.clementine?.limits?.model;
     const effectiveModel = autoDelegate
         ? resolveAutonomousModel(options.model, skillModel)

package/dist/cli/dashboard.js CHANGED Viewed

@@ -10667,7 +10667,7 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
     // ── Token Usage API ──────────────────────────────────────────────
     app.get('/api/metrics/usage', async (_req, res) => {
         if (!existsSync(MEMORY_DB_PATH)) {
-            res.json({ error: 'No DB', totalTokens: 0, byModel: [], bySource: [], byDay: [] });
+            res.json({ error: 'No DB', totalTokens: 0, byModel: [], bySource: [], byDay: [], byBucket: [], bucketTotals: { planCostCents: 0, extraCostCents: 0 } });
             return;
         }
         const Database = (await import('better-sqlite3')).default;
@@ -10676,32 +10676,86 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
             // Check if table exists
             const tableExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='usage_log'").get();
             if (!tableExists) {
-                res.json({ totalTokens: 0, totalInput: 0, totalOutput: 0, byModel: [], bySource: [], byDay: [] });
+                res.json({ totalTokens: 0, totalInput: 0, totalOutput: 0, byModel: [], bySource: [], byDay: [], byBucket: [], bucketTotals: { planCostCents: 0, extraCostCents: 0 } });
                 return;
             }
+            // 1.18.183: cost_cents may not exist on older installs (added via
+            // ALTER at store.ts:675). Probe before referencing so we degrade
+            // to "no cost data" rather than erroring out.
+            const columns = new Set(db.prepare('PRAGMA table_info(usage_log)').all().map((c) => c.name));
+            const costExpr = columns.has('cost_cents') ? 'COALESCE(SUM(cost_cents), 0)' : '0';
             const totals = db.prepare(`SELECT COALESCE(SUM(input_tokens), 0) as ti, COALESCE(SUM(output_tokens), 0) as to_,
-                COALESCE(SUM(cache_read_tokens), 0) as tcr, COALESCE(SUM(cache_creation_tokens), 0) as tcc
+                COALESCE(SUM(cache_read_tokens), 0) as tcr, COALESCE(SUM(cache_creation_tokens), 0) as tcc,
+                ${costExpr} as cost
          FROM usage_log`).get();
-            const byModel = db.prepare(`SELECT model, SUM(input_tokens) as input, SUM(output_tokens) as output, SUM(cache_read_tokens) as cacheRead
+            const byModel = db.prepare(`SELECT model, SUM(input_tokens) as input, SUM(output_tokens) as output,
+                SUM(cache_read_tokens) as cacheRead, ${costExpr} as costCents, COUNT(*) as queries
          FROM usage_log GROUP BY model ORDER BY input DESC`).all();
             const bySource = db.prepare(`SELECT source, SUM(input_tokens) as input, SUM(output_tokens) as output
          FROM usage_log GROUP BY source ORDER BY input DESC`).all();
             const byDay = db.prepare(`SELECT date(created_at) as day, SUM(input_tokens) as input, SUM(output_tokens) as output
          FROM usage_log WHERE created_at >= date('now', '-7 days')
          GROUP BY date(created_at) ORDER BY day`).all();
+            // 1.18.183: per-bucket aggregation. Same byModel rows, classified
+            // by billing bucket (Sonnet 200K / Sonnet 1M Extra Usage / Opus /
+            // Opus 1M / Haiku) so the dashboard can render Max-meter vs
+            // Extra-Usage breakdown. See src/lib/billing-buckets.ts.
+            const { classifyBillingBucket, BUCKET_DISPLAY_ORDER } = await import('../lib/billing-buckets.js');
+            const bucketMap = new Map();
+            for (const row of byModel) {
+                const b = classifyBillingBucket(row.model);
+                const existing = bucketMap.get(b.id);
+                if (existing) {
+                    existing.costCents += Number(row.costCents) || 0;
+                    existing.inputTokens += Number(row.input) || 0;
+                    existing.outputTokens += Number(row.output) || 0;
+                    existing.queries += Number(row.queries) || 0;
+                    if (!existing.models.includes(row.model))
+                        existing.models.push(row.model);
+                }
+                else {
+                    bucketMap.set(b.id, {
+                        id: b.id,
+                        label: b.label,
+                        family: b.family,
+                        context: b.context,
+                        meteredOnMax: b.meteredOnMax,
+                        costCents: Number(row.costCents) || 0,
+                        inputTokens: Number(row.input) || 0,
+                        outputTokens: Number(row.output) || 0,
+                        queries: Number(row.queries) || 0,
+                        models: [row.model],
+                    });
+                }
+            }
+            // Render in canonical order (Extra Usage anchors last so callouts
+            // hit the eye after the in-plan rows).
+            const byBucket = BUCKET_DISPLAY_ORDER
+                .map((id) => bucketMap.get(id))
+                .filter((b) => b !== undefined);
+            const bucketTotals = byBucket.reduce((acc, b) => {
+                if (b.meteredOnMax === 'extra')
+                    acc.extraCostCents += b.costCents;
+                else
+                    acc.planCostCents += b.costCents;
+                return acc;
+            }, { planCostCents: 0, extraCostCents: 0 });
             res.json({
                 totalInput: totals.ti,
                 totalOutput: totals.to_,
                 totalCacheRead: totals.tcr,
                 totalCacheCreation: totals.tcc,
                 totalTokens: totals.ti + totals.to_,
+                totalCostCents: totals.cost,
                 byModel,
                 bySource,
                 byDay,
+                byBucket,
+                bucketTotals,
             });
         }
         catch (err) {
-            res.json({ error: String(err), totalTokens: 0, byModel: [], bySource: [], byDay: [] });
+            res.json({ error: String(err), totalTokens: 0, byModel: [], bySource: [], byDay: [], byBucket: [], bucketTotals: { planCostCents: 0, extraCostCents: 0 } });
         }
         finally {
             db.close();
@@ -37199,6 +37253,16 @@ function formatTokens(n) {
   return String(n);
 }
+// 1.18.183: dollars for the billing-bucket panel. Cents in, "$X.XX"
+// out. Sub-cent values render as "<$0.01" so a tiny but nonzero
+// Extra Usage line still reads as "you have exposure" rather than "$0".
+function formatCents(cents) {
+  var c = Number(cents) || 0; // null, undefined and non-numeric input all collapse to 0
+  if (c === 0) return '$0.00';
+  if (c > 0 && c < 1) return '<$0.01'; // positive fraction of a cent
+  return '$' + (c / 100).toFixed(2); // cents to dollars, two decimals; negative input is not special-cased
+}
 function formatBytes(n) {
   if (n == null) return '—';
   if (n < 1024) return n + ' B';
@@ -38610,6 +38674,65 @@ async function refreshMetrics() {
     html += statTile(cacheEff + '%', 'Cache Hit Rate', cacheEff >= 50 ? 'var(--green)' : cacheEff >= 20 ? 'var(--yellow)' : 'var(--text-muted)');
     html += '</div>';
+    // 1.18.183: Spend by Billing Bucket — separates Max-covered usage
+    // from Extra Usage exposure (Sonnet [1m] etc.). Surfaces the bucket
+    // a model lives in, not just its token count, so a quiet Sonnet
+    // meter no longer hides rising weekly spend on a separate billing
+    // line. See src/lib/billing-buckets.ts.
+    if (u.byBucket && u.byBucket.length > 0) {
+      var planCost = (u.bucketTotals && u.bucketTotals.planCostCents) || 0;
+      var extraCost = (u.bucketTotals && u.bucketTotals.extraCostCents) || 0;
+      var totalCost = planCost + extraCost;
+      var hasExtra = extraCost > 0;
+      html += '<div class="card" style="margin-top:16px"><div class="card-header">'
+        + 'Spend by Billing Bucket'
+        + '<span style="float:right;font-size:11px;color:var(--text-muted);font-weight:400">'
+        + 'In-plan ' + esc(formatCents(planCost))
+        + ' &middot; Extra Usage ' + (hasExtra
+          ? '<span style="color:var(--orange,#f80);font-weight:600">' + esc(formatCents(extraCost)) + '</span>'
+          : esc(formatCents(extraCost)))
+        + '</span></div><div class="card-body">';
+      // Banner when Extra Usage > 0 — this is the whole point of the
+      // panel. Max plans don't comp Sonnet [1m]; surfacing it here means
+      // the user can spot it without checking the Anthropic Console.
+      if (hasExtra) {
+        html += '<div style="margin-bottom:12px;padding:10px 12px;border-radius:6px;'
+          + 'background:rgba(255,128,0,0.08);border-left:3px solid var(--orange,#f80);'
+          + 'font-size:12px;line-height:1.5">'
+          + '<strong>Extra Usage detected.</strong> '
+          + esc(formatCents(extraCost))
+          + ' of recent spend is on the Anthropic Extra Usage path (typically Sonnet 1M), '
+          + 'which is <strong>not</strong> covered by Max / Team / Enterprise subscriptions. '
+          + 'Switch heavy autonomous skills to <code>claude-opus-4-7[1m]</code> via skill frontmatter '
+          + '<code>clementine.limits.model</code>, or drop <code>[1m]</code> to stay on the standard Sonnet meter.'
+          + '</div>';
+      }
+      var maxBucketCost = Math.max.apply(null, u.byBucket.map(function(b) { return b.costCents || 0; }).concat([1]));
+      for (var bi = 0; bi < u.byBucket.length; bi++) {
+        var bk = u.byBucket[bi];
+        var bkPct = maxBucketCost > 0 ? Math.round(((bk.costCents || 0) / maxBucketCost) * 100) : 0;
+        var bkColor = bk.meteredOnMax === 'extra' ? 'var(--orange,#f80)'
+          : (bk.family === 'opus' ? 'var(--purple)'
+            : (bk.family === 'sonnet' ? 'var(--blue)'
+              : (bk.family === 'haiku' ? 'var(--green)' : 'var(--text-muted)')));
+        var bkShareNum = totalCost > 0 ? Math.round(((bk.costCents || 0) / totalCost) * 100) : 0;
+        html += '<div style="margin-bottom:10px">'
+          + '<div class="kv-row">'
+          + '<span class="kv-key">' + esc(bk.label) + '</span>'
+          + '<span class="kv-val" title="' + esc(formatTokens(bk.inputTokens || 0)) + ' input &middot; ' + esc(formatTokens(bk.outputTokens || 0)) + ' output &middot; ' + (bk.queries || 0) + ' calls">'
+          + esc(formatCents(bk.costCents || 0))
+          + '<span style="color:var(--text-muted);font-size:11px;margin-left:6px">' + bkShareNum + '%</span>'
+          + '</span>'
+          + '</div>'
+          + '<div class="metric-bar-track"><div class="metric-bar-fill" style="width:' + bkPct + '%;background:' + bkColor + '"></div></div>'
+          + '</div>';
+      }
+      html += '</div></div>';
+    }
     // Tokens by Model
     if (u.byModel && u.byModel.length > 0) {
       html += '<div class="card"><div class="card-header">Tokens by Model</div><div class="card-body">';

package/dist/gateway/cron-scheduler.d.ts CHANGED Viewed

@@ -195,8 +195,21 @@ export declare class CronScheduler {
      * any record of a Netlify site." injectContext writes into both the
      * pending-context map (visible to the next SDK turn) and the memory
      * store (searchable later by the assistant).
+     *
+     * If `taskId` is provided, stamps the task with `mirroredAt` so the
+     * startup backfill won't replay it on the next daemon restart.
      */
     private mirrorBackgroundTaskToChat;
+    /**
+     * Boot-time backfill. Mirrors any terminal-state background task whose
+     * lifecycle message never landed in the originating chat session's
+     * memory — typically because it finished before 1.18.180 wired the
+     * mirror, or because the daemon was down when delivery would have
+     * fired. Idempotent via the `mirroredAt` flag on each task file.
+     *
+     * @returns An object whose `mirrored` field is the number of tasks the
+     *   backfill handed to the mirror during this call.
+     */
+    mirrorOrphanedBackgroundDeliveries(): {
+        mirrored: number;
+    };
     /** Same idea for workflows. Workflows can be agent-scoped via WorkflowDefinition.agentSlug. */
     private dispatchContextForWorkflow;
     private runJob;

package/dist/gateway/cron-scheduler.js CHANGED Viewed

@@ -39,7 +39,7 @@ import { parseAllWorkflows as parseAllWorkflowsSync } from '../agent/workflow-ru
 import { SelfImproveLoop } from '../agent/self-improve.js';
 import { loadPromptOverridesForJob, watchPromptOverrides } from '../agent/prompt-overrides/loader.js';
 import { logAuditJsonl } from '../agent/hooks.js';
-import { listBackgroundTasks, loadBackgroundTask, markDone as markBgTaskDone, markFailed as markBgTaskFailed, markRunning as markBgTaskRunning, updateBackgroundTask, } from '../agent/background-tasks.js';
+import { findUnmirroredDeliveries, listBackgroundTasks, loadBackgroundTask, markBackgroundTaskMirrored, markDone as markBgTaskDone, markFailed as markBgTaskFailed, markRunning as markBgTaskRunning, updateBackgroundTask, } from '../agent/background-tasks.js';
 import { outcomeStatusFromGoalDisposition, recentDecisions, recordDecisionOutcome, } from '../agent/proactive-ledger.js';
 import { formatCreditBlock, getBackgroundCreditBlock, isCreditBalanceError, markBackgroundCreditBlocked, } from './credit-guard.js';
 import { isRunHealthFailure } from './job-health.js';
@@ -953,8 +953,11 @@ export class CronScheduler {
      * any record of a Netlify site." injectContext writes into both the
      * pending-context map (visible to the next SDK turn) and the memory
      * store (searchable later by the assistant).
+     *
+     * If `taskId` is provided, stamps the task with `mirroredAt` so the
+     * startup backfill won't replay it on the next daemon restart.
      */
-    mirrorBackgroundTaskToChat(sessionKey, userTextPlaceholder, assistantText) {
+    mirrorBackgroundTaskToChat(sessionKey, userTextPlaceholder, assistantText, taskId) {
         if (!sessionKey)
             return;
         try {
@@ -963,11 +966,47 @@ export class CronScheduler {
                 model: 'bg-task',
                 countExchange: true,
             });
+            if (taskId) {
+                try {
+                    markBackgroundTaskMirrored(taskId);
+                }
+                catch { /* non-fatal */ }
+            }
         }
         catch (err) {
             logger.debug({ err, sessionKey }, 'Failed to mirror background task message into chat memory');
         }
     }
+    /**
+     * Boot-time backfill. Mirrors any terminal-state background task whose
+     * lifecycle message never landed in the originating chat session's
+     * memory — typically because it finished before 1.18.180 wired the
+     * mirror, or because the daemon was down when delivery would have
+     * fired. Idempotent via the `mirroredAt` flag on each task file.
+     *
+     * Candidates come from `findUnmirroredDeliveries()` called with no
+     * options, so its default recency window applies. Any exception thrown
+     * inside the loop is caught and logged as a warning; the tasks handled
+     * before the throw stay counted and the remaining ones are not visited.
+     *
+     * NOTE(review): `mirrored` is incremented after every call to
+     * `mirrorBackgroundTaskToChat`, which catches and debug-logs its own
+     * failures, so the count reflects attempts rather than confirmed mirrors.
+     *
+     * @returns An object whose `mirrored` field is the number of tasks handed
+     *   to the mirror during this call.
+     */
+    mirrorOrphanedBackgroundDeliveries() {
+        let mirrored = 0;
+        try {
+            for (const task of findUnmirroredDeliveries()) {
+                const promptSnippet = (task.prompt ?? '').slice(0, 200); // at most the first 200 characters of the prompt
+                const headSummary = `${task.id} (${task.status})`;
+                const body = (task.result ?? task.error ?? '(no saved output)').slice(0, 1500); // result first, then error; capped at 1500 characters
+                const placeholder = `[Background task ${headSummary} delivered: ${promptSnippet}]`; // NOTE(review): says "delivered" for every status, including failed ones
+                const message = task.status === 'done'
+                    ? `**Background task ${task.id} done** — ${promptSnippet}\n\n${body}`
+                    : `**Background task ${task.id} ${task.status}** — ${promptSnippet}\n\n${body}`; // NOTE(review): both branches render identical text, so the conditional is redundant
+                this.mirrorBackgroundTaskToChat(task.sessionKey, placeholder, message, task.id); // passing the id lets the mirror stamp mirroredAt
+                mirrored++;
+            }
+            if (mirrored > 0) {
+                logger.info({ mirrored }, 'Mirrored orphaned background task deliveries into chat memory');
+            }
+        }
+        catch (err) {
+            logger.warn({ err }, 'Background-task backfill failed — non-fatal');
+        }
+        return { mirrored };
+    }
     /** Same idea for workflows. Workflows can be agent-scoped via WorkflowDefinition.agentSlug. */
     dispatchContextForWorkflow(name) {
         const wf = this.workflowDefs.find(w => w.name === name);
@@ -1947,6 +1986,9 @@ export class CronScheduler {
             // memory so the assistant remembers it has a task running. Without
             // this, the next chat turn the user sends comes back to a session
             // that has no idea any bg: work was ever queued.
+            // Note: we do NOT stamp `mirroredAt` here — that's reserved for the
+            // terminal-state mirror (done/failed) so the backfill only counts
+            // deliveries, not intent-to-run.
             this.mirrorBackgroundTaskToChat(started.sessionKey, `[Background task ${started.id} queued: ${started.prompt.slice(0, 200)}]`, startMessage);
             // Don't await — fire-and-forget. The 3s tick continues to scan.
             const maxHours = Math.max(0.05, started.maxMinutes / 60);
@@ -1992,8 +2034,10 @@ export class CronScheduler {
                     .catch((err) => logger.debug({ err, id: started.id }, 'Failed to dispatch background task result'));
                 // Mirror into chat memory so a follow-up like "fix the site"
                 // doesn't get a blank stare — the assistant needs to remember
-                // it just deployed something and where it lives.
-                this.mirrorBackgroundTaskToChat(completed.sessionKey, `[Background task ${completed.id} delivered: ${started.prompt.slice(0, 200)}]`, deliveryMessage);
+                // it just deployed something and where it lives. Stamp
+                // `mirroredAt` so the startup backfill won't replay this on the
+                // next restart.
+                this.mirrorBackgroundTaskToChat(completed.sessionKey, `[Background task ${completed.id} delivered: ${started.prompt.slice(0, 200)}]`, deliveryMessage, completed.id);
             }).catch((err) => {
                 clearInterval(progressTimer);
                 const errStr = String(err).slice(0, 500);
@@ -2010,7 +2054,7 @@ export class CronScheduler {
                     .catch(() => { });
                 // Mirror failures too — the next chat turn should know the task
                 // died rather than silently pretending it never happened.
-                this.mirrorBackgroundTaskToChat(failed.sessionKey, `[Background task ${failed.id} failed: ${started.prompt.slice(0, 200)}]`, failMessage);
+                this.mirrorBackgroundTaskToChat(failed.sessionKey, `[Background task ${failed.id} failed: ${started.prompt.slice(0, 200)}]`, failMessage, failed.id);
             });
         }
     }
@@ -2392,6 +2436,14 @@ export class CronScheduler {
             const response = await this.gateway.handleWorkflow(wf, inputs ?? {});
             if (response && response !== '*(workflow completed — no output)*') {
                 await this.dispatcher.send(`**[Workflow: ${name}]**\n\n${response.slice(0, 1500)}`, this.dispatchContextForWorkflow(name));
+                // Mirror under a workflow-scoped session so semantic search can
+                // surface this run regardless of who triggered it.
+                try {
+                    this.gateway.injectContext(`workflow:${name}`, `[Workflow ${name} ran]`, response, { pending: false, model: 'workflow', countExchange: true });
+                }
+                catch (err) {
+                    logger.debug({ err, workflow: name }, 'workflow transcript mirror failed (non-fatal)');
+                }
                 // Inject into owner's DM session
                 if (DISCORD_OWNER_ID && DISCORD_OWNER_ID !== '0') {
                     this.gateway.injectContext(`discord:user:${DISCORD_OWNER_ID}`, `[Workflow: ${name}]`, response);
@@ -2405,6 +2457,14 @@ export class CronScheduler {
             logger.error({ err, workflow: name }, `Workflow '${name}' failed`);
             const errMsg = `Workflow '${name}' failed: ${String(err).slice(0, 300)}`;
             await this.dispatcher.send(errMsg, this.dispatchContextForWorkflow(name));
+            // Mirror failures into memory too — "what happened to that workflow?"
+            // should find something instead of nothing.
+            try {
+                this.gateway.injectContext(`workflow:${name}`, `[Workflow ${name} failed]`, errMsg, { pending: false, model: 'workflow', countExchange: true });
+            }
+            catch (mirrorErr) {
+                logger.debug({ err: mirrorErr, workflow: name }, 'workflow failure mirror failed (non-fatal)');
+            }
             return errMsg;
         }
         finally {

package/dist/index.js CHANGED Viewed

@@ -901,6 +901,20 @@ async function asyncMain() {
     catch (err) {
         logger.warn({ err }, 'Background task hygiene check failed — non-fatal');
     }
+    // Backfill orphaned bg-task deliveries into chat memory. Picks up any
+    // task that finished in the last 7 days whose lifecycle message was
+    // never mirrored (e.g. completed before 1.18.180 wired the mirror, or
+    // while the daemon was down). Idempotent via the `mirroredAt` flag on
+    // each task file — safe to run on every boot.
+    try {
+        const result = cronScheduler.mirrorOrphanedBackgroundDeliveries();
+        if (result.mirrored > 0) {
+            logger.info({ count: result.mirrored }, 'Backfilled orphaned background task deliveries into chat memory');
+        }
+    }
+    catch (err) {
+        logger.warn({ err }, 'Background-task delivery backfill failed — non-fatal');
+    }
     const timerInterval = startTimerChecker(dispatcher, gateway);
     // Start brain ingest scheduler (polls registered REST sources on their cron)
     try {

package/dist/lib/billing-buckets.d.ts ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * Anthropic billing-bucket classifier.
+ *
+ * Maps a Claude model string (full ID or SDK tier alias) to the metering
+ * bucket Anthropic bills against. The headline distinction on Max /
+ * Team / Enterprise plans is in-plan (covered by the subscription's
+ * usage allowance) vs. Extra Usage (billed separately, surprises the
+ * meter watcher).
+ *
+ * **Why this matters (2026-05-11)**: Sonnet `[1m]` is on the Extra Usage
+ * path even with Max. Max covers Opus long-context but not Sonnet 1M.
+ * Without per-bucket aggregation, the dashboard cost number conflates
+ * "covered by my plan" with "billed separately" and the user has no way
+ * to spot Extra Usage exposure until the invoice arrives. See
+ * memory/feedback_sonnet_1m_extra_usage.md.
+ *
+ * Pure function, no I/O. Safe to call from any layer.
+ */
+export type BillingBucketId = 'sonnet' | 'sonnet-1m' | 'opus' | 'opus-1m' | 'haiku' | 'other';
+export type BillingBucketMetering =
+/** Counts against the Max/Team/Enterprise plan's usage allowance. */
+'plan'
+/** Billed separately as Extra Usage even when the user has Max. */
+ | 'extra';
+export interface BillingBucket {
+    /** Stable bucket id, suitable for grouping/keys. */
+    id: BillingBucketId;
+    /** Human-readable label for UI ("Sonnet (200K)", "Sonnet (1M) — Extra Usage"). */
+    label: string;
+    /** Model family irrespective of context window. */
+    family: 'sonnet' | 'opus' | 'haiku' | 'other';
+    /** Context window class. */
+    context: '200k' | '1m';
+    /** How Anthropic bills this on a Max plan. */
+    meteredOnMax: BillingBucketMetering;
+}
+/**
+ * Classify a model string into its billing bucket.
+ *
+ * Accepts:
+ *  - Full model IDs: `claude-sonnet-4-6`, `claude-sonnet-4-6[1m]`,
+ *    `claude-opus-4-7[1m]`, `claude-haiku-4-5-20251001`, etc.
+ *  - SDK tier aliases: `sonnet`, `opus`, `haiku` (no `[1m]` form for
+ *    tier aliases — they always resolve to standard context).
+ *  - Empty / unknown strings (anything that does not contain `opus`,
+ *    `sonnet`, or `haiku`, case-insensitively) → `'other'` bucket.
+ */
+export declare function classifyBillingBucket(model: string | undefined | null): BillingBucket;
+/** Canonical render order for the dashboard panel. */
+export declare const BUCKET_DISPLAY_ORDER: readonly BillingBucketId[];
+/** Convenience: is this bucket on the Extra Usage path for Max plans? */
+export declare function isExtraUsage(bucket: BillingBucket): boolean;
+//# sourceMappingURL=billing-buckets.d.ts.map

package/dist/lib/billing-buckets.js ADDED Viewed

@@ -0,0 +1,108 @@
+/**
+ * Anthropic billing-bucket classifier.
+ *
+ * Maps a Claude model string (full ID or SDK tier alias) to the metering
+ * bucket Anthropic bills against. The headline distinction on Max /
+ * Team / Enterprise plans is in-plan (covered by the subscription's
+ * usage allowance) vs. Extra Usage (billed separately, surprises the
+ * meter watcher).
+ *
+ * **Why this matters (2026-05-11)**: Sonnet `[1m]` is on the Extra Usage
+ * path even with Max. Max covers Opus long-context but not Sonnet 1M.
+ * Without per-bucket aggregation, the dashboard cost number conflates
+ * "covered by my plan" with "billed separately" and the user has no way
+ * to spot Extra Usage exposure until the invoice arrives. See
+ * memory/feedback_sonnet_1m_extra_usage.md.
+ *
+ * Pure function, no I/O. Safe to call from any layer.
+ */
+/**
+ * Classify a model string into its billing bucket.
+ *
+ * Accepts:
+ *  - Full model IDs: `claude-sonnet-4-6`, `claude-sonnet-4-6[1m]`,
+ *    `claude-opus-4-7[1m]`, `claude-haiku-4-5-20251001`, etc.
+ *  - SDK tier aliases: `sonnet`, `opus`, `haiku` (no `[1m]` form for
+ *    tier aliases — they always resolve to standard context).
+ *  - Empty / unknown strings (anything that does not contain `opus`,
+ *    `sonnet`, or `haiku`, case-insensitively) → `'other'` bucket.
+ */
+export function classifyBillingBucket(model) {
+    const m = String(model ?? '').toLowerCase().trim();
+    if (!m)
+        return OTHER;
+    const is1m = /\[1m\]/i.test(m);
+    // Tier aliases — no context-window suffix possible.
+    if (m === 'sonnet')
+        return SONNET_200K;
+    if (m === 'opus')
+        return OPUS_200K;
+    if (m === 'haiku')
+        return HAIKU;
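+    // Full model IDs, matched by family substring. Opus is checked before
+    // Sonnet. No name seen today contains both family words (e.g.
+    // "opusplan" contains "opus" but not "sonnet"), so the reverse order
+    // would give the same results; the order is pinned so that a future
+    // name containing both resolves deterministically to Opus.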
+    if (m.includes('opus'))
+        return is1m ? OPUS_1M : OPUS_200K;
+    if (m.includes('sonnet'))
+        return is1m ? SONNET_1M : SONNET_200K;
+    if (m.includes('haiku'))
+        return HAIKU; // 1M not supported on Haiku
+    return { ...OTHER, label: model || 'Unknown' };
+}
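+/**
+ * Stable singletons so equality checks and bucket-key lookups are cheap.
+ * Exception: a non-empty, unrecognized model string gets a fresh copy of
+ * OTHER carrying that string as its label, so compare `id` rather than
+ * object identity for the `'other'` bucket.
+ */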
+const SONNET_200K = {
+    id: 'sonnet',
+    label: 'Sonnet (200K)',
+    family: 'sonnet',
+    context: '200k',
+    meteredOnMax: 'plan',
+};
+const SONNET_1M = {
+    id: 'sonnet-1m',
+    label: 'Sonnet (1M) — Extra Usage',
+    family: 'sonnet',
+    context: '1m',
+    meteredOnMax: 'extra',
+};
+const OPUS_200K = {
+    id: 'opus',
+    label: 'Opus (200K)',
+    family: 'opus',
+    context: '200k',
+    meteredOnMax: 'plan',
+};
+const OPUS_1M = {
+    id: 'opus-1m',
+    label: 'Opus (1M)',
+    family: 'opus',
+    context: '1m',
+    meteredOnMax: 'plan',
+};
+const HAIKU = {
+    id: 'haiku',
+    label: 'Haiku',
+    family: 'haiku',
+    context: '200k',
+    meteredOnMax: 'plan',
+};
+const OTHER = {
+    id: 'other',
+    label: 'Unknown',
+    family: 'other',
+    context: '200k',
+    meteredOnMax: 'plan',
+};
+/** Canonical render order for the dashboard panel. */
+export const BUCKET_DISPLAY_ORDER = [
+    'sonnet',
+    'haiku',
+    'opus',
+    'opus-1m',
+    'sonnet-1m', // Extra Usage goes after every named in-plan bucket so it visually anchors the callout
+    'other',
+];
+/** Convenience: is this bucket on the Extra Usage path for Max plans? */
+export function isExtraUsage(bucket) {
+    return bucket.meteredOnMax === 'extra';
+}
+//# sourceMappingURL=billing-buckets.js.map

package/dist/types.d.ts CHANGED Viewed

@@ -295,6 +295,7 @@ export interface BackgroundTask {
     resultPath?: string;
     error?: string;
     deliverableNote?: string;
+    mirroredAt?: string;
 }
 /**
  * State for one specialist agent's heartbeat scheduler. Persisted at

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.180",
+  "version": "1.18.183",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",