clementine-agent 1.18.180 → 1.18.183

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1208,6 +1208,8 @@ Obsidian vault with YAML frontmatter, [[wikilinks]], #tags.
1208
1208
  **Remembering:** Durable facts → memory_write(action="update_memory"). Daily context → note_take / memory_write(action="append_daily"). New person → note_create. New task → task_add.
1209
1209
  Save important facts immediately; a background agent also extracts after each exchange.
1210
1210
 
1211
+ **Recalling — REQUIRED behavior:** When the user references past work you don't have in immediate context — a URL, a deployment, a file you created, a task or background job you ran, a person/project/domain name you don't have inline — call \`memory_search\` (or \`transcript_search\` for chat history) BEFORE asking the user to provide it and BEFORE replying that you have no record. Saying "I don't see any record of that" without having searched is a memory failure, not an honest answer. Background tasks, cron runs, deployments, and prior chat turns are all in the SQLite memory store with dense embeddings — semantic search will surface them even when the wording doesn't match exactly.
1212
+
1211
1213
  ## Self-Configuration (never tell ${owner} to edit a config file)
1212
1214
 
1213
1215
  Clementine is self-configuring. Every credential, every integration, every tool permission can be set by calling a tool — no hand-editing.
@@ -64,6 +64,26 @@ export declare function resumeBackgroundTask(id: string, opts?: BackgroundTaskOp
64
64
  export declare function interruptStaleRunningTasks(opts?: BackgroundTaskOptions): number;
65
65
  /** Backward-compatible export for callers/tests using the old name. */
66
66
  export declare const abortStaleRunningTasks: typeof interruptStaleRunningTasks;
67
+ /**
68
+ * Find background tasks whose lifecycle messages were never mirrored into
69
+ * the originating chat session's memory — typically because they completed
70
+ * before 1.18.180 wired the mirror, or because the daemon was down when the
71
+ * delivery would have fired. Returns terminal-state tasks (done / failed /
72
+ * interrupted / aborted) that:
73
+ * - have a sessionKey (so we know where to mirror them)
74
+ * - lack a `mirroredAt` flag (haven't been mirrored yet)
75
+ * - completed within the recency window (default: last 7 days)
76
+ *
77
+ * Caller (typically the cron-scheduler on daemon start) is responsible for
78
+ * doing the actual mirror via gateway.injectContext and then stamping each
79
+ * task with `markBackgroundTaskMirrored(id)`. Keeping the injection out of
80
+ * this module avoids a dependency cycle on the gateway.
81
+ */
82
+ export declare function findUnmirroredDeliveries(opts?: BackgroundTaskOptions & {
83
+ sinceMs?: number;
84
+ }): BackgroundTask[];
85
+ /** Stamp `mirroredAt` so future boots don't re-mirror the same delivery. */
86
+ export declare function markBackgroundTaskMirrored(id: string, opts?: BackgroundTaskOptions): void;
67
87
  /** Delete a task file. Callers should avoid deleting active tasks. */
68
88
  export declare function deleteBackgroundTask(id: string, opts?: BackgroundTaskOptions): void;
69
89
  /** Backward-compatible test helper alias. */
@@ -205,6 +205,49 @@ export function interruptStaleRunningTasks(opts) {
205
205
  }
206
206
  /** Backward-compatible export for callers/tests using the old name. */
207
207
  export const abortStaleRunningTasks = interruptStaleRunningTasks;
208
+ /**
209
+ * Find background tasks whose lifecycle messages were never mirrored into
210
+ * the originating chat session's memory — typically because they completed
211
+ * before 1.18.180 wired the mirror, or because the daemon was down when the
212
+ * delivery would have fired. Returns terminal-state tasks (done / failed /
213
+ * interrupted / aborted) that:
214
+ * - have a sessionKey (so we know where to mirror them)
215
+ * - lack a `mirroredAt` flag (haven't been mirrored yet)
216
+ * - completed within the recency window (default: last 7 days)
217
+ *
218
+ * Caller (typically the cron-scheduler on daemon start) is responsible for
219
+ * doing the actual mirror via gateway.injectContext and then stamping each
220
+ * task with `markBackgroundTaskMirrored(id)`. Keeping the injection out of
221
+ * this module avoids a dependency cycle on the gateway.
222
+ */
223
+ export function findUnmirroredDeliveries(opts) {
224
+ const sinceMs = opts?.sinceMs ?? 7 * 24 * 60 * 60_000;
225
+ const cutoff = Date.now() - sinceMs;
226
+ const terminal = ['done', 'failed', 'interrupted', 'aborted'];
227
+ const out = [];
228
+ for (const status of terminal) {
229
+ for (const task of listBackgroundTasks({ status }, opts)) {
230
+ if (task.mirroredAt)
231
+ continue; // already mirrored on a prior boot
232
+ if (!task.sessionKey)
233
+ continue; // no chat to mirror back to
234
+ const stampIso = task.completedAt ?? task.interruptedAt ?? task.createdAt;
235
+ const stamp = Date.parse(stampIso ?? '');
236
+ if (Number.isFinite(stamp) && stamp < cutoff)
237
+ continue;
238
+ out.push(task);
239
+ }
240
+ }
241
+ return out;
242
+ }
243
+ /** Stamp `mirroredAt` so future boots don't re-mirror the same delivery. */
244
+ export function markBackgroundTaskMirrored(id, opts) {
245
+ const task = loadBackgroundTask(id, opts);
246
+ if (!task)
247
+ return;
248
+ task.mirroredAt = new Date().toISOString();
249
+ safeWrite(pathFor(id, opts), task);
250
+ }
208
251
  /** Delete a task file. Callers should avoid deleting active tasks. */
209
252
  export function deleteBackgroundTask(id, opts) {
210
253
  try {
@@ -219,18 +219,26 @@ function shouldAutoDelegate(skill, source) {
219
219
  return true;
220
220
  }
221
221
  /**
222
- * Resolve the model string to use for an autonomous run. The 1M-context
223
- * variant gives the worker subagent 5× the room of the standard 200K
224
- * window — enough headroom that compaction is rare and the
225
- * "refetch-after-compact" loop pattern (seen in the 2026-05-11
226
- * imessage-triage failures) never occurs in practice.
222
+ * Resolve the model string to use for an autonomous run.
227
223
  *
228
- * The actual 1M routing is gated by the user's plan (see
229
- * config.ts:usesOneMillionContext) and the model family Haiku doesn't
230
- * support 1M, and Sonnet 1M needs the [1m] suffix. We return the full
231
- * Sonnet model ID with [1m] appended; downstream
232
- * normalizeClaudeSdkOptionsForOneMillionContext strips it back off when
233
- * the plan doesn't support it.
224
+ * **Default: plain Sonnet (200K).** Sonnet `[1m]` is the "Extra Usage
225
+ * path" on Anthropic's billing — it is NOT covered by Max/Team/Enterprise
226
+ * subscriptions, regardless of `CLEMENTINE_1M_CONTEXT_MODE` (the mode
227
+ * flag only governs Opus long-context, which Max does cover). Defaulting
228
+ * autonomous work to Sonnet [1m] silently routes cron, scheduled-skill,
229
+ * heartbeat, and team-task runs onto a separate metered bill — surprising
230
+ * on Max plans where the standard Sonnet meter stays quiet but weekly
231
+ * usage climbs.
232
+ *
233
+ * Compaction risk on the 200K window is mitigated by the auto-delegating
234
+ * wrapper (1.18.173): the worker subagent runs in an isolated context
235
+ * containing only the skill body + its own tool turns, so even
236
+ * data-heavy procedures comfortably fit.
237
+ *
238
+ * Skills that genuinely need 1M (rare — verify the workload first) opt
239
+ * in explicitly via frontmatter `clementine.limits.model:
240
+ * claude-sonnet-4-6[1m]` (Extra Usage) or `claude-opus-4-7[1m]` (covered
241
+ * by Max). Callers may also override per-invocation via `options.model`.
234
242
  */
235
243
  function resolveAutonomousModel(explicitModel, skillModel) {
236
244
  // Caller's explicit model wins.
@@ -239,22 +247,17 @@ function resolveAutonomousModel(explicitModel, skillModel) {
239
247
  // Skill-declared model wins next.
240
248
  if (skillModel)
241
249
  return skillModel;
242
- // Default: Sonnet [1m]. The normalizer will strip [1m] if the user's
243
- // plan doesn't include it, falling back to standard Sonnet — still
244
- // works, just with less headroom.
245
- const base = MODELS.sonnet;
246
- if (!base)
247
- return undefined;
248
- if (/\[1m\]/i.test(base))
249
- return base;
250
- return `${base}[1m]`;
250
+ // Default: plain Sonnet (no [1m]). Stays on the standard Sonnet meter
251
+ // covered by Max plans; no Extra Usage exposure.
252
+ return MODELS.sonnet;
251
253
  }
252
254
  /**
253
255
  * Build the AgentDefinition for the `skill-worker` subagent that
254
256
  * executes this skill in an isolated context. The subagent's system
255
257
  * prompt is the skill body; its tools are the skill's computed
256
- * allowlist; its model is the same 1M-context model the parent uses
257
- * (the worker is where the real data flows — the parent stays tiny).
258
+ * allowlist; its model is whatever resolveAutonomousModel returned —
259
+ * by default plain Sonnet 200K, which the isolated worker context
260
+ * comfortably fits without compaction.
258
261
  *
259
262
  * `description` is what the SDK shows the parent for routing decisions.
260
263
  * Since the parent is `forceSubagent`'d to this worker, the description
@@ -275,8 +278,9 @@ function buildSkillWorkerAgent(skill, renderedProcedure, effectiveTools, model,
275
278
  `## Procedure\n\n${renderedProcedure}`,
276
279
  tools: effectiveTools,
277
280
  // SDK accepts 'sonnet' / 'opus' / 'haiku' tier aliases OR full model
278
- // IDs. We pass the full ID with [1m] when present; the SDK strips
279
- // [1m] internally for plans that don't support it.
281
+ // IDs. Default is plain Sonnet (200K); when a skill or caller opts
282
+ // into a [1m] variant explicitly, we pass it through and the SDK
283
+ // strips [1m] internally for plans that don't support it.
280
284
  ...(model ? { model } : {}),
281
285
  effort: 'medium',
282
286
  maxTurns: workerMaxTurns,
@@ -368,10 +372,12 @@ export async function runSkill(name, options = {}) {
368
372
  ...(skill.layout === 'folder' ? [path.dirname(skill.filePath)] : []),
369
373
  ];
370
374
  const mutatingSkill = effectiveTools.some((t) => t === 'Write' || t === 'Edit' || t === 'Bash' || /__(write|edit|update|create|delete|send|post|patch|set)/i.test(t));
371
- // 1.18.173: resolve the effective model. Autonomous runs default to
372
- // Sonnet [1m] (1M context window) so the worker subagent has the
373
- // room of a standard 200K-window model. resolveAutonomousModel honors
374
- // explicit overrides + skill-declared limits.model first.
375
+ // 1.18.182: resolve the effective model. Autonomous runs default to
376
+ // plain Sonnet (200K) — covered by the standard Sonnet meter on Max,
377
+ // no Extra Usage exposure. Worker-subagent isolation (1.18.173) keeps
378
+ // the 200K window comfortably under compaction even for heavy skills.
379
+ // resolveAutonomousModel honors explicit overrides + skill-declared
380
+ // limits.model first, so a skill that genuinely needs 1M can opt in.
375
381
  const skillModel = skill.frontmatter?.clementine?.limits?.model;
376
382
  const effectiveModel = autoDelegate
377
383
  ? resolveAutonomousModel(options.model, skillModel)
@@ -10667,7 +10667,7 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
10667
10667
  // ── Token Usage API ──────────────────────────────────────────────
10668
10668
  app.get('/api/metrics/usage', async (_req, res) => {
10669
10669
  if (!existsSync(MEMORY_DB_PATH)) {
10670
- res.json({ error: 'No DB', totalTokens: 0, byModel: [], bySource: [], byDay: [] });
10670
+ res.json({ error: 'No DB', totalTokens: 0, byModel: [], bySource: [], byDay: [], byBucket: [], bucketTotals: { planCostCents: 0, extraCostCents: 0 } });
10671
10671
  return;
10672
10672
  }
10673
10673
  const Database = (await import('better-sqlite3')).default;
@@ -10676,32 +10676,86 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
10676
10676
  // Check if table exists
10677
10677
  const tableExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='usage_log'").get();
10678
10678
  if (!tableExists) {
10679
- res.json({ totalTokens: 0, totalInput: 0, totalOutput: 0, byModel: [], bySource: [], byDay: [] });
10679
+ res.json({ totalTokens: 0, totalInput: 0, totalOutput: 0, byModel: [], bySource: [], byDay: [], byBucket: [], bucketTotals: { planCostCents: 0, extraCostCents: 0 } });
10680
10680
  return;
10681
10681
  }
10682
+ // 1.18.183: cost_cents may not exist on older installs (added via
10683
+ // ALTER at store.ts:675). Probe before referencing so we degrade
10684
+ // to "no cost data" rather than erroring out.
10685
+ const columns = new Set(db.prepare('PRAGMA table_info(usage_log)').all().map((c) => c.name));
10686
+ const costExpr = columns.has('cost_cents') ? 'COALESCE(SUM(cost_cents), 0)' : '0';
10682
10687
  const totals = db.prepare(`SELECT COALESCE(SUM(input_tokens), 0) as ti, COALESCE(SUM(output_tokens), 0) as to_,
10683
- COALESCE(SUM(cache_read_tokens), 0) as tcr, COALESCE(SUM(cache_creation_tokens), 0) as tcc
10688
+ COALESCE(SUM(cache_read_tokens), 0) as tcr, COALESCE(SUM(cache_creation_tokens), 0) as tcc,
10689
+ ${costExpr} as cost
10684
10690
  FROM usage_log`).get();
10685
- const byModel = db.prepare(`SELECT model, SUM(input_tokens) as input, SUM(output_tokens) as output, SUM(cache_read_tokens) as cacheRead
10691
+ const byModel = db.prepare(`SELECT model, SUM(input_tokens) as input, SUM(output_tokens) as output,
10692
+ SUM(cache_read_tokens) as cacheRead, ${costExpr} as costCents, COUNT(*) as queries
10686
10693
  FROM usage_log GROUP BY model ORDER BY input DESC`).all();
10687
10694
  const bySource = db.prepare(`SELECT source, SUM(input_tokens) as input, SUM(output_tokens) as output
10688
10695
  FROM usage_log GROUP BY source ORDER BY input DESC`).all();
10689
10696
  const byDay = db.prepare(`SELECT date(created_at) as day, SUM(input_tokens) as input, SUM(output_tokens) as output
10690
10697
  FROM usage_log WHERE created_at >= date('now', '-7 days')
10691
10698
  GROUP BY date(created_at) ORDER BY day`).all();
10699
+ // 1.18.183: per-bucket aggregation. Same byModel rows, classified
10700
+ // by billing bucket (Sonnet 200K / Sonnet 1M Extra Usage / Opus /
10701
+ // Opus 1M / Haiku) so the dashboard can render Max-meter vs
10702
+ // Extra-Usage breakdown. See src/lib/billing-buckets.ts.
10703
+ const { classifyBillingBucket, BUCKET_DISPLAY_ORDER } = await import('../lib/billing-buckets.js');
10704
+ const bucketMap = new Map();
10705
+ for (const row of byModel) {
10706
+ const b = classifyBillingBucket(row.model);
10707
+ const existing = bucketMap.get(b.id);
10708
+ if (existing) {
10709
+ existing.costCents += Number(row.costCents) || 0;
10710
+ existing.inputTokens += Number(row.input) || 0;
10711
+ existing.outputTokens += Number(row.output) || 0;
10712
+ existing.queries += Number(row.queries) || 0;
10713
+ if (!existing.models.includes(row.model))
10714
+ existing.models.push(row.model);
10715
+ }
10716
+ else {
10717
+ bucketMap.set(b.id, {
10718
+ id: b.id,
10719
+ label: b.label,
10720
+ family: b.family,
10721
+ context: b.context,
10722
+ meteredOnMax: b.meteredOnMax,
10723
+ costCents: Number(row.costCents) || 0,
10724
+ inputTokens: Number(row.input) || 0,
10725
+ outputTokens: Number(row.output) || 0,
10726
+ queries: Number(row.queries) || 0,
10727
+ models: [row.model],
10728
+ });
10729
+ }
10730
+ }
10731
+ // Render in canonical order (Extra Usage anchors last so callouts
10732
+ // hit the eye after the in-plan rows).
10733
+ const byBucket = BUCKET_DISPLAY_ORDER
10734
+ .map((id) => bucketMap.get(id))
10735
+ .filter((b) => b !== undefined);
10736
+ const bucketTotals = byBucket.reduce((acc, b) => {
10737
+ if (b.meteredOnMax === 'extra')
10738
+ acc.extraCostCents += b.costCents;
10739
+ else
10740
+ acc.planCostCents += b.costCents;
10741
+ return acc;
10742
+ }, { planCostCents: 0, extraCostCents: 0 });
10692
10743
  res.json({
10693
10744
  totalInput: totals.ti,
10694
10745
  totalOutput: totals.to_,
10695
10746
  totalCacheRead: totals.tcr,
10696
10747
  totalCacheCreation: totals.tcc,
10697
10748
  totalTokens: totals.ti + totals.to_,
10749
+ totalCostCents: totals.cost,
10698
10750
  byModel,
10699
10751
  bySource,
10700
10752
  byDay,
10753
+ byBucket,
10754
+ bucketTotals,
10701
10755
  });
10702
10756
  }
10703
10757
  catch (err) {
10704
- res.json({ error: String(err), totalTokens: 0, byModel: [], bySource: [], byDay: [] });
10758
+ res.json({ error: String(err), totalTokens: 0, byModel: [], bySource: [], byDay: [], byBucket: [], bucketTotals: { planCostCents: 0, extraCostCents: 0 } });
10705
10759
  }
10706
10760
  finally {
10707
10761
  db.close();
@@ -37199,6 +37253,16 @@ function formatTokens(n) {
37199
37253
  return String(n);
37200
37254
  }
37201
37255
 
37256
+ // 1.18.183: dollars for the billing-bucket panel. Cents in, "$X.XX"
37257
+ // out. Sub-cent values render as "<$0.01" so a tiny but nonzero
37258
+ // Extra Usage line still reads as "you have exposure" rather than "$0".
37259
+ function formatCents(cents) {
37260
+ var c = Number(cents) || 0;
37261
+ if (c === 0) return '$0.00';
37262
+ if (c > 0 && c < 1) return '<$0.01';
37263
+ return '$' + (c / 100).toFixed(2);
37264
+ }
37265
+
37202
37266
  function formatBytes(n) {
37203
37267
  if (n == null) return '—';
37204
37268
  if (n < 1024) return n + ' B';
@@ -38610,6 +38674,65 @@ async function refreshMetrics() {
38610
38674
  html += statTile(cacheEff + '%', 'Cache Hit Rate', cacheEff >= 50 ? 'var(--green)' : cacheEff >= 20 ? 'var(--yellow)' : 'var(--text-muted)');
38611
38675
  html += '</div>';
38612
38676
 
38677
+ // 1.18.183: Spend by Billing Bucket — separates Max-covered usage
38678
+ // from Extra Usage exposure (Sonnet [1m] etc.). Surfaces the bucket
38679
+ // a model lives in, not just its token count, so a quiet Sonnet
38680
+ // meter no longer hides rising weekly spend on a separate billing
38681
+ // line. See src/lib/billing-buckets.ts.
38682
+ if (u.byBucket && u.byBucket.length > 0) {
38683
+ var planCost = (u.bucketTotals && u.bucketTotals.planCostCents) || 0;
38684
+ var extraCost = (u.bucketTotals && u.bucketTotals.extraCostCents) || 0;
38685
+ var totalCost = planCost + extraCost;
38686
+ var hasExtra = extraCost > 0;
38687
+
38688
+ html += '<div class="card" style="margin-top:16px"><div class="card-header">'
38689
+ + 'Spend by Billing Bucket'
38690
+ + '<span style="float:right;font-size:11px;color:var(--text-muted);font-weight:400">'
38691
+ + 'In-plan ' + esc(formatCents(planCost))
38692
+ + ' &middot; Extra Usage ' + (hasExtra
38693
+ ? '<span style="color:var(--orange,#f80);font-weight:600">' + esc(formatCents(extraCost)) + '</span>'
38694
+ : esc(formatCents(extraCost)))
38695
+ + '</span></div><div class="card-body">';
38696
+
38697
+ // Banner when Extra Usage > 0 — this is the whole point of the
38698
+ // panel. Max plans don't comp Sonnet [1m]; surfacing it here means
38699
+ // the user can spot it without checking the Anthropic Console.
38700
+ if (hasExtra) {
38701
+ html += '<div style="margin-bottom:12px;padding:10px 12px;border-radius:6px;'
38702
+ + 'background:rgba(255,128,0,0.08);border-left:3px solid var(--orange,#f80);'
38703
+ + 'font-size:12px;line-height:1.5">'
38704
+ + '<strong>Extra Usage detected.</strong> '
38705
+ + esc(formatCents(extraCost))
38706
+ + ' of recent spend is on the Anthropic Extra Usage path (typically Sonnet 1M), '
38707
+ + 'which is <strong>not</strong> covered by Max / Team / Enterprise subscriptions. '
38708
+ + 'Switch heavy autonomous skills to <code>claude-opus-4-7[1m]</code> via skill frontmatter '
38709
+ + '<code>clementine.limits.model</code>, or drop <code>[1m]</code> to stay on the standard Sonnet meter.'
38710
+ + '</div>';
38711
+ }
38712
+
38713
+ var maxBucketCost = Math.max.apply(null, u.byBucket.map(function(b) { return b.costCents || 0; }).concat([1]));
38714
+ for (var bi = 0; bi < u.byBucket.length; bi++) {
38715
+ var bk = u.byBucket[bi];
38716
+ var bkPct = maxBucketCost > 0 ? Math.round(((bk.costCents || 0) / maxBucketCost) * 100) : 0;
38717
+ var bkColor = bk.meteredOnMax === 'extra' ? 'var(--orange,#f80)'
38718
+ : (bk.family === 'opus' ? 'var(--purple)'
38719
+ : (bk.family === 'sonnet' ? 'var(--blue)'
38720
+ : (bk.family === 'haiku' ? 'var(--green)' : 'var(--text-muted)')));
38721
+ var bkShareNum = totalCost > 0 ? Math.round(((bk.costCents || 0) / totalCost) * 100) : 0;
38722
+ html += '<div style="margin-bottom:10px">'
38723
+ + '<div class="kv-row">'
38724
+ + '<span class="kv-key">' + esc(bk.label) + '</span>'
38725
+ + '<span class="kv-val" title="' + esc(formatTokens(bk.inputTokens || 0)) + ' input &middot; ' + esc(formatTokens(bk.outputTokens || 0)) + ' output &middot; ' + (bk.queries || 0) + ' calls">'
38726
+ + esc(formatCents(bk.costCents || 0))
38727
+ + '<span style="color:var(--text-muted);font-size:11px;margin-left:6px">' + bkShareNum + '%</span>'
38728
+ + '</span>'
38729
+ + '</div>'
38730
+ + '<div class="metric-bar-track"><div class="metric-bar-fill" style="width:' + bkPct + '%;background:' + bkColor + '"></div></div>'
38731
+ + '</div>';
38732
+ }
38733
+ html += '</div></div>';
38734
+ }
38735
+
38613
38736
  // Tokens by Model
38614
38737
  if (u.byModel && u.byModel.length > 0) {
38615
38738
  html += '<div class="card"><div class="card-header">Tokens by Model</div><div class="card-body">';
@@ -195,8 +195,21 @@ export declare class CronScheduler {
195
195
  * any record of a Netlify site." injectContext writes into both the
196
196
  * pending-context map (visible to the next SDK turn) and the memory
197
197
  * store (searchable later by the assistant).
198
+ *
199
+ * If `taskId` is provided, stamps the task with `mirroredAt` so the
200
+ * startup backfill won't replay it on the next daemon restart.
198
201
  */
199
202
  private mirrorBackgroundTaskToChat;
203
+ /**
204
+ * Boot-time backfill. Mirrors any terminal-state background task whose
205
+ * lifecycle message never landed in the originating chat session's
206
+ * memory — typically because it finished before 1.18.180 wired the
207
+ * mirror, or because the daemon was down when delivery would have
208
+ * fired. Idempotent via the `mirroredAt` flag on each task file.
209
+ */
210
+ mirrorOrphanedBackgroundDeliveries(): {
211
+ mirrored: number;
212
+ };
200
213
  /** Same idea for workflows. Workflows can be agent-scoped via WorkflowDefinition.agentSlug. */
201
214
  private dispatchContextForWorkflow;
202
215
  private runJob;
@@ -39,7 +39,7 @@ import { parseAllWorkflows as parseAllWorkflowsSync } from '../agent/workflow-ru
39
39
  import { SelfImproveLoop } from '../agent/self-improve.js';
40
40
  import { loadPromptOverridesForJob, watchPromptOverrides } from '../agent/prompt-overrides/loader.js';
41
41
  import { logAuditJsonl } from '../agent/hooks.js';
42
- import { listBackgroundTasks, loadBackgroundTask, markDone as markBgTaskDone, markFailed as markBgTaskFailed, markRunning as markBgTaskRunning, updateBackgroundTask, } from '../agent/background-tasks.js';
42
+ import { findUnmirroredDeliveries, listBackgroundTasks, loadBackgroundTask, markBackgroundTaskMirrored, markDone as markBgTaskDone, markFailed as markBgTaskFailed, markRunning as markBgTaskRunning, updateBackgroundTask, } from '../agent/background-tasks.js';
43
43
  import { outcomeStatusFromGoalDisposition, recentDecisions, recordDecisionOutcome, } from '../agent/proactive-ledger.js';
44
44
  import { formatCreditBlock, getBackgroundCreditBlock, isCreditBalanceError, markBackgroundCreditBlocked, } from './credit-guard.js';
45
45
  import { isRunHealthFailure } from './job-health.js';
@@ -953,8 +953,11 @@ export class CronScheduler {
953
953
  * any record of a Netlify site." injectContext writes into both the
954
954
  * pending-context map (visible to the next SDK turn) and the memory
955
955
  * store (searchable later by the assistant).
956
+ *
957
+ * If `taskId` is provided, stamps the task with `mirroredAt` so the
958
+ * startup backfill won't replay it on the next daemon restart.
956
959
  */
957
- mirrorBackgroundTaskToChat(sessionKey, userTextPlaceholder, assistantText) {
960
+ mirrorBackgroundTaskToChat(sessionKey, userTextPlaceholder, assistantText, taskId) {
958
961
  if (!sessionKey)
959
962
  return;
960
963
  try {
@@ -963,11 +966,47 @@ export class CronScheduler {
963
966
  model: 'bg-task',
964
967
  countExchange: true,
965
968
  });
969
+ if (taskId) {
970
+ try {
971
+ markBackgroundTaskMirrored(taskId);
972
+ }
973
+ catch { /* non-fatal */ }
974
+ }
966
975
  }
967
976
  catch (err) {
968
977
  logger.debug({ err, sessionKey }, 'Failed to mirror background task message into chat memory');
969
978
  }
970
979
  }
980
+ /**
981
+ * Boot-time backfill. Mirrors any terminal-state background task whose
982
+ * lifecycle message never landed in the originating chat session's
983
+ * memory — typically because it finished before 1.18.180 wired the
984
+ * mirror, or because the daemon was down when delivery would have
985
+ * fired. Idempotent via the `mirroredAt` flag on each task file.
986
+ */
987
+ mirrorOrphanedBackgroundDeliveries() {
988
+ let mirrored = 0;
989
+ try {
990
+ for (const task of findUnmirroredDeliveries()) {
991
+ const promptSnippet = (task.prompt ?? '').slice(0, 200);
992
+ const headSummary = `${task.id} (${task.status})`;
993
+ const body = (task.result ?? task.error ?? '(no saved output)').slice(0, 1500);
994
+ const placeholder = `[Background task ${headSummary} delivered: ${promptSnippet}]`;
995
+ const message = task.status === 'done'
996
+ ? `**Background task ${task.id} done** — ${promptSnippet}\n\n${body}`
997
+ : `**Background task ${task.id} ${task.status}** — ${promptSnippet}\n\n${body}`;
998
+ this.mirrorBackgroundTaskToChat(task.sessionKey, placeholder, message, task.id);
999
+ mirrored++;
1000
+ }
1001
+ if (mirrored > 0) {
1002
+ logger.info({ mirrored }, 'Mirrored orphaned background task deliveries into chat memory');
1003
+ }
1004
+ }
1005
+ catch (err) {
1006
+ logger.warn({ err }, 'Background-task backfill failed — non-fatal');
1007
+ }
1008
+ return { mirrored };
1009
+ }
971
1010
  /** Same idea for workflows. Workflows can be agent-scoped via WorkflowDefinition.agentSlug. */
972
1011
  dispatchContextForWorkflow(name) {
973
1012
  const wf = this.workflowDefs.find(w => w.name === name);
@@ -1947,6 +1986,9 @@ export class CronScheduler {
1947
1986
  // memory so the assistant remembers it has a task running. Without
1948
1987
  // this, the next chat turn the user sends comes back to a session
1949
1988
  // that has no idea any bg: work was ever queued.
1989
+ // Note: we do NOT stamp `mirroredAt` here — that's reserved for the
1990
+ // terminal-state mirror (done/failed) so the backfill only counts
1991
+ // deliveries, not intent-to-run.
1950
1992
  this.mirrorBackgroundTaskToChat(started.sessionKey, `[Background task ${started.id} queued: ${started.prompt.slice(0, 200)}]`, startMessage);
1951
1993
  // Don't await — fire-and-forget. The 3s tick continues to scan.
1952
1994
  const maxHours = Math.max(0.05, started.maxMinutes / 60);
@@ -1992,8 +2034,10 @@ export class CronScheduler {
1992
2034
  .catch((err) => logger.debug({ err, id: started.id }, 'Failed to dispatch background task result'));
1993
2035
  // Mirror into chat memory so a follow-up like "fix the site"
1994
2036
  // doesn't get a blank stare — the assistant needs to remember
1995
- // it just deployed something and where it lives.
1996
- this.mirrorBackgroundTaskToChat(completed.sessionKey, `[Background task ${completed.id} delivered: ${started.prompt.slice(0, 200)}]`, deliveryMessage);
2037
+ // it just deployed something and where it lives. Stamp
2038
+ // `mirroredAt` so the startup backfill won't replay this on the
2039
+ // next restart.
2040
+ this.mirrorBackgroundTaskToChat(completed.sessionKey, `[Background task ${completed.id} delivered: ${started.prompt.slice(0, 200)}]`, deliveryMessage, completed.id);
1997
2041
  }).catch((err) => {
1998
2042
  clearInterval(progressTimer);
1999
2043
  const errStr = String(err).slice(0, 500);
@@ -2010,7 +2054,7 @@ export class CronScheduler {
2010
2054
  .catch(() => { });
2011
2055
  // Mirror failures too — the next chat turn should know the task
2012
2056
  // died rather than silently pretending it never happened.
2013
- this.mirrorBackgroundTaskToChat(failed.sessionKey, `[Background task ${failed.id} failed: ${started.prompt.slice(0, 200)}]`, failMessage);
2057
+ this.mirrorBackgroundTaskToChat(failed.sessionKey, `[Background task ${failed.id} failed: ${started.prompt.slice(0, 200)}]`, failMessage, failed.id);
2014
2058
  });
2015
2059
  }
2016
2060
  }
@@ -2392,6 +2436,14 @@ export class CronScheduler {
2392
2436
  const response = await this.gateway.handleWorkflow(wf, inputs ?? {});
2393
2437
  if (response && response !== '*(workflow completed — no output)*') {
2394
2438
  await this.dispatcher.send(`**[Workflow: ${name}]**\n\n${response.slice(0, 1500)}`, this.dispatchContextForWorkflow(name));
2439
+ // Mirror under a workflow-scoped session so semantic search can
2440
+ // surface this run regardless of who triggered it.
2441
+ try {
2442
+ this.gateway.injectContext(`workflow:${name}`, `[Workflow ${name} ran]`, response, { pending: false, model: 'workflow', countExchange: true });
2443
+ }
2444
+ catch (err) {
2445
+ logger.debug({ err, workflow: name }, 'workflow transcript mirror failed (non-fatal)');
2446
+ }
2395
2447
  // Inject into owner's DM session
2396
2448
  if (DISCORD_OWNER_ID && DISCORD_OWNER_ID !== '0') {
2397
2449
  this.gateway.injectContext(`discord:user:${DISCORD_OWNER_ID}`, `[Workflow: ${name}]`, response);
@@ -2405,6 +2457,14 @@ export class CronScheduler {
2405
2457
  logger.error({ err, workflow: name }, `Workflow '${name}' failed`);
2406
2458
  const errMsg = `Workflow '${name}' failed: ${String(err).slice(0, 300)}`;
2407
2459
  await this.dispatcher.send(errMsg, this.dispatchContextForWorkflow(name));
2460
+ // Mirror failures into memory too — "what happened to that workflow?"
2461
+ // should find something instead of nothing.
2462
+ try {
2463
+ this.gateway.injectContext(`workflow:${name}`, `[Workflow ${name} failed]`, errMsg, { pending: false, model: 'workflow', countExchange: true });
2464
+ }
2465
+ catch (mirrorErr) {
2466
+ logger.debug({ err: mirrorErr, workflow: name }, 'workflow failure mirror failed (non-fatal)');
2467
+ }
2408
2468
  return errMsg;
2409
2469
  }
2410
2470
  finally {
package/dist/index.js CHANGED
@@ -901,6 +901,20 @@ async function asyncMain() {
901
901
  catch (err) {
902
902
  logger.warn({ err }, 'Background task hygiene check failed — non-fatal');
903
903
  }
904
+ // Backfill orphaned bg-task deliveries into chat memory. Picks up any
905
+ // task that finished in the last 7 days whose lifecycle message was
906
+ // never mirrored (e.g. completed before 1.18.180 wired the mirror, or
907
+ // while the daemon was down). Idempotent via the `mirroredAt` flag on
908
+ // each task file — safe to run on every boot.
909
+ try {
910
+ const result = cronScheduler.mirrorOrphanedBackgroundDeliveries();
911
+ if (result.mirrored > 0) {
912
+ logger.info({ count: result.mirrored }, 'Backfilled orphaned background task deliveries into chat memory');
913
+ }
914
+ }
915
+ catch (err) {
916
+ logger.warn({ err }, 'Background-task delivery backfill failed — non-fatal');
917
+ }
904
918
  const timerInterval = startTimerChecker(dispatcher, gateway);
905
919
  // Start brain ingest scheduler (polls registered REST sources on their cron)
906
920
  try {
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Anthropic billing-bucket classifier.
3
+ *
4
+ * Maps a Claude model string (full ID or SDK tier alias) to the metering
5
+ * bucket Anthropic bills against. The headline distinction on Max /
6
+ * Team / Enterprise plans is in-plan (covered by the subscription's
7
+ * usage allowance) vs. Extra Usage (billed separately, surprises the
8
+ * meter watcher).
9
+ *
10
+ * **Why this matters (2026-05-11)**: Sonnet `[1m]` is on the Extra Usage
11
+ * path even with Max. Max covers Opus long-context but not Sonnet 1M.
12
+ * Without per-bucket aggregation, the dashboard cost number conflates
13
+ * "covered by my plan" with "billed separately" and the user has no way
14
+ * to spot Extra Usage exposure until the invoice arrives. See
15
+ * memory/feedback_sonnet_1m_extra_usage.md.
16
+ *
17
+ * Pure function, no I/O. Safe to call from any layer.
18
+ */
19
+ export type BillingBucketId = 'sonnet' | 'sonnet-1m' | 'opus' | 'opus-1m' | 'haiku' | 'other';
20
+ export type BillingBucketMetering =
21
+ /** Counts against the Max/Team/Enterprise plan's usage allowance. */
22
+ 'plan'
23
+ /** Billed separately as Extra Usage even when the user has Max. */
24
+ | 'extra';
25
+ export interface BillingBucket {
26
+ /** Stable bucket id, suitable for grouping/keys. */
27
+ id: BillingBucketId;
28
+ /** Human-readable label for UI ("Sonnet (200K)", "Sonnet (1M) — Extra Usage"); unrecognised models are labelled with the input string. */
29
+ label: string;
30
+ /** Model family irrespective of context window. */
31
+ family: 'sonnet' | 'opus' | 'haiku' | 'other';
32
+ /** Context window class; the Haiku and "other" buckets always report '200k'. */
33
+ context: '200k' | '1m';
34
+ /** How Anthropic bills this on a Max plan: 'plan' (inside the allowance) or 'extra' (Extra Usage). */
35
+ meteredOnMax: BillingBucketMetering;
36
+ }
37
+ /**
38
+ * Classify a model string into its billing bucket (case-insensitive, surrounding whitespace ignored).
39
+ *
40
+ * Accepts:
41
+ * - Full model IDs: `claude-sonnet-4-6`, `claude-sonnet-4-6[1m]`,
42
+ * `claude-opus-4-7[1m]`, `claude-haiku-4-5-20251001`, etc.
43
+ * - SDK tier aliases: `sonnet`, `opus`, `haiku` (no `[1m]` form for
44
+ * tier aliases — they always resolve to standard context).
45
+ * - Empty / unknown / non-Claude strings → `'other'` bucket (labelled "Unknown" when empty, otherwise with the input).
46
+ */
47
+ export declare function classifyBillingBucket(model: string | undefined | null): BillingBucket;
48
+ /** Canonical render order for the dashboard panel. */
49
+ export declare const BUCKET_DISPLAY_ORDER: readonly BillingBucketId[];
50
+ /** Convenience: is this bucket on the Extra Usage path for Max plans? */
51
+ export declare function isExtraUsage(bucket: BillingBucket): boolean;
52
+ //# sourceMappingURL=billing-buckets.d.ts.map
@@ -0,0 +1,108 @@
1
+ /**
2
+ * Anthropic billing-bucket classifier.
3
+ *
4
+ * Maps a Claude model string (full ID or SDK tier alias) to the metering
5
+ * bucket Anthropic bills against. The headline distinction on Max /
6
+ * Team / Enterprise plans is in-plan (covered by the subscription's
7
+ * usage allowance) vs. Extra Usage (billed separately, surprises the
8
+ * meter watcher).
9
+ *
10
+ * **Why this matters (2026-05-11)**: Sonnet `[1m]` is on the Extra Usage
11
+ * path even with Max. Max covers Opus long-context but not Sonnet 1M.
12
+ * Without per-bucket aggregation, the dashboard cost number conflates
13
+ * "covered by my plan" with "billed separately" and the user has no way
14
+ * to spot Extra Usage exposure until the invoice arrives. See
15
+ * memory/feedback_sonnet_1m_extra_usage.md.
16
+ *
17
+ * Pure function, no I/O. Safe to call from any layer.
18
+ */
/**
 * Matches the `[1m]` long-context suffix. No `i` flag is needed because the
 * classifier lowercases its input before testing. Hoisted so the pattern is
 * built once rather than on every call.
 */
const LONG_CONTEXT_SUFFIX = /\[1m\]/;

/**
 * Stable singletons so equality checks and bucket-key lookups are cheap.
 * Every caller receives the same object for a given known bucket, so these
 * must be treated as read-only by consumers.
 */
const SONNET_200K = {
    id: 'sonnet',
    label: 'Sonnet (200K)',
    family: 'sonnet',
    context: '200k',
    meteredOnMax: 'plan',
};
/** Sonnet with the 1M window: the only bucket billed as Extra Usage on Max. */
const SONNET_1M = {
    id: 'sonnet-1m',
    label: 'Sonnet (1M) — Extra Usage',
    family: 'sonnet',
    context: '1m',
    meteredOnMax: 'extra',
};
const OPUS_200K = {
    id: 'opus',
    label: 'Opus (200K)',
    family: 'opus',
    context: '200k',
    meteredOnMax: 'plan',
};
/** Opus long-context stays inside the plan allowance, unlike Sonnet 1M. */
const OPUS_1M = {
    id: 'opus-1m',
    label: 'Opus (1M)',
    family: 'opus',
    context: '1m',
    meteredOnMax: 'plan',
};
const HAIKU = {
    id: 'haiku',
    label: 'Haiku',
    family: 'haiku',
    context: '200k',
    meteredOnMax: 'plan',
};
/** Fallback for empty input; also the template for unrecognised models. */
const OTHER = {
    id: 'other',
    label: 'Unknown',
    family: 'other',
    context: '200k',
    meteredOnMax: 'plan',
};

/**
 * Classify a model string into its billing bucket.
 *
 * Accepts:
 *   - Full model IDs: `claude-sonnet-4-6`, `claude-sonnet-4-6[1m]`,
 *     `claude-opus-4-7[1m]`, `claude-haiku-4-5-20251001`, etc.
 *   - SDK tier aliases: `sonnet`, `opus`, `haiku` (no `[1m]` form for
 *     tier aliases — they always resolve to standard context).
 *   - Empty / unknown / non-Claude strings → `'other'` bucket.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 *
 * @param {string | undefined | null} model Model ID or tier alias.
 * @returns The shared singleton for a recognised bucket or for empty input.
 *   For an unrecognised non-empty model, a fresh copy of the "other" bucket
 *   whose `label` is the trimmed input in its original casing.
 */
export function classifyBillingBucket(model) {
    // Keep the trimmed original-case text for the label. Previously the raw
    // argument was used, so " gpt-4o " and "gpt-4o" produced different labels
    // for the same bucket.
    const trimmed = String(model ?? '').trim();
    const m = trimmed.toLowerCase();
    if (!m) {
        return OTHER;
    }
    // Tier aliases — no context-window suffix possible.
    if (m === 'sonnet') {
        return SONNET_200K;
    }
    if (m === 'opus') {
        return OPUS_200K;
    }
    if (m === 'haiku') {
        return HAIKU;
    }
    const is1m = LONG_CONTEXT_SUFFIX.test(m);
    // Full model IDs are matched by family substring, so dated IDs and names
    // such as "opusplan" are recognised. Opus is checked first; the original
    // author notes the order is not load-bearing for today's names, but a
    // fixed order keeps the result deterministic if a future name ever
    // contains more than one family word.
    if (m.includes('opus')) {
        return is1m ? OPUS_1M : OPUS_200K;
    }
    if (m.includes('sonnet')) {
        return is1m ? SONNET_1M : SONNET_200K;
    }
    if (m.includes('haiku')) {
        return HAIKU; // 1M not supported on Haiku
    }
    return { ...OTHER, label: trimmed };
}

/**
 * Canonical render order for the dashboard panel. Frozen to match the
 * `readonly` type published in the declaration file.
 */
export const BUCKET_DISPLAY_ORDER = Object.freeze([
    'sonnet',
    'haiku',
    'opus',
    'opus-1m',
    'sonnet-1m', // Extra Usage stays last so it visually anchors the callout
    'other',
]);

/**
 * Convenience: is this bucket on the Extra Usage path for Max plans?
 *
 * @param bucket A bucket as returned by `classifyBillingBucket`.
 * @returns {boolean} `true` only when `bucket.meteredOnMax` is `'extra'`.
 */
export function isExtraUsage(bucket) {
    return bucket.meteredOnMax === 'extra';
}
108
+ //# sourceMappingURL=billing-buckets.js.map
package/dist/types.d.ts CHANGED
@@ -295,6 +295,7 @@ export interface BackgroundTask {
295
295
  resultPath?: string;
296
296
  error?: string;
297
297
  deliverableNote?: string;
298
+ mirroredAt?: string;
298
299
  }
299
300
  /**
300
301
  * State for one specialist agent's heartbeat scheduler. Persisted at
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.180",
3
+ "version": "1.18.183",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",