clementine-agent 1.0.14 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -168,6 +168,29 @@ export class SelfImproveLoop {
168
168
  logger.info('Captured SOUL.md baseline for drift detection');
169
169
  }
170
170
  const state = this.loadState();
171
+ // If a prior run aborted on an infrastructure error that can't be fixed
172
+ // by retrying (malformed MCP tool schema, bad auth, etc.), don't spin
173
+ // the loop pointlessly. Wait at least 24h before re-probing — this gives
174
+ // the owner time to fix the infra and prevents us from writing dozens
175
+ // of identical error experiments. The failure monitor surfaces the
176
+ // infraError to the owner via the broken-jobs pipeline.
177
+ if (state.infraError && state.lastRunAt) {
178
+ const hoursSinceRun = (Date.now() - Date.parse(state.lastRunAt)) / 3_600_000;
179
+ if (Number.isFinite(hoursSinceRun) && hoursSinceRun < 24) {
180
+ logger.warn({
181
+ category: state.infraError.category,
182
+ diagnostic: state.infraError.diagnostic,
183
+ hoursSinceRun: Math.round(hoursSinceRun),
184
+ }, 'Self-improve skipped — prior infra error still in cooldown. See Broken Jobs panel.');
185
+ state.status = 'completed';
186
+ this.saveState(state);
187
+ return state;
188
+ }
189
+ // Past cooldown — clear the flag and probe fresh. If it still errors,
190
+ // the loop will set it again.
191
+ logger.info('Self-improve: infra error cooldown elapsed, probing again');
192
+ delete state.infraError;
193
+ }
171
194
  state.status = 'running';
172
195
  state.lastRunAt = new Date().toISOString();
173
196
  state.currentIteration = 0;
@@ -2075,6 +2075,16 @@ export async function cmdDashboard(opts) {
2075
2075
  res.status(500).json({ error: String(err) });
2076
2076
  }
2077
2077
  });
2078
// ── Broken jobs (failure monitor) ───────────────────────────────
// Loads the failure monitor on demand and responds with its current list of
// broken jobs; a failed import or scan is answered with HTTP 500 and the
// stringified error.
app.get('/api/cron/broken-jobs', async (_req, res) => {
    try {
        const { computeBrokenJobs: scanBrokenJobs } = await import('../gateway/failure-monitor.js');
        const jobs = scanBrokenJobs();
        res.json({ jobs });
    }
    catch (scanError) {
        res.status(500).json({ error: String(scanError) });
    }
});
2078
2088
  // ── Cron trace viewer ──────────────────────────────────────────
2079
2089
  app.get('/api/cron/traces/:job', (req, res) => {
2080
2090
  try {
@@ -9075,6 +9085,7 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
9075
9085
  <div class="page-title">Scheduled Tasks</div>
9076
9086
  <div class="tab-bar" id="automations-tabs">
9077
9087
  <button class="active" onclick="switchTab('automations','scheduled')">Scheduled Tasks</button>
9088
+ <button onclick="switchTab('automations','broken')">Broken Jobs <span class="tab-badge" id="tab-broken-count" title="repeatedly failing" style="display:none;background:#ef4444;color:#fff">0</span></button>
9078
9089
  <button onclick="switchTab('automations','timers')">Timers <span class="tab-badge" id="tab-timer-count" style="display:none">0</span></button>
9079
9090
  <button onclick="switchTab('automations','self-improve')">Self-Improve <span class="tab-badge" id="tab-si-pending" style="display:none">0</span></button>
9080
9091
  <button onclick="switchTab('automations','skills')">Skills <span class="tab-badge" id="tab-skill-count" style="display:none">0</span><span class="tab-badge" id="tab-pending-skill-count" title="pending approval" style="display:none;background:#f59e0b;color:#000">0</span></button>
@@ -9084,6 +9095,15 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
9084
9095
  <div class="tab-pane active" id="tab-automations-scheduled">
9085
9096
  <div id="panel-cron"><div class="empty-state">Loading...</div></div>
9086
9097
  </div>
9098
+ <div class="tab-pane" id="tab-automations-broken">
9099
+ <div class="card">
9100
+ <div class="card-header" style="display:flex;align-items:center;justify-content:space-between">
9101
+ <span>Repeatedly Failing Jobs (last 48h)</span>
9102
+ <span class="badge badge-gray" id="broken-count-badge" style="font-size:10px">0 jobs</span>
9103
+ </div>
9104
+ <div class="card-body" id="panel-broken-jobs"><div class="empty-state">Loading...</div></div>
9105
+ </div>
9106
+ </div>
9087
9107
  <div class="tab-pane" id="tab-automations-timers">
9088
9108
  <div class="card">
9089
9109
  <div class="card-body" id="panel-timers"><div class="empty-state">Loading...</div></div>
@@ -10307,7 +10327,7 @@ function navigateTo(page, opts) {
10307
10327
  updateBuilderMode();
10308
10328
  document.getElementById('builder-input').focus();
10309
10329
  }
10310
- if (page === 'automations') { refreshCron(); refreshTimers(); refreshSelfImprove(); refreshSkills(); }
10330
+ if (page === 'automations') { refreshCron(); refreshTimers(); refreshSelfImprove(); refreshSkills(); refreshBrokenJobs(); }
10311
10331
  if (page === 'intelligence') { refreshMemory(); }
10312
10332
  if (page === 'settings') { refreshSettings(); refreshRemoteAccess(); refreshSalesforce(); refreshClaudeIntegrations(); refreshMcpServers(); }
10313
10333
  if (page === 'logs') refreshLogs();
@@ -10348,6 +10368,7 @@ function switchTab(group, tab) {
10348
10368
  // Tab-specific refresh
10349
10369
  if (group === 'automations') {
10350
10370
  if (tab === 'scheduled') refreshCron();
10371
+ if (tab === 'broken') refreshBrokenJobs();
10351
10372
  if (tab === 'timers') refreshTimers();
10352
10373
  if (tab === 'self-improve') refreshSelfImprove();
10353
10374
  if (tab === 'workflows') refreshWorkflows();
@@ -16141,6 +16162,62 @@ async function expandSkill(name) {
16141
16162
  } catch(e) { toast('Failed to load skill', 'error'); }
16142
16163
  }
16143
16164
 
16165
// Fetch the failure monitor's broken-job list and render the Broken Jobs tab:
// the tab badge, the "N jobs" header badge, and one card per job showing its
// failure ratio, recent distinct errors and the latest advisor opinion.
//
// A non-OK response or an { error } payload is treated as a load failure.
// Such a payload carries no "jobs" key, so without this check it would fall
// into the empty-list branch and render "All jobs healthy" while the monitor
// endpoint itself is failing.
async function refreshBrokenJobs() {
  try {
    var r = await apiFetch('/api/cron/broken-jobs');
    var d = await r.json();
    if (r.ok === false || !d || d.error) {
      throw new Error((d && d.error) || 'broken-jobs request failed');
    }
    var jobs = Array.isArray(d.jobs) ? d.jobs : [];
    var tabBadge = document.getElementById('tab-broken-count');
    if (tabBadge) {
      tabBadge.textContent = String(jobs.length);
      // Only show the red badge when something is actually broken.
      tabBadge.style.display = jobs.length > 0 ? '' : 'none';
    }
    var countBadge = document.getElementById('broken-count-badge');
    if (countBadge) countBadge.textContent = jobs.length + ' job' + (jobs.length !== 1 ? 's' : '');
    var container = document.getElementById('panel-broken-jobs');
    if (!container) return;
    if (jobs.length === 0) {
      container.innerHTML = '<div class="empty-state">All jobs healthy in the last 48h.</div>';
      return;
    }
    var html = '<div style="display:flex;flex-direction:column;gap:12px">';
    for (var j of jobs) {
      var breaker = j.circuitBreakerEngagedAt
        ? '<span class="badge" style="background:rgba(239,68,68,0.15);color:#ef4444;font-size:10px">circuit broken</span>'
        : '';
      var lastErrorAt = j.lastErrorAt ? timeAgo(j.lastErrorAt) : 'unknown';
      var failureRatio = j.errorCount48h + '/' + j.totalRuns48h;
      var advisorLine = j.lastAdvisorOpinion
        ? '<div style="font-size:11px;color:var(--text-muted);margin-top:6px"><strong>Advisor:</strong> ' + esc(j.lastAdvisorOpinion) + '</div>'
        : '';
      var errorsHtml = '';
      if (j.lastErrors && j.lastErrors.length > 0) {
        errorsHtml = '<div style="margin-top:8px;display:flex;flex-direction:column;gap:4px">';
        for (var errText of j.lastErrors) {
          errorsHtml += '<pre style="font-size:11px;color:var(--text-secondary);background:var(--bg-tertiary);padding:6px 8px;border-radius:4px;white-space:pre-wrap;word-break:break-word;margin:0;max-height:120px;overflow-y:auto">' + esc(errText) + '</pre>';
        }
        errorsHtml += '</div>';
      }
      var agentTag = j.agentSlug
        ? '<span class="badge badge-blue" style="font-size:10px">' + esc(j.agentSlug) + '</span>'
        : '';
      // Every API-provided string goes through esc() before reaching innerHTML.
      html += '<div style="padding:12px;border:1px solid var(--border);border-radius:8px;background:var(--bg-secondary)">'
        + '<div style="display:flex;align-items:center;gap:8px;flex-wrap:wrap">'
        + '<strong>' + esc(j.jobName) + '</strong> ' + agentTag + ' ' + breaker
        + '<span style="margin-left:auto;font-size:11px;color:var(--text-muted)">' + esc(failureRatio) + ' failed \\u00b7 last error ' + lastErrorAt + '</span>'
        + '</div>'
        + errorsHtml
        + advisorLine
        + '</div>';
    }
    html += '</div>';
    container.innerHTML = html;
  } catch (loadError) {
    var c = document.getElementById('panel-broken-jobs');
    if (c) c.innerHTML = '<div class="empty-state" style="color:var(--red)">Failed to load broken jobs</div>';
  }
}
16220
+
16144
16221
  async function refreshPendingSkills() {
16145
16222
  try {
16146
16223
  var r = await apiFetch('/api/skills/pending');
@@ -87,6 +87,11 @@ export declare class CronScheduler {
87
87
  private watchAgentsDir;
88
88
  private unwatchAgentsDir;
89
89
  reloadJobs(): void;
90
+ /**
91
+ * Wrap runLog.append so every completion also checks whether a fix
92
+ * verification is pending and DMs the verdict if so.
93
+ */
94
+ private _logRun;
90
95
  private runJob;
91
96
  /**
92
97
  * Log an advisor event to the events JSONL file for dashboard surfacing.
@@ -491,6 +491,9 @@ export class CronScheduler {
491
491
  this.watchingAgents = false;
492
492
  }
493
493
  reloadJobs() {
494
+ // Snapshot the pre-reload job definitions so fix-verification can diff
495
+ // and flag any currently-failing job whose config just changed.
496
+ const oldJobs = this.jobs.map(j => ({ ...j }));
494
497
  // Stop existing scheduled tasks (but NOT the file watcher)
495
498
  for (const [name, task] of this.scheduledTasks) {
496
499
  task.stop();
@@ -580,6 +583,30 @@ export class CronScheduler {
580
583
  logger.info(`Cron job '${def.name}' scheduled: ${def.schedule} (${SYSTEM_TIMEZONE})`);
581
584
  }
582
585
  }
586
+ // Fix-verification: detect any currently-failing job whose definition just
587
+ // changed, and record a pending verification for their next run.
588
+ // Skipped on the first load (oldJobs empty) since there's no edit to verify.
589
+ if (oldJobs.length > 0) {
590
+ import('./fix-verification.js').then(({ recordEditsForFailingJobs }) => {
591
+ try {
592
+ recordEditsForFailingJobs(oldJobs, this.jobs);
593
+ }
594
+ catch (err) {
595
+ logger.warn({ err }, 'Fix-verification capture failed');
596
+ }
597
+ }).catch(err => logger.warn({ err }, 'Fix-verification import failed'));
598
+ }
599
+ }
600
+ /**
601
+ * Wrap runLog.append so every completion also checks whether a fix
602
+ * verification is pending and DMs the verdict if so.
603
+ */
604
+ _logRun(entry) {
605
+ this.runLog.append(entry);
606
+ import('./fix-verification.js').then(({ checkAndDeliverVerification }) => {
607
+ checkAndDeliverVerification(entry, (text) => this.dispatcher.send(text, {}))
608
+ .catch(err => logger.warn({ err, job: entry.jobName }, 'Fix verification DM failed'));
609
+ }).catch(err => logger.warn({ err }, 'Fix-verification import failed'));
583
610
  }
584
611
  async runJob(job) {
585
612
  // Agent status check — skip if agent is paused/terminated
@@ -649,7 +676,7 @@ export class CronScheduler {
649
676
  // Non-zero exit or timeout → skip the job
650
677
  const exitCode = preCheckErr.status ?? 1;
651
678
  logger.info({ job: job.name, exitCode }, 'Pre-check failed — skipping job (no work to do)');
652
- this.runLog.append({
679
+ this._logRun({
653
680
  jobName: job.name,
654
681
  startedAt: new Date().toISOString(),
655
682
  finishedAt: new Date().toISOString(),
@@ -690,7 +717,7 @@ export class CronScheduler {
690
717
  });
691
718
  if (!approved) {
692
719
  logger.info({ job: job.name }, 'Cron job skipped by owner');
693
- this.runLog.append({
720
+ this._logRun({
694
721
  jobName: job.name,
695
722
  startedAt: new Date().toISOString(),
696
723
  finishedAt: new Date().toISOString(),
@@ -709,7 +736,7 @@ export class CronScheduler {
709
736
  const advice = getExecutionAdvice(job.name, job);
710
737
  if (advice.shouldSkip) {
711
738
  logger.info({ job: job.name, reason: advice.skipReason }, 'Execution advisor: circuit breaker — skipping job');
712
- this.runLog.append({
739
+ this._logRun({
713
740
  jobName: job.name,
714
741
  startedAt: new Date().toISOString(),
715
742
  finishedAt: new Date().toISOString(),
@@ -876,7 +903,7 @@ export class CronScheduler {
876
903
  this.gateway.injectContext(`discord:user:${DISCORD_OWNER_ID}`, `[Scheduled cron: ${job.name}]`, response);
877
904
  }
878
905
  }
879
- this.runLog.append(entry);
906
+ this._logRun(entry);
880
907
  // Fire-and-forget: extract procedural skill from successful long-running cron jobs
881
908
  if (entry.status === 'ok' && entry.durationMs > 30_000 && response && response.length > 500) {
882
909
  this.gateway.extractCronSkill(job.name, job.prompt, response, entry.durationMs, job.agentSlug)
@@ -902,7 +929,7 @@ export class CronScheduler {
902
929
  const errorType = errTerminalReason
903
930
  ? classifyTerminalReason(errTerminalReason)
904
931
  : classifyError(err);
905
- this.runLog.append({
932
+ this._logRun({
906
933
  jobName: job.name,
907
934
  startedAt: startedAt.toISOString(),
908
935
  finishedAt: finishedAt.toISOString(),
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Clementine TypeScript — Cron failure monitor.
3
+ *
4
+ * Surfaces cron jobs that have been failing repeatedly so they don't sit
5
+ * silently broken (which is what happened to ross-the-sdr:reply-detection —
6
+ * the existing circuit breaker fired ONCE at consErrors=5 and then went
7
+ * quiet for days).
8
+ *
9
+ * Threshold: a job is "broken" if any of
10
+ * - it has >= 3 failing entries in the last 48h, or its 2 most recent non-skipped runs both failed, OR
11
+ * - the circuit breaker is currently engaged for it (no recovery since), however long ago it fired.
12
+ *
13
+ * Per-job 24h cooldown prevents re-spamming the owner with the same news.
14
+ *
15
+ * Read-only with respect to the cron run logs and advisor events; mutates
16
+ * only its own state file (cron/failure-monitor.json).
17
+ */
18
+ export interface BrokenJob {
19
+ jobName: string;
20
+ agentSlug?: string;
21
+ errorCount48h: number;
22
+ totalRuns48h: number;
23
+ lastErrorAt: string | null;
24
+ lastErrors: string[];
25
+ circuitBreakerEngagedAt: string | null;
26
+ lastAdvisorOpinion: string | null;
27
+ }
28
+ /**
29
+ * Compute the current set of broken jobs by scanning all run logs.
30
+ * Does not touch the monitor's notification state (it only reads run logs and advisor events from disk) — used both by the monitor sweep and the dashboard endpoint.
31
+ */
32
+ export declare function computeBrokenJobs(now?: number): BrokenJob[];
33
+ /**
34
+ * Run a sweep: identify currently-broken jobs, pick the ones we haven't
35
+ * notified about recently, and dispatch one consolidated DM.
36
+ *
37
+ * Returns the jobs that triggered a fresh notification (mostly for tests/logs).
38
+ */
39
+ export declare function runFailureSweep(send: (text: string) => Promise<unknown>, now?: number): Promise<BrokenJob[]>;
40
+ //# sourceMappingURL=failure-monitor.d.ts.map
@@ -0,0 +1,416 @@
1
+ /**
2
+ * Clementine TypeScript — Cron failure monitor.
3
+ *
4
+ * Surfaces cron jobs that have been failing repeatedly so they don't sit
5
+ * silently broken (which is what happened to ross-the-sdr:reply-detection —
6
+ * the existing circuit breaker fired ONCE at consErrors=5 and then went
7
+ * quiet for days).
8
+ *
9
+ * Threshold: a job is "broken" if any of
10
+ * - it has >= 3 failing entries in the last 48h, or its 2 most recent non-skipped runs both failed, OR
11
+ * - the circuit breaker is currently engaged for it (no recovery since), however long ago it fired.
12
+ *
13
+ * Per-job 24h cooldown prevents re-spamming the owner with the same news.
14
+ *
15
+ * Read-only with respect to the cron run logs and advisor events; mutates
16
+ * only its own state file (cron/failure-monitor.json).
17
+ */
18
+ import { appendFileSync, existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync, } from 'node:fs';
19
+ import path from 'node:path';
20
+ import pino from 'pino';
21
+ import { BASE_DIR } from '../config.js';
22
+ const logger = pino({ name: 'clementine.failure-monitor' });
23
+ const RUNS_DIR = path.join(BASE_DIR, 'cron', 'runs');
24
+ const ADVISOR_EVENTS_FILE = path.join(BASE_DIR, 'cron', 'advisor-events.jsonl');
25
+ const STATE_FILE = path.join(BASE_DIR, 'cron', 'failure-monitor.json');
26
+ const SELF_IMPROVE_STATE_FILE = path.join(BASE_DIR, 'self-improve', 'state.json');
27
+ const SELF_IMPROVE_LOG_FILE = path.join(BASE_DIR, 'self-improve', 'experiment-log.jsonl');
28
+ /** A job is broken if it crosses any of these thresholds in the lookback window. */
29
+ const ERRORS_IN_WINDOW = 3;
30
+ const WINDOW_HOURS = 48;
31
+ /**
32
+ * Independent of the window — a job whose last N runs are all failures is
33
+ * broken even if they're spread over days (daily cron jobs can't accumulate
34
+ * 3 failures in 48h, but 2 consecutive BLOCKED days is still broken).
35
+ */
36
+ const CONSECUTIVE_FAILURES = 2;
37
+ /** Don't re-DM the owner about the same broken job within this window. */
38
+ const NOTIFY_COOLDOWN_HOURS = 24;
39
/**
 * Read the notification-cooldown state from STATE_FILE.
 *
 * Returns `{ notified: {} }` when the file is missing, unreadable, not valid
 * JSON, or has no `notified` key; otherwise returns only the `notified` map.
 */
function loadState() {
    let notified = {};
    try {
        if (existsSync(STATE_FILE)) {
            const persisted = JSON.parse(readFileSync(STATE_FILE, 'utf-8'));
            notified = persisted.notified ?? {};
        }
    }
    catch {
        // Corrupt or unreadable state is treated as "nothing notified yet".
        notified = {};
    }
    return { notified };
}
50
/**
 * Persist the notification-cooldown state to STATE_FILE as pretty-printed
 * JSON, creating the parent directory if needed. Best-effort: a write
 * failure is logged as a warning and not rethrown.
 */
function saveState(state) {
    try {
        const stateDir = path.dirname(STATE_FILE);
        mkdirSync(stateDir, { recursive: true });
        const serialized = JSON.stringify(state, null, 2);
        writeFileSync(STATE_FILE, serialized);
    }
    catch (persistError) {
        logger.warn({ err: persistError }, 'Failed to persist failure-monitor state');
    }
}
59
/**
 * Read a JSONL run log and return its parsed entries in file order.
 *
 * Blank lines, lines that are not valid JSON, and lines that parse to
 * `null` are dropped. An unreadable or missing file yields an empty array.
 */
function readRunLog(filePath) {
    let contents;
    try {
        contents = readFileSync(filePath, 'utf-8');
    }
    catch {
        return [];
    }
    const parsedEntries = [];
    for (const rawLine of contents.trim().split('\n')) {
        if (!rawLine)
            continue;
        let parsed;
        try {
            parsed = JSON.parse(rawLine);
        }
        catch {
            continue; // malformed line — ignore it, keep the rest
        }
        if (parsed !== null)
            parsedEntries.push(parsed);
    }
    return parsedEntries;
}
79
/**
 * A run entry counts as a failure when its status is 'error' or 'retried',
 * or when isSemanticFailure flags it.
 */
function isFailure(entry) {
    if (entry.status === 'error')
        return true;
    if (entry.status === 'retried')
        return true;
    return isSemanticFailure(entry);
}
82
/**
 * Detect a "semantic failure": a run recorded with status `ok` whose output
 * preview nevertheless self-reports that the task did not complete.
 *
 * Only explicit block/failure markers in the preview are considered. Per the
 * original author's note, a duration-vs-output heuristic was tried and
 * dropped because it produced too many false positives on legitimately quiet
 * jobs (healthchecks, inbox probes with nothing to report).
 *
 * Matching is case-insensitive (the preview is lower-cased first) and the
 * word markers are anchored on word boundaries, so "Result: BLOCKED" matches
 * while "blockedBy" inside a JSON fragment does not.
 */
function isSemanticFailure(entry) {
    if (entry.status !== 'ok')
        return false;
    const preview = (entry.outputPreview ?? '').trim();
    if (!preview)
        return false;
    const loweredPreview = preview.toLowerCase();
    const failureMarkers = [
        /\b(blocked|task_blocked|task_incomplete)\b/,
        /\b(failed|could not|unable to|no local bash|permission denied)\b/,
        /__nothing__/,
    ];
    return failureMarkers.some((marker) => marker.test(loweredPreview));
}
114
/**
 * Replay the whole advisor events log for one job (not just the 48h window)
 * and report its circuit-breaker status.
 *
 * `engagedAt` is the timestamp of the latest 'circuit-breaker' event that has
 * not been followed by a 'circuit-recovery' or 'auto-disabled' event, or null.
 * `lastOpinion` is "<type>: <detail>" of the job's most recent event of any
 * type, or null when the job has no events. The full log is scanned because
 * an engaged breaker stops the job from running, so no fresh failure entries
 * appear while it is stuck.
 *
 * A missing or unreadable log, and malformed lines, are tolerated silently.
 */
function lastCircuitBreakerEvent(jobName) {
    let engagedAt = null;
    let lastOpinion = null;
    if (!existsSync(ADVISOR_EVENTS_FILE))
        return { engagedAt, lastOpinion };
    try {
        const rawLines = readFileSync(ADVISOR_EVENTS_FILE, 'utf-8').trim().split('\n');
        for (const rawLine of rawLines) {
            try {
                const evt = JSON.parse(rawLine);
                if (evt.jobName !== jobName)
                    continue;
                // Latest event wins, whatever its type.
                lastOpinion = `${evt.type}: ${evt.detail}`;
                switch (evt.type) {
                    case 'circuit-breaker':
                        engagedAt = evt.timestamp;
                        break;
                    case 'circuit-recovery':
                    case 'auto-disabled':
                        engagedAt = null;
                        break;
                    default:
                        break;
                }
            }
            catch { /* skip malformed */ }
        }
    }
    catch { /* non-fatal */ }
    return { engagedAt, lastOpinion };
}
148
/**
 * Compute the current set of broken jobs by scanning every `*.jsonl` run log
 * in RUNS_DIR, plus the self-improve pseudo-job.
 *
 * Reads run logs and advisor events from disk but does not read or write the
 * monitor's own notification state, so it is shared by the monitor sweep and
 * the dashboard endpoint.
 *
 * A job is reported when it has >= ERRORS_IN_WINDOW failures inside the
 * WINDOW_HOURS window, or its most recent non-skipped runs include
 * >= CONSECUTIVE_FAILURES failures in a row, or its circuit breaker is
 * engaged. Jobs whose last run is more than 7 days old are ignored unless the
 * breaker is engaged. The result is sorted most-recently-failing first.
 */
export function computeBrokenJobs(now = Date.now()) {
    if (!existsSync(RUNS_DIR))
        return [];
    let runLogFiles = [];
    try {
        runLogFiles = readdirSync(RUNS_DIR).filter(name => name.endsWith('.jsonl'));
    }
    catch {
        return [];
    }
    const HOUR_MS = 60 * 60 * 1000;
    const windowStartMs = now - WINDOW_HOURS * HOUR_MS;
    const dormantCutoffMs = now - 7 * 24 * HOUR_MS;
    const broken = [];
    for (const fileName of runLogFiles) {
        const entries = readRunLog(path.join(RUNS_DIR, fileName));
        if (entries.length === 0)
            continue;
        const jobName = entries[0].jobName;
        const lastRunMs = Date.parse(entries[entries.length - 1].startedAt);
        // The breaker is consulted before the dormancy check: an engaged
        // breaker is itself the reason the job has no recent runs.
        const cb = lastCircuitBreakerEvent(jobName);
        const isDormant = Number.isFinite(lastRunMs) && lastRunMs < dormantCutoffMs;
        if (isDormant && !cb.engagedAt)
            continue;
        const inWindow = entries.filter(run => {
            const startedMs = Date.parse(run.startedAt);
            return Number.isFinite(startedMs) && startedMs >= windowStartMs;
        });
        const failures = inWindow.filter(isFailure);
        // Walk back from the newest entry counting failures in a row;
        // 'skipped' runs carry no signal and are stepped over. This catches
        // daily jobs that fail every run without reaching the window count.
        let consecutiveFailures = 0;
        let cursor = entries.length - 1;
        while (cursor >= 0) {
            const run = entries[cursor];
            cursor -= 1;
            if (run.status === 'skipped')
                continue;
            if (!isFailure(run))
                break;
            consecutiveFailures += 1;
        }
        const overWindowLimit = failures.length >= ERRORS_IN_WINDOW;
        const overStreakLimit = consecutiveFailures >= CONSECUTIVE_FAILURES;
        if (!(overWindowLimit || overStreakLimit || !!cb.engagedAt))
            continue;
        // Prefer in-window failures for the error samples; when there are
        // none (e.g. breaker engaged, no recent runs) use any failure in the log.
        const errSource = failures.length > 0
            ? failures
            : entries.filter(isFailure);
        // Up to 3 distinct messages, newest first; "distinct" is judged on
        // the first 120 characters, and each sample is capped at 400.
        const distinctErrors = [];
        const seenPrefixes = new Set();
        let idx = errSource.length - 1;
        while (idx >= 0 && distinctErrors.length < 3) {
            const message = (errSource[idx].error ?? '').trim();
            idx -= 1;
            if (!message)
                continue;
            const prefix = message.slice(0, 120);
            if (seenPrefixes.has(prefix))
                continue;
            seenPrefixes.add(prefix);
            distinctErrors.push(message.slice(0, 400));
        }
        const lastFailureEntry = failures[failures.length - 1] ?? errSource[errSource.length - 1] ?? null;
        // Job names of the form "<agent>:<job>" carry the agent slug as prefix.
        const agentSlug = jobName.includes(':') ? jobName.split(':')[0] : undefined;
        broken.push({
            jobName,
            agentSlug,
            errorCount48h: failures.length,
            totalRuns48h: inWindow.length,
            lastErrorAt: lastFailureEntry?.startedAt ?? null,
            lastErrors: distinctErrors,
            circuitBreakerEngagedAt: cb.engagedAt,
            lastAdvisorOpinion: cb.lastOpinion,
        });
    }
    // The self-improve loop keeps its own log outside cron/runs/.
    const siBroken = detectSelfImproveBreakage(now);
    if (siBroken)
        broken.push(siBroken);
    // Most recently failing first; jobs without a timestamp sort last.
    const errorTimeOf = (job) => (job.lastErrorAt ? Date.parse(job.lastErrorAt) : 0);
    broken.sort((a, b) => errorTimeOf(b) - errorTimeOf(a));
    return broken;
}
249
/**
 * Detect breakage of the self-improve loop, which writes to its own
 * experiment-log.jsonl rather than cron/runs/.
 *
 * Reads the loop's state file and the last 10 logged experiments, and
 * returns a synthetic BrokenJob named 'self-improve' when any of these hold:
 *   a. state.infraError is set;
 *   b. there are >= 3 experiments from the last 7 days and every one of
 *      them errored (denied with a reason starting with 'Error');
 *   c. state.lastRunAt is within the last 48h but no experiment from the
 *      last 7 days was logged (silent early exit).
 *
 * Mode (b) uses the same "errored" predicate as the reported error count.
 * Checking only `approvalStatus === 'denied'` flagged runs of ordinary
 * denials as breakage and produced a "0/3 recent iterations errored" opinion.
 *
 * Malformed input (non-object state, non-object log lines, non-string
 * error/reason/diagnostic fields) is tolerated instead of throwing, because
 * an exception here would fail both the dashboard endpoint and the sweep.
 *
 * Note: errorCount48h / totalRuns48h on the result are counted over the
 * 7-day experiment lookback, not 48h.
 *
 * Returns null when the state file is missing or unusable, or the loop
 * looks healthy.
 */
function detectSelfImproveBreakage(now) {
    if (!existsSync(SELF_IMPROVE_STATE_FILE))
        return null;
    let state;
    try {
        state = JSON.parse(readFileSync(SELF_IMPROVE_STATE_FILE, 'utf-8'));
    }
    catch {
        return null;
    }
    // Valid JSON that is not an object (e.g. `null`) carries no usable signal.
    if (state === null || typeof state !== 'object')
        return null;
    // Coerce possibly-missing / non-string log fields to text before using
    // string methods on them.
    const textOf = (value) => {
        if (typeof value === 'string')
            return value;
        return value == null ? '' : String(value);
    };
    const experiments = [];
    if (existsSync(SELF_IMPROVE_LOG_FILE)) {
        try {
            const lines = readFileSync(SELF_IMPROVE_LOG_FILE, 'utf-8').trim().split('\n').filter(Boolean);
            for (const line of lines.slice(-10)) {
                try {
                    const parsed = JSON.parse(line);
                    if (parsed !== null && typeof parsed === 'object')
                        experiments.push(parsed);
                }
                catch { /* skip malformed line */ }
            }
        }
        catch { /* non-fatal */ }
    }
    const lastRunMs = state.lastRunAt ? Date.parse(state.lastRunAt) : 0;
    const lookback48h = now - 48 * 60 * 60 * 1000;
    const staleLookback = now - 7 * 24 * 60 * 60 * 1000; // 7 days
    const recentExperiments = experiments.filter(e => {
        const ts = e.startedAt ? Date.parse(e.startedAt) : 0;
        return Number.isFinite(ts) && ts >= staleLookback;
    });
    const recentErrors = recentExperiments.filter(e => e.approvalStatus === 'denied' && textOf(e.reason).startsWith('Error'));
    const hasInfraError = !!state.infraError;
    const allRecentErrored = recentExperiments.length >= 3
        && recentErrors.length === recentExperiments.length;
    const silentEarlyExit = lastRunMs > lookback48h
        && recentExperiments.length === 0;
    if (!hasInfraError && !allRecentErrored && !silentEarlyExit)
        return null;
    // Up to 3 most recent non-empty error messages, newest first.
    const lastErrors = [];
    for (let i = experiments.length - 1; i >= 0 && lastErrors.length < 3; i--) {
        const err = textOf(experiments[i].error).trim();
        if (!err)
            continue;
        lastErrors.push(err.slice(0, 400));
    }
    // Without an explicit infraError, a schema-looking last error is still
    // worth surfacing: every iteration may have died on the same API 400
    // without state.infraError ever being persisted.
    const lastLoggedError = experiments.length > 0 ? textOf(experiments[experiments.length - 1].error) : '';
    const inferredInfraSchema = /input_schema|tools\.\d+\.custom/i.test(lastLoggedError);
    let opinion;
    if (hasInfraError) {
        opinion = `infra: ${state.infraError.category} — ${textOf(state.infraError.diagnostic).slice(0, 200)}`;
    }
    else if (silentEarlyExit && inferredInfraSchema) {
        opinion = 'loop ran but produced no experiments — last logged error was an MCP tool schema validation (API 400). Check external MCP servers (claude_desktop_config.json, Claude Code settings) for a recently-updated package exposing a malformed input_schema.';
    }
    else if (silentEarlyExit) {
        opinion = 'loop ran but produced no experiments — likely crashing before iteration (check metrics gathering or hypothesis generation)';
    }
    else {
        opinion = `${recentErrors.length}/${recentExperiments.length} recent iterations errored`;
    }
    return {
        jobName: 'self-improve',
        agentSlug: undefined,
        errorCount48h: recentErrors.length,
        totalRuns48h: recentExperiments.length,
        lastErrorAt: experiments[experiments.length - 1]?.startedAt ?? state.lastRunAt ?? null,
        lastErrors,
        circuitBreakerEngagedAt: hasInfraError ? state.lastRunAt ?? null : null,
        lastAdvisorOpinion: opinion,
    };
}
337
/**
 * Build the owner-facing text for a list of broken jobs: a headline with the
 * job count, one bullet per job (failure ratio, breaker note, first line of
 * the newest error, advisor opinion), and a pointer to the dashboard.
 */
function formatReport(jobs) {
    const pluralSuffix = jobs.length === 1 ? '' : 's';
    const headline = `🚨 **${jobs.length} cron job${pluralSuffix} repeatedly failing** (last ${WINDOW_HOURS}h)`;
    const jobLines = jobs.flatMap((job) => {
        const breakerNote = job.circuitBreakerEngagedAt ? ' · circuit breaker engaged' : '';
        const section = [`• \`${job.jobName}\` — ${job.errorCount48h}/${job.totalRuns48h} runs failed${breakerNote}`];
        if (job.lastErrors.length > 0) {
            // Only the first line of the newest error, capped at 140 chars.
            const [firstLine] = job.lastErrors[0].split('\n');
            section.push(` Last error: ${firstLine.slice(0, 140)}`);
        }
        if (job.lastAdvisorOpinion) {
            section.push(` Advisor: ${job.lastAdvisorOpinion.slice(0, 140)}`);
        }
        return section;
    });
    return [
        headline,
        '',
        ...jobLines,
        '',
        'Open the dashboard → Broken Jobs panel for the full picture.',
    ].join('\n');
}
357
/**
 * Run a sweep: identify currently-broken jobs, pick the ones the owner has
 * not been notified about within NOTIFY_COOLDOWN_HOURS, and dispatch one
 * consolidated message through `send`.
 *
 * Cooldown entries for jobs that are no longer broken are pruned on every
 * sweep so a later failure notifies promptly. Pruning used to happen only
 * when no job at all was broken, so a recovered job kept its stale cooldown
 * whenever any other job was still failing.
 *
 * Returns the jobs selected for a fresh notification (mostly for tests and
 * logs). The list is returned even if `send` rejects; in that case no
 * cooldown is recorded, so the next sweep retries, and the failure is
 * logged as a warning.
 */
export async function runFailureSweep(send, now = Date.now()) {
    const broken = computeBrokenJobs(now);
    const state = loadState();
    // Forget cooldowns of recovered jobs, whether or not others are broken.
    const brokenNames = new Set(broken.map(job => job.jobName));
    let pruned = false;
    for (const name of Object.keys(state.notified)) {
        if (!brokenNames.has(name)) {
            delete state.notified[name];
            pruned = true;
        }
    }
    if (pruned)
        saveState(state);
    if (broken.length === 0)
        return [];
    const cooldownMs = NOTIFY_COOLDOWN_HOURS * 60 * 60 * 1000;
    const fresh = [];
    for (const job of broken) {
        const prev = state.notified[job.jobName];
        // An unparseable lastNotifiedAt makes the comparison false, so the
        // job is treated as not yet notified.
        if (prev && now - Date.parse(prev.lastNotifiedAt) < cooldownMs)
            continue;
        fresh.push(job);
    }
    if (fresh.length === 0)
        return [];
    try {
        await send(formatReport(fresh));
        // Stamp cooldowns only after the message actually went out.
        const stamp = new Date(now).toISOString();
        for (const job of fresh) {
            state.notified[job.jobName] = { lastNotifiedAt: stamp, lastErrorCount: job.errorCount48h };
        }
        saveState(state);
        appendAuditLog('notified', fresh.map(j => j.jobName));
        logger.info({ count: fresh.length, jobs: fresh.map(j => j.jobName) }, 'Failure monitor: notified owner');
    }
    catch (err) {
        logger.warn({ err }, 'Failure monitor: notification dispatch failed');
    }
    return fresh;
}
405
/**
 * Append one JSON line ({ action, jobs, timestamp }) to the monitor's audit
 * log at cron/failure-monitor.log under BASE_DIR. Best-effort: any failure
 * is ignored.
 */
function appendAuditLog(action, jobNames) {
    try {
        const auditPath = path.join(BASE_DIR, 'cron', 'failure-monitor.log');
        const record = {
            action,
            jobs: jobNames,
            timestamp: new Date().toISOString(),
        };
        appendFileSync(auditPath, `${JSON.stringify(record)}\n`);
    }
    catch { /* non-fatal */ }
}
416
+ //# sourceMappingURL=failure-monitor.js.map
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Clementine TypeScript — Cron fix verification.
3
+ *
4
+ * When a CRON.md (global or per-agent) is edited, we record a "pending
5
+ * verification" for any job whose definition changed AND that is currently
6
+ * in a failing state. After that job's next non-skipped run, we DM the
7
+ * owner with the verdict — succeeded or still failing — so a self-reported
8
+ * "fix" can't go untested again.
9
+ */
10
+ import type { CronJobDefinition, CronRunEntry } from '../types.js';
11
+ interface PendingVerification { // One job awaiting a post-edit verdict.
12
+ jobName: string; // Cron job name; also the key under `pending` in the state file.
13
+ recordedAt: string; // ISO-8601 timestamp taken when the edit was recorded.
14
+ preFailureCount: number; // The job's errorCount48h from the failure monitor at record time.
15
+ preLastError: string | null; // First entry of the monitor's lastErrors at record time, or null if none.
16
+ }
17
+ /**
18
+ * Compare an old and new jobs list and record verifications for any job that:
19
+ * - exists in both lists (new jobs aren't "fixes" of existing problems)
20
+ * - has its definition hash changed
21
+ * - is currently in a failing state per failure-monitor
22
+ *
23
+ * Disabled and removed jobs are handled too: if a currently failing job is
24
+ * disabled or removed in the edit, any pending verification for it is cleared
25
+ * (and logged) rather than left waiting for a run that will never come.
26
+ */
27
+ export declare function recordEditsForFailingJobs(oldJobs: CronJobDefinition[], newJobs: CronJobDefinition[]): void; // Only jobs in oldJobs that the failure monitor currently reports as broken are considered.
28
+ /**
29
+ * After a cron run completes, check whether we were waiting on a fix
30
+ * verification for this job. If so, send the owner a verdict and clear it.
31
+ *
32
+ * Skipped runs (circuit breaker, pre-check exit, etc.) don't carry signal
33
+ * and shouldn't count as a verdict either way.
34
+ */
35
+ export declare function checkAndDeliverVerification(entry: CronRunEntry, send: (text: string) => Promise<unknown>): Promise<void>; // A rejection from send() is caught and logged, not rethrown.
36
+ /** Read-only accessor for dashboards or debugging. */
37
+ export declare function listPendingVerifications(): PendingVerification[]; // Values of the persisted `pending` map, re-read from the state file on each call.
38
+ export {};
39
+ //# sourceMappingURL=fix-verification.d.ts.map
@@ -0,0 +1,144 @@
1
+ /**
2
+ * Clementine TypeScript — Cron fix verification.
3
+ *
4
+ * When a CRON.md (global or per-agent) is edited, we record a "pending
5
+ * verification" for any job whose definition changed AND that is currently
6
+ * in a failing state. After that job's next non-skipped run, we DM the
7
+ * owner with the verdict — succeeded or still failing — so a self-reported
8
+ * "fix" can't go untested again.
9
+ */
10
+ import { existsSync, mkdirSync, readFileSync, writeFileSync, } from 'node:fs';
11
+ import path from 'node:path';
12
+ import crypto from 'node:crypto';
13
+ import pino from 'pino';
14
+ import { BASE_DIR } from '../config.js';
15
+ import { computeBrokenJobs } from './failure-monitor.js';
16
+ const logger = pino({ name: 'clementine.fix-verification' }); // Module-scoped logger, tagged with this module's name.
17
+ const STATE_FILE = path.join(BASE_DIR, 'cron', 'fix-verifications.json'); // JSON file where pending verifications are persisted.
18
/**
 * Read the persisted verification state from STATE_FILE.
 *
 * Never throws. A missing file, unreadable or unparseable content, or a
 * `pending` value that is not a plain object all yield an empty state, so
 * callers can always index and assign into `state.pending`.
 *
 * @returns {{ pending: Object }} State whose `pending` maps job name to its
 *   pending-verification record.
 */
function loadState() {
    if (!existsSync(STATE_FILE)) {
        return { pending: {} };
    }
    try {
        const raw = JSON.parse(readFileSync(STATE_FILE, 'utf-8'));
        const pending = raw?.pending;
        if (pending == null) {
            return { pending: {} };
        }
        // An array or primitive here would break `state.pending[name] = …`
        // and `Object.values(state.pending)` further down the line.
        if (typeof pending !== 'object' || Array.isArray(pending)) {
            logger.warn({ file: STATE_FILE }, 'Fix-verification state has malformed "pending"; starting empty');
            return { pending: {} };
        }
        return { pending };
    }
    catch (err) {
        logger.warn({ err }, 'Failed to read fix-verification state; starting empty');
        return { pending: {} };
    }
}
29
/**
 * Write the verification state to STATE_FILE as pretty-printed JSON,
 * creating the parent directory first if needed.
 *
 * Any failure along the way is logged as a warning and not rethrown.
 *
 * @param {object} state - State object to serialize.
 * @returns {void}
 */
function saveState(state) {
    try {
        const targetDir = path.dirname(STATE_FILE);
        mkdirSync(targetDir, { recursive: true });
        const serialized = JSON.stringify(state, null, 2);
        writeFileSync(STATE_FILE, serialized);
    }
    catch (writeError) {
        logger.warn({ err: writeError }, 'Failed to persist fix-verification state');
    }
}
38
/**
 * Fingerprint the parts of a job definition that a fix could plausibly alter:
 * schedule, prompt, tier, maxTurns, model, workDir, mode, maxHours, preCheck
 * and successCriteria. `enabled` is left out on purpose, because disabling a
 * job isn't a fix.
 *
 * @param {object} j - Job definition to fingerprint.
 * @returns {string} First 12 hex characters of the SHA-1 of the selected fields.
 */
function jobHash(j) {
    const { schedule, prompt, tier, maxTurns, model, workDir, mode, maxHours, preCheck, successCriteria, } = j;
    // Key order is fixed here, so equal field values always serialize equally.
    const fingerprintSource = JSON.stringify({
        schedule,
        prompt,
        tier,
        maxTurns,
        model,
        workDir,
        mode,
        maxHours,
        preCheck,
        successCriteria,
    });
    const hasher = crypto.createHash('sha1');
    hasher.update(fingerprintSource);
    const hexDigest = hasher.digest('hex');
    return hexDigest.slice(0, 12);
}
59
/**
 * Compare the previous and edited job lists and update pending fix
 * verifications for jobs the failure monitor currently reports as broken.
 *
 * For each broken job present in `oldJobs`:
 *   - missing from `newJobs`, or present but not enabled: any pending
 *     verification for it is dropped (and logged) instead of waiting for a
 *     run that will never come;
 *   - definition hash unchanged: nothing happens;
 *   - otherwise: a pending verification is recorded, replacing any earlier
 *     one, with the monitor's current error count and most recent error.
 *
 * Jobs that appear only in `newJobs` are never considered: a new job isn't a
 * "fix" of an existing problem. State is written only if something changed.
 *
 * @param {Array<object>} oldJobs - Job definitions before the edit.
 * @param {Array<object>} newJobs - Job definitions after the edit.
 * @returns {void}
 */
export function recordEditsForFailingJobs(oldJobs, newJobs) {
    const brokenByName = new Map(computeBrokenJobs().map((b) => [b.jobName, b]));
    if (brokenByName.size === 0) {
        return; // nothing is failing — nothing to verify
    }
    const oldByName = new Map(oldJobs.map((j) => [j.name, j]));
    const newByName = new Map(newJobs.map((j) => [j.name, j]));
    const state = loadState();
    const stamp = new Date().toISOString();
    let mutated = false;
    // Drop a pending entry, flagging a write only when one actually existed.
    const clearPending = (name) => {
        if (Object.prototype.hasOwnProperty.call(state.pending, name)) {
            delete state.pending[name];
            mutated = true;
        }
    };
    for (const [name, oldJob] of oldByName) {
        const brokenJob = brokenByName.get(name);
        if (!brokenJob) {
            continue; // not currently broken — nothing to verify
        }
        const newJob = newByName.get(name);
        if (!newJob) {
            // Job removed entirely. Treat as resolved by removal.
            clearPending(name);
            logger.info({ job: name }, 'Failing job removed from CRON.md — verification cleared');
            continue;
        }
        if (!newJob.enabled) {
            // Job disabled. Don't wait for a run; clear and note.
            clearPending(name);
            logger.info({ job: name }, 'Failing job disabled in CRON.md — verification cleared');
            continue;
        }
        if (jobHash(oldJob) === jobHash(newJob)) {
            continue; // no relevant changes
        }
        state.pending[name] = {
            jobName: name,
            recordedAt: stamp,
            preFailureCount: brokenJob.errorCount48h,
            preLastError: brokenJob.lastErrors[0] ?? null,
        };
        mutated = true;
        logger.info({ job: name, preFailureCount: brokenJob.errorCount48h }, 'Recorded pending fix verification');
    }
    if (mutated) {
        saveState(state);
    }
}
110
/**
 * After a cron run completes, check whether a fix verification is pending for
 * that job. If so, clear it and send the owner a verdict.
 *
 * Skipped runs (circuit breaker, pre-check exit, etc.) don't carry signal and
 * are ignored: the pending entry stays in place. Any other non-'ok' status is
 * reported as still failing.
 *
 * Delivery is at-most-once: the pending entry is removed and saved before
 * `send` is attempted, so a failed send is logged but not retried.
 *
 * @param {object} entry - Run log entry; `jobName`, `status` and `error` are read.
 * @param {(text: string) => Promise<unknown>} send - Delivers the verdict text.
 * @returns {Promise<void>}
 */
export async function checkAndDeliverVerification(entry, send) {
    if (entry.status === 'skipped') {
        return;
    }
    const state = loadState();
    const pending = state.pending[entry.jobName];
    if (!pending) {
        return;
    }
    delete state.pending[entry.jobName];
    saveState(state);
    const ok = entry.status === 'ok';
    const verdict = ok ? '✅ succeeded' : '⚠️ still failing';
    // recordedAt comes from a JSON file on disk; if it doesn't parse, drop the
    // age note rather than printing "NaNm later".
    const elapsedMs = Date.now() - Date.parse(pending.recordedAt);
    const age = Number.isFinite(elapsedMs)
        ? ` (${Math.max(1, Math.round(elapsedMs / 60000))}m later)`
        : '';
    // String() keeps a non-string error (e.g. an Error object) from throwing
    // on .split; only the first line, capped at 200 chars, is included.
    const detail = ok
        ? ''
        : `\nError: ${String(entry.error ?? 'unknown').split('\n')[0].slice(0, 200)}`;
    const msg = `**[Fix verification]** \`${entry.jobName}\` ${verdict} on its first run after edit${age}.${detail}`;
    try {
        await send(msg);
    }
    catch (err) {
        logger.warn({ err, job: entry.jobName }, 'Failed to send fix verification DM');
    }
}
140
/**
 * Read-only accessor for dashboards or debugging.
 *
 * @returns {Array<object>} The records currently stored under `pending`.
 */
export function listPendingVerifications() {
    const { pending } = loadState();
    return Object.values(pending);
}
144
+ //# sourceMappingURL=fix-verification.js.map
@@ -103,6 +103,13 @@ export class HeartbeatScheduler {
103
103
  catch (err) {
104
104
  logger.warn({ err }, 'Session eviction failed');
105
105
  }
106
+ // Cron failure sweep — surface jobs that have been silently failing.
107
+ // Runs every tick; per-job 24h cooldown lives inside the monitor.
108
+ import('./failure-monitor.js').then(({ runFailureSweep }) => {
109
+ runFailureSweep((text) => this.dispatcher.send(text, {})).catch(err => {
110
+ logger.warn({ err }, 'Failure sweep failed');
111
+ });
112
+ }).catch(err => logger.warn({ err }, 'Failure sweep import failed'));
106
113
  const now = new Date();
107
114
  const hour = now.getHours();
108
115
  // ── Nightly tasks: run regardless of active hours ─────────────────
@@ -626,10 +633,41 @@ export class HeartbeatScheduler {
626
633
  const prompt = buildInsightPrompt(signals);
627
634
  if (!prompt)
628
635
  return;
629
- // Run lightweight LLM call via gateway
630
- const response = await this.gateway.handleCronJob('insight-check', prompt, 1, // tier 1
631
- 1, // max 1 turn (just rating + message)
632
- 'haiku');
636
+ // Run lightweight LLM call via gateway. Log success AND failure to the
637
+ // cron run log so the failure monitor can see hourly breakage.
638
+ // maxTurns bumped 1 → 3 because the agent needs to fan out ~4 parallel
639
+ // tool calls (activity_history, outlook_inbox, goal_list, task_list)
640
+ // before composing its rating — at 1 turn it always crashes with
641
+ // "Reached maximum number of turns".
642
+ const icStartedAt = new Date();
643
+ let response = null;
644
+ try {
645
+ response = await this.gateway.handleCronJob('insight-check', prompt, 1, // tier 1
646
+ 3, // max 3 turns (parallel tool fan-out + synthesis)
647
+ 'haiku');
648
+ this.runLog.append({
649
+ jobName: 'insight-check',
650
+ startedAt: icStartedAt.toISOString(),
651
+ finishedAt: new Date().toISOString(),
652
+ status: 'ok',
653
+ durationMs: Date.now() - icStartedAt.getTime(),
654
+ attempt: 1,
655
+ outputPreview: (response ?? '').slice(0, 200),
656
+ });
657
+ }
658
+ catch (err) {
659
+ this.runLog.append({
660
+ jobName: 'insight-check',
661
+ startedAt: icStartedAt.toISOString(),
662
+ finishedAt: new Date().toISOString(),
663
+ status: 'error',
664
+ durationMs: Date.now() - icStartedAt.getTime(),
665
+ attempt: 1,
666
+ error: String(err).slice(0, 400),
667
+ errorType: 'transient',
668
+ });
669
+ throw err;
670
+ }
633
671
  if (!response)
634
672
  return;
635
673
  const insight = parseInsightResponse(response);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.0.14",
3
+ "version": "1.0.15",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",