clementine-agent 1.8.1 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1340,6 +1340,32 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
1340
1340
  parts.push(`## Recent Corrections (apply immediately)\n\n${lines.join('\n')}`);
1341
1341
  }
1342
1342
  }
1343
+ // Inject recent feedback signals (closes the feedback → behavior loop).
1344
+ // Without this block, user thumbs-down + comments live in the feedback
1345
+ // table and never reach the agent's awareness — only the skill-suppress
1346
+ // filter consumed them. We surface aggregates + the last few commented
1347
+ // negatives so the agent can self-adjust on the next turn. Skipped when
1348
+ // there's nothing to report (no noise).
1349
+ if (this.memoryStore?.getRecentFeedbackSignals) {
1350
+ try {
1351
+ const sig = this.memoryStore.getRecentFeedbackSignals({ days: 14, limit: 3 });
1352
+ if (sig.negative > 0) {
1353
+ const lines = [];
1354
+ const total = sig.positive + sig.negative;
1355
+ const ratio = total > 0 ? Math.round((sig.negative / total) * 100) : 0;
1356
+ lines.push(`Last 14 days: ${sig.negative} negative / ${sig.positive} positive (${ratio}% negative).`);
1357
+ if (sig.negativesWithComments.length > 0) {
1358
+ lines.push('Recent negative comments — adjust accordingly:');
1359
+ for (const n of sig.negativesWithComments) {
1360
+ const comment = n.comment.length > 200 ? n.comment.slice(0, 200) + '…' : n.comment;
1361
+ lines.push(`- (${n.channel}) ${comment}`);
1362
+ }
1363
+ }
1364
+ parts.push(`## Recent feedback signals\n\n${lines.join('\n')}`);
1365
+ }
1366
+ }
1367
+ catch { /* non-fatal */ }
1368
+ }
1343
1369
  // Proactive skill injection: match user message against skill triggers
1344
1370
  if (this._lastUserMessage && !isAutonomous) {
1345
1371
  try {
@@ -48,6 +48,13 @@ export interface FixRecipe {
48
48
  category: FixCategory;
49
49
  /** Description of what this fix does, for DMs. */
50
50
  description: string;
51
+ /**
52
+ * Frontmatter keys this recipe may touch. Used to snapshot prior values
53
+ * before apply() runs so an ineffective fix can be reverted by post-fix
54
+ * verification without restoring fields the recipe never owned. Required
55
+ * for safe-cron-config recipes that participate in autoApply verification.
56
+ */
57
+ fields?: readonly string[];
51
58
  /**
52
59
  * For safe-cron-config: a function that mutates the job's frontmatter
53
60
  * entry in-place. Returns true if any change was made (false = idempotent
@@ -55,6 +55,7 @@ const PATTERNS = [
55
55
  recipe: () => ({
56
56
  category: 'safe-cron-config',
57
57
  description: 'Hit max-turns ceiling repeatedly. Switching to unleashed mode (multi-phase) so the job can complete its workflow.',
58
+ fields: ['mode', 'max_hours'],
58
59
  apply: (job) => {
59
60
  let changed = false;
60
61
  if (job.mode !== 'unleashed') {
@@ -201,17 +202,26 @@ function loadCronJob(trigger, cronPath, agentsDir) {
201
202
  /**
202
203
  * Apply the recipe's mutator to the job's frontmatter and write the CRON.md
203
204
  * (central or agent-scoped, whichever the lookup resolved to) back atomically.
204
- * Returns true if a change was actually written.
205
+ * Returns the captured prevFields snapshot when a change was written, or
206
+ * null when no change was needed (idempotent re-apply). prevFields uses
207
+ * `null` to represent "field was absent before the fix" — the revert path
208
+ * deletes the key in that case.
205
209
  */
206
210
  function applyCronEdit(lookup, recipe) {
207
211
  if (!recipe.apply)
208
- return false;
212
+ return null;
213
+ // Snapshot only the fields the recipe declared it would touch — over-broad
214
+ // snapshots would clobber concurrent edits during a revert.
215
+ const prevFields = {};
216
+ for (const key of recipe.fields ?? []) {
217
+ prevFields[key] = key in lookup.job ? lookup.job[key] : null;
218
+ }
209
219
  const changed = recipe.apply(lookup.job);
210
220
  if (!changed)
211
- return false;
221
+ return null;
212
222
  const updated = matter.stringify(lookup.parsed.content, lookup.parsed.data);
213
223
  writeFileSync(lookup.cronPath, updated);
214
- return true;
224
+ return prevFields;
215
225
  }
216
226
  function writePendingChange(record, dir) {
217
227
  mkdirSync(dir, { recursive: true });
@@ -370,9 +380,25 @@ export class SelfImproveLoop {
370
380
  logger.warn({ jobName: trigger.jobName, agentSlug }, 'Job not found in any CRON.md — cannot apply fix');
371
381
  return;
372
382
  }
373
- const applied = applyCronEdit(lookup, recipe);
374
- if (applied) {
383
+ const prevFields = applyCronEdit(lookup, recipe);
384
+ if (prevFields) {
375
385
  counts.applied++;
386
+ // Register the edit for post-fix verification. The verifier watches
387
+ // the next AUTOAPPLY_VERDICT_WINDOW non-skipped runs and reverts
388
+ // prevFields if 0 succeed. Lazy import avoids pulling the gateway
389
+ // graph into the agent layer at module-load time.
390
+ try {
391
+ const { recordAutoApplyForVerification } = await import('../gateway/fix-verification.js');
392
+ recordAutoApplyForVerification(trigger.jobName, {
393
+ kind: 'cron-config',
394
+ file: lookup.cronPath,
395
+ bareName: lookup.bareName,
396
+ prevFields,
397
+ });
398
+ }
399
+ catch (err) {
400
+ logger.warn({ err, jobName: trigger.jobName }, 'Failed to register cron-config autoApply for verification (non-fatal)');
401
+ }
376
402
  const where = lookup.agentSlug
377
403
  ? `\`agents/${lookup.agentSlug}/CRON.md\``
378
404
  : '`CRON.md`';
@@ -381,7 +407,7 @@ export class SelfImproveLoop {
381
407
  '',
382
408
  recipe.description,
383
409
  '',
384
- `Edit applied to ${where}. I'll watch the next run to confirm it lands cleanly.`,
410
+ `Edit applied to ${where}. Verifying over the next 3 runs — I'll revert automatically if it doesn't help.`,
385
411
  ].join('\n'));
386
412
  }
387
413
  else {
@@ -32,5 +32,24 @@ export declare function cmdBrowserEnable(): Promise<void>;
32
32
  */
33
33
  export declare function maybePromptBrowserHarness(): Promise<void>;
34
34
  export declare function cmdBrowserDisable(): Promise<void>;
35
+ /**
36
+ * Non-interactive connect — meant for callers that aren't a TTY (MCP tool,
37
+ * daemon-internal callers). Returns a structured result instead of prompting
38
+ * or printing decorative output. Caller decides how to surface failures.
39
+ *
40
+ * Behavior:
41
+ * - CDP already up → { ok: true, alreadyConnected: true }
42
+ * - No Chrome running → launch with flag, poll, return result
43
+ * - Chrome running without flag → if allowQuitChrome=false, refuse with
44
+ * a clear message; if true, quit + relaunch (DESTRUCTIVE — closes tabs).
45
+ */
46
+ export declare function runConnectNonInteractive(opts?: {
47
+ allowQuitChrome?: boolean;
48
+ }): Promise<{
49
+ ok: boolean;
50
+ message: string;
51
+ alreadyConnected?: boolean;
52
+ needsForceQuit?: boolean;
53
+ }>;
35
54
  export declare function cmdBrowserConnect(): Promise<void>;
36
55
  //# sourceMappingURL=browser.d.ts.map
@@ -354,11 +354,85 @@ export async function cmdBrowserDisable() {
354
354
  console.log();
355
355
  }
356
356
  /**
357
- * Core connect logic — quits any running Chrome and relaunches with
358
- * --remote-debugging-port=9222 so browser-harness can connect.
357
+ * Non-interactive connect — meant for callers that aren't a TTY (MCP tool,
358
+ * daemon-internal callers). Returns a structured result instead of prompting
359
+ * or printing decorative output. Caller decides how to surface failures.
359
360
  *
360
- * Returns true when CDP is reachable on :9222 at the end, false otherwise.
361
- * Never calls process.exit so it's safe to call from the auto-prompt flow.
361
+ * Behavior:
362
+ * - CDP already up → { ok: true, alreadyConnected: true }
363
+ * - No Chrome running → launch with flag, poll, return result
364
+ * - Chrome running without flag → if allowQuitChrome=false, refuse with
365
+ * a clear message; if true, quit + relaunch (DESTRUCTIVE — closes tabs).
366
+ */
367
export async function runConnectNonInteractive(opts = {}) {
    // Resolves to { ok, message, alreadyConnected?, needsForceQuit? }. This path
    // never prompts; Chrome is only quit when opts.allowQuitChrome is truthy.
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    // 1. Nothing to do when the CDP endpoint already answers.
    if (await probeCdp()) {
        return { ok: true, alreadyConnected: true, message: 'Already connected — Chrome is running with remote debugging on :9222.' };
    }
    // 2. Only the macOS and Linux quit/launch commands are implemented below.
    const platform = process.platform;
    if (platform !== 'darwin' && platform !== 'linux') {
        return {
            ok: false,
            message: 'Auto-connect is only supported on macOS and Linux. Launch Chrome manually with --remote-debugging-port=9222.',
        };
    }
    // 3. Sample the running state once so the "refuse" and "quit" decisions
    //    cannot disagree if Chrome starts or stops between two checks.
    const chromeWasRunning = isChromeRunning();
    if (chromeWasRunning && !opts.allowQuitChrome) {
        return {
            ok: false,
            needsForceQuit: true,
            message: 'Chrome is running without remote debugging. Connecting requires quitting Chrome and relaunching with --remote-debugging-port=9222 (this closes your current Chrome windows). Re-run with force_quit=true to proceed, or quit Chrome yourself first and call this again.',
        };
    }
    // Set when Chrome was asked to quit but was still running after the grace
    // period; used only to pick an accurate failure message at the end.
    let quitTimedOut = false;
    if (chromeWasRunning) {
        try {
            if (platform === 'darwin') {
                execSync('osascript -e \'tell application "Google Chrome" to quit\'', { stdio: 'pipe' });
            }
            else {
                try {
                    execSync('pkill -TERM -x "google-chrome|chromium|chrome"', { stdio: 'pipe' });
                }
                catch { /* ok — pkill exits non-zero when no process matched */ }
            }
            // Give Chrome up to 15 × 300 ms to exit before relaunching.
            for (let attempt = 0; attempt < 15 && isChromeRunning(); attempt++) {
                await pause(300);
            }
            quitTimedOut = isChromeRunning();
        }
        catch {
            return { ok: false, message: 'Failed to quit Chrome. Quit it manually and try again.' };
        }
    }
    // 4. Launch Chrome with the remote-debugging flag.
    try {
        if (platform === 'darwin') {
            execSync('open -na "Google Chrome" --args --remote-debugging-port=9222', { stdio: 'pipe' });
        }
        else {
            const candidates = ['google-chrome', 'chromium', 'chrome'];
            const bin = candidates.find(commandExists);
            if (!bin) {
                return { ok: false, message: 'No Chrome / Chromium binary found in PATH.' };
            }
            // Backgrounded with nohup so execSync returns immediately.
            execSync(`nohup ${bin} --remote-debugging-port=9222 >/dev/null 2>&1 &`, { stdio: 'pipe' });
        }
    }
    catch (e) {
        return { ok: false, message: `Failed to launch Chrome: ${String(e).slice(0, 200)}` };
    }
    // 5. Poll the CDP endpoint: 24 probes, 250 ms apart.
    for (let attempt = 0; attempt < 24; attempt++) {
        await pause(250);
        if (await probeCdp()) {
            return { ok: true, message: 'Connected — Chrome is running with remote debugging on :9222.' };
        }
    }
    if (quitTimedOut) {
        // The old instance never went away, so "Chrome launched" would be a
        // misleading diagnosis — tell the caller what actually needs fixing.
        return {
            ok: false,
            message: 'Chrome did not quit in time, so the relaunch may not have picked up --remote-debugging-port=9222. Quit Chrome manually and try again.',
        };
    }
    return {
        ok: false,
        message: 'Chrome launched, but CDP socket isn\'t responding yet. Check that Chrome started successfully, then verify with: curl http://localhost:9222/json/version',
    };
}
432
+ /**
433
+ * Interactive CLI connect — wraps runConnectNonInteractive with TTY prompts
434
+ * and decorative output. Used by `clementine browser connect` and the auto-
435
+ * prompt flow.
362
436
  */
363
437
  async function runConnect(opts = {}) {
364
438
  // 1. Already connected? Done.
@@ -6603,6 +6603,15 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
6603
6603
  const stateFile = path.join(siDir, 'state.json');
6604
6604
  const logFile = path.join(siDir, 'experiment-log.jsonl');
6605
6605
  const pendingDir = path.join(siDir, 'pending-changes');
6606
+ // Active failure triggers — written by cron-scheduler when a job hits
6607
+ // 3+ consecutive errors; consumed by self-improve-loop on its next tick.
6608
+ // Surfacing them here gives the user a "work in progress" view between
6609
+ // tick boundaries (event-driven debounce + 1h fallback).
6610
+ const triggersDir = path.join(siDir, 'triggers');
6611
+ // Pending fix verifications — auto-applied fixes that are soaking
6612
+ // through the 3-run verdict window (cron-config / advisor-rule /
6613
+ // prompt-override). Reverts automatically if 0 succeed.
6614
+ const verificationsFile = path.join(BASE_DIR, 'cron', 'fix-verifications.json');
6606
6615
  let state = null;
6607
6616
  if (existsSync(stateFile)) {
6608
6617
  try {
@@ -6632,7 +6641,29 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
6632
6641
  }
6633
6642
  catch { /* ignore */ }
6634
6643
  }
6635
- res.json({ state, experiments, pending });
6644
+ let triggers = [];
6645
+ if (existsSync(triggersDir)) {
6646
+ try {
6647
+ triggers = readdirSync(triggersDir).filter(f => f.endsWith('.json'))
6648
+ .map(f => { try {
6649
+ return JSON.parse(readFileSync(path.join(triggersDir, f), 'utf-8'));
6650
+ }
6651
+ catch {
6652
+ return null;
6653
+ } })
6654
+ .filter(Boolean);
6655
+ }
6656
+ catch { /* ignore */ }
6657
+ }
6658
+ let verifications = [];
6659
+ if (existsSync(verificationsFile)) {
6660
+ try {
6661
+ const raw = JSON.parse(readFileSync(verificationsFile, 'utf-8'));
6662
+ verifications = Object.values(raw.pending ?? {});
6663
+ }
6664
+ catch { /* ignore */ }
6665
+ }
6666
+ res.json({ state, experiments, pending, triggers, verifications });
6636
6667
  });
6637
6668
  app.post('/api/self-improve/run', async (_req, res) => {
6638
6669
  try {
@@ -12930,14 +12961,34 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
12930
12961
  </div>
12931
12962
  </div>
12932
12963
  <div class="tab-pane" id="tab-intelligence-learning">
12933
- <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:12px">
12934
- <div style="font-size:13px;color:var(--text-secondary)">Self-improvement runs nightly at 1 AM. You can also trigger it manually.</div>
12964
+ <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:12px;gap:12px;flex-wrap:wrap">
12965
+ <div style="font-size:13px;color:var(--text-secondary);max-width:680px">
12966
+ Self-improvement runs nightly at 1 AM. The autonomous loop also auto-fixes failing crons (3+ consecutive errors) and verifies each fix over the next 3 runs &mdash; reverting automatically if it doesn't help.
12967
+ </div>
12935
12968
  <button class="btn-sm btn-primary" onclick="siRunCycle()" id="si-run-btn">Run Now</button>
12936
12969
  </div>
12937
12970
  <div class="grid-2" id="si-status-cards">
12938
12971
  <div class="skel-block"><div class="skel-row med"></div><div class="skel-row short"></div></div>
12939
12972
  <div class="skel-block"><div class="skel-row med"></div><div class="skel-row short"></div></div>
12940
12973
  </div>
12974
+ <div class="card" style="margin-top:16px">
12975
+ <div class="card-header" style="display:flex;align-items:center;justify-content:space-between">
12976
+ <span>Active failures</span>
12977
+ <span class="tab-badge" id="tab-si-triggers" style="display:none;background:#ef4444;color:#fff">0</span>
12978
+ </div>
12979
+ <div class="card-body" id="si-triggers-list" style="padding:0">
12980
+ <div class="empty-state" style="padding:14px">No active failures &mdash; nothing has tripped 3+ consecutive errors.</div>
12981
+ </div>
12982
+ </div>
12983
+ <div class="card" style="margin-top:16px">
12984
+ <div class="card-header" style="display:flex;align-items:center;justify-content:space-between">
12985
+ <span>Verifying fixes</span>
12986
+ <span class="tab-badge" id="tab-si-verifying" style="display:none;background:#f59e0b;color:#000">0</span>
12987
+ </div>
12988
+ <div class="card-body" id="si-verifying-list" style="padding:0">
12989
+ <div class="empty-state" style="padding:14px">No fixes currently soaking. Auto-fixes are verified over 3 runs and reverted if 0 succeed.</div>
12990
+ </div>
12991
+ </div>
12941
12992
  <div class="card" style="margin-top:16px">
12942
12993
  <div class="card-header" style="display:flex;align-items:center;justify-content:space-between">
12943
12994
  <span>Pending Proposals</span>
@@ -21291,6 +21342,7 @@ async function refreshMemoryHealth() {
21291
21342
  html += '<div style="flex:1;min-width:240px">';
21292
21343
  html += '<div style="font-weight:600;margin-bottom:4px">Retrieval running on sparse vectors for ' + missing.toLocaleString() + ' chunks</div>';
21293
21344
  html += '<div style="font-size:12px;color:var(--text-muted)">Backfill builds 768-dim neural embeddings for semantic search. First run downloads ~440MB.</div>';
21345
+ html += '<div style="font-size:11px;color:var(--text-muted);margin-top:4px">Auto-backfill runs every 6h (~100 chunks/cycle). Use the buttons below to push faster.</div>';
21294
21346
  html += '</div>';
21295
21347
  html += '<button class="btn-sm" onclick="memoryHealthAction(\\'reembed-dense\\', { limit: 200 })" title="Embed up to 200 chunks now">Backfill 200</button>';
21296
21348
  html += '<button class="btn-sm" onclick="memoryHealthAction(\\'reembed-dense\\', { limit: 2000 })" title="Embed up to 2000 chunks now (slower)">Backfill 2000</button>';
@@ -24409,12 +24461,77 @@ async function refreshSelfImprove() {
24409
24461
  const state = d.state;
24410
24462
  const experiments = d.experiments || [];
24411
24463
  const pending = d.pending || [];
24464
+ const triggers = d.triggers || [];
24465
+ const verifications = d.verifications || [];
24412
24466
 
24413
- // Update tab badge
24467
+ // Update tab badge — combine human-attention queues so the sidebar
24468
+ // count reflects "things that need you to look at", not just proposals.
24469
+ const attentionCount = pending.length + triggers.length;
24414
24470
  const badge = document.getElementById('nav-si-pending');
24415
- if (badge) badge.textContent = pending.length || '0';
24471
+ if (badge) badge.textContent = attentionCount || '0';
24416
24472
  var _sib = document.getElementById('tab-si-pending');
24417
24473
  if (_sib) { _sib.textContent = pending.length || '0'; _sib.style.display = pending.length > 0 ? '' : 'none'; }
24474
+ var _sit = document.getElementById('tab-si-triggers');
24475
+ if (_sit) { _sit.textContent = triggers.length || '0'; _sit.style.display = triggers.length > 0 ? '' : 'none'; }
24476
+ var _siv = document.getElementById('tab-si-verifying');
24477
+ if (_siv) { _siv.textContent = verifications.length || '0'; _siv.style.display = verifications.length > 0 ? '' : 'none'; }
24478
+
24479
+ // Active failure triggers — jobs at 3+ consecutive errors; the loop
24480
+ // will pick these up on its next tick (event-driven; ~2s debounce).
24481
+ const triggersEl = document.getElementById('si-triggers-list');
24482
+ if (triggersEl) {
24483
+ if (triggers.length === 0) {
24484
+ triggersEl.innerHTML = '<div class="empty-state" style="padding:14px">No active failures &mdash; nothing has tripped 3+ consecutive errors.</div>';
24485
+ } else {
24486
+ triggersEl.innerHTML = triggers.map(function(t) {
24487
+ var owner = t.agentSlug ? '@' + esc(t.agentSlug) : 'global';
24488
+ var when = t.triggeredAt ? new Date(t.triggeredAt).toLocaleString() : '—';
24489
+ var firstError = (t.recentErrors && t.recentErrors[0]) ? String(t.recentErrors[0]).slice(0, 200) : '';
24490
+ return '<div style="padding:12px;border-bottom:1px solid var(--border)">' +
24491
+ '<div style="display:flex;justify-content:space-between;align-items:baseline;gap:8px;flex-wrap:wrap">' +
24492
+ '<div><strong>' + esc(t.jobName || '—') + '</strong> ' +
24493
+ '<span style="font-size:11px;color:var(--text-muted)">&middot; owner: ' + owner + '</span> ' +
24494
+ '<span style="font-size:11px;color:var(--danger,#ef4444)">&middot; ' + (t.consecutiveErrors || 0) + ' consecutive errors</span></div>' +
24495
+ '<span style="font-size:11px;color:var(--text-muted)">' + esc(when) + '</span>' +
24496
+ '</div>' +
24497
+ (firstError ? '<div style="margin-top:6px;font-size:12px;color:var(--text-secondary);font-family:ui-monospace,monospace">' + esc(firstError) + '</div>' : '') +
24498
+ '</div>';
24499
+ }).join('');
24500
+ }
24501
+ }
24502
+
24503
+ // Pending fix verifications — auto-fixes soaking through the 3-run window.
24504
+ const verifyEl = document.getElementById('si-verifying-list');
24505
+ if (verifyEl) {
24506
+ if (verifications.length === 0) {
24507
+ verifyEl.innerHTML = '<div class="empty-state" style="padding:14px">No fixes currently soaking. Auto-fixes are verified over 3 runs and reverted if 0 succeed.</div>';
24508
+ } else {
24509
+ verifyEl.innerHTML = verifications.map(function(v) {
24510
+ var outcomes = v.postRunOutcomes || [];
24511
+ var dots = '';
24512
+ for (var i = 0; i < 3; i++) {
24513
+ var o = outcomes[i];
24514
+ var color = o === 'ok' ? 'var(--success,#10b981)' : o === 'error' || o === 'retried' ? 'var(--danger,#ef4444)' : 'var(--border)';
24515
+ dots += '<span title="' + (o || 'pending') + '" style="display:inline-block;width:10px;height:10px;border-radius:50%;background:' + color + ';margin-right:4px"></span>';
24516
+ }
24517
+ var kind = v.autoApply && v.autoApply.kind ? v.autoApply.kind : 'hand-edit';
24518
+ var when = v.recordedAt ? new Date(v.recordedAt).toLocaleString() : '—';
24519
+ var fileLabel = v.autoApply && v.autoApply.file ? v.autoApply.file.split('/').slice(-3).join('/') : '';
24520
+ return '<div style="padding:12px;border-bottom:1px solid var(--border)">' +
24521
+ '<div style="display:flex;justify-content:space-between;align-items:baseline;gap:8px;flex-wrap:wrap">' +
24522
+ '<div><strong>' + esc(v.jobName || '—') + '</strong> ' +
24523
+ '<span style="font-size:11px;color:var(--text-muted)">&middot; ' + esc(kind) + '</span></div>' +
24524
+ '<div style="font-size:11px;color:var(--text-muted)">' + esc(when) + '</div>' +
24525
+ '</div>' +
24526
+ '<div style="margin-top:8px;display:flex;align-items:center;gap:10px;font-size:12px;color:var(--text-secondary)">' +
24527
+ '<span>' + dots + '</span>' +
24528
+ '<span>' + outcomes.length + ' / 3 runs sampled</span>' +
24529
+ (fileLabel ? '<span style="font-family:ui-monospace,monospace;color:var(--text-muted)">' + esc(fileLabel) + '</span>' : '') +
24530
+ '</div>' +
24531
+ '</div>';
24532
+ }).join('');
24533
+ }
24534
+ }
24418
24535
 
24419
24536
  // Status cards
24420
24537
  const cards = document.getElementById('si-status-cards');
@@ -26,9 +26,13 @@ interface PendingVerification {
26
26
  /**
27
27
  * Tracks an autoApply that's currently being verified. When the verdict
28
28
  * window closes negatively, revertFix() uses these fields to undo.
29
+ *
30
+ * - `advisor-rule` and `prompt-override` revert by deleting the written file.
31
+ * - `cron-config` reverts by re-applying the captured `prevFields` to the
32
+ * named job inside CRON.md (deleting CRON.md would be catastrophic).
29
33
  */
30
34
  export interface AutoApplyTracker {
31
- kind: 'advisor-rule' | 'prompt-override';
35
+ kind: 'advisor-rule' | 'prompt-override' | 'cron-config';
32
36
  /** Absolute path of the file the apply wrote. */
33
37
  file: string;
34
38
  /** advisor-rule only: the rule's id, used by the loader's hot-reload. */
@@ -36,6 +40,11 @@ export interface AutoApplyTracker {
36
40
  /** prompt-override only: scope label for the verdict message. */
37
41
  scope?: 'global' | 'agent' | 'job';
38
42
  scopeKey?: string;
43
+ /** cron-config only: bare job name as written in the CRON.md frontmatter. */
44
+ bareName?: string;
45
+ /** cron-config only: original values for the fields that were mutated.
46
+ * Use null for "field was absent (delete on revert)". */
47
+ prevFields?: Record<string, unknown>;
39
48
  }
40
49
  /**
41
50
  * Compare an old and new jobs list and record verifications for any job that:
@@ -141,11 +141,19 @@ export function recordAutoApplyForVerification(jobName, tracker) {
141
141
  logger.info({ job: jobName, kind: tracker.kind, file: tracker.file }, 'Recorded autoApply for verification — will track next runs');
142
142
  }
143
143
  /**
144
- * Undo an autoApply by deleting the file the apply wrote. Best-effort:
145
- * a missing file is not an error (might have been hand-deleted). Returns
146
- * true if a file was actually removed.
144
+ * Undo an autoApply. Dispatches on `tracker.kind`:
145
+ *
146
+ * - advisor-rule / prompt-override: delete the file the apply wrote.
147
+ * - cron-config: re-apply the captured `prevFields` to the named job
148
+ * in CRON.md (never delete CRON.md).
149
+ *
150
+ * Best-effort throughout: a missing file or vanished job is not an error.
151
+ * Returns true if a meaningful change was made.
147
152
  */
148
153
  function revertAutoApply(tracker) {
154
+ if (tracker.kind === 'cron-config') {
155
+ return revertCronConfig(tracker);
156
+ }
149
157
  try {
150
158
  if (existsSync(tracker.file)) {
151
159
  // Use unlinkSync from fs — kept dynamic to avoid a top-of-file import
@@ -161,6 +169,55 @@ function revertAutoApply(tracker) {
161
169
  }
162
170
  return false;
163
171
  }
172
+ /**
173
+ * Restore the previous values of the fields the cron-config autoApply mutated.
174
+ * A `null` in `prevFields` means the field was absent before the fix and
175
+ * should be deleted on revert.
176
+ */
177
function revertCronConfig(tracker) {
    // Returns true only when CDP... no: only when CRON.md was actually rewritten;
    // every skip or failure path logs a warning and returns false.
    if (!tracker.bareName || !tracker.prevFields) {
        logger.warn({ tracker }, 'cron-config revert missing bareName/prevFields — skipping');
        return false;
    }
    // Structural comparison: a snapshot that is equal in content but not in
    // identity (array, object, Date vs. its ISO string) must not count as a change.
    // NOTE(review): presumably prevFields is persisted as JSON between apply and
    // revert (the dashboard reads fix-verifications.json) — confirm.
    const sameValue = (a, b) => a === b || JSON.stringify(a) === JSON.stringify(b);
    try {
        if (!existsSync(tracker.file)) {
            logger.warn({ file: tracker.file }, 'cron-config revert: file missing — skipping');
            return false;
        }
        // NOTE(review): this relies on `require` being in scope. The surrounding
        // module uses ESM `export`; in a plain ES module `require` is undefined
        // unless created via module.createRequire, in which case this throws and
        // the catch below turns every revert into a no-op — confirm.
        const { readFileSync, writeFileSync } = require('node:fs');
        const matter = require('gray-matter');
        const parsed = matter(readFileSync(tracker.file, 'utf-8'));
        // Tolerate frontmatter without a jobs list and malformed (null) entries.
        const jobs = Array.isArray(parsed.data.jobs) ? parsed.data.jobs : [];
        const job = jobs.find((j) => String(j?.name ?? '') === tracker.bareName);
        if (!job) {
            logger.warn({ file: tracker.file, bareName: tracker.bareName }, 'cron-config revert: job not found — already removed/renamed');
            return false;
        }
        let mutated = false;
        for (const [key, prev] of Object.entries(tracker.prevFields)) {
            if (prev === null || prev === undefined) {
                // null/undefined snapshot: the field did not exist before the fix.
                if (key in job) {
                    delete job[key];
                    mutated = true;
                }
            }
            else if (!sameValue(job[key], prev)) {
                job[key] = prev;
                mutated = true;
            }
        }
        // Nothing differs from the snapshot — leave the file untouched.
        if (!mutated)
            return false;
        writeFileSync(tracker.file, matter.stringify(parsed.content, parsed.data));
        logger.warn({ file: tracker.file, bareName: tracker.bareName }, 'Reverted cron-config autoApply — fix did not help');
        return true;
    }
    catch (err) {
        logger.warn({ err, file: tracker.file }, 'Failed to revert cron-config autoApply');
        return false;
    }
}
164
221
  /**
165
222
  * After a cron run completes, check whether we were waiting on a fix
166
223
  * verification for this job. Two flows:
@@ -34,8 +34,14 @@ export declare function maybeVacuum(store: any): {
34
34
  */
35
35
  export declare function runStartupMaintenance(store: any): Promise<void>;
36
36
  /**
37
- * Start periodic maintenance on a 6-hour interval.
38
- * Returns the interval handle for cleanup on shutdown.
37
+ * Run one full periodic-maintenance cycle. Exported so tests can drive it
38
+ * without waiting on setInterval. `startPeriodicMaintenance` schedules
39
+ * this on the 6h cadence.
40
+ */
41
+ export declare function runPeriodicCycle(store: any, llmCall?: (prompt: string) => Promise<string>): Promise<void>;
42
+ /**
43
+ * Start periodic maintenance on a 6-hour interval. Returns the interval
44
+ * handle for cleanup on shutdown.
39
45
  */
40
46
  export declare function startPeriodicMaintenance(store: any, llmCall?: (prompt: string) => Promise<string>): ReturnType<typeof setInterval>;
41
47
  //# sourceMappingURL=maintenance.d.ts.map
@@ -14,6 +14,16 @@ import { runIntegrityProbes } from './integrity.js';
14
14
  const logger = pino({ name: 'clementine.maintenance' });
15
15
  const PERIODIC_INTERVAL_MS = 6 * 60 * 60 * 1000; // 6 hours
16
16
  const VACUUM_META_KEY = 'last_vacuum_at';
17
+ /**
18
+ * Number of chunks to dense-embed per periodic cycle. With 4 cycles/day
19
+ * that's 400 chunks/day — fast enough to cover a 3,500-chunk vault in
20
+ * ~9 days, slow enough that the GPU/CPU load barely registers. Override
21
+ * via env for power users with very large vaults.
22
+ */
23
+ const PERIODIC_DENSE_BATCH = (() => {
24
+ const raw = parseInt(process.env.CLEMENTINE_DENSE_BATCH ?? '', 10);
25
+ return Number.isFinite(raw) && raw > 0 ? raw : 100;
26
+ })();
17
27
  /**
18
28
  * Janitor pass — keeps the store bounded. Safe to call repeatedly.
19
29
  * Idempotent within a single run; surfaces totals for logging.
@@ -155,104 +165,128 @@ export async function runStartupMaintenance(store) {
155
165
  logger.info({ durationMs: Date.now() - start }, 'Startup maintenance complete');
156
166
  }
157
167
  /**
158
- * Start periodic maintenance on a 6-hour interval.
159
- * Returns the interval handle for cleanup on shutdown.
168
+ * Run one full periodic-maintenance cycle. Exported so tests can drive it
169
+ * without waiting on setInterval. `startPeriodicMaintenance` schedules
170
+ * this on the 6h cadence.
160
171
  */
161
- export function startPeriodicMaintenance(store, llmCall) {
162
- const runCycle = async () => {
163
- const start = Date.now();
164
- logger.info('Starting periodic memory maintenance');
165
- // 1. Decay + prune
172
+ export async function runPeriodicCycle(store, llmCall) {
173
+ const start = Date.now();
174
+ logger.info('Starting periodic memory maintenance');
175
+ // 1. Decay + prune
176
+ try {
177
+ store.decaySalience?.();
178
+ }
179
+ catch (err) {
180
+ logger.warn({ err }, 'Periodic decay failed');
181
+ }
182
+ try {
183
+ store.pruneStaleData?.();
184
+ }
185
+ catch (err) {
186
+ logger.warn({ err }, 'Periodic prune failed');
187
+ }
188
+ // 2. Rebuild vocab + backfill embeddings
189
+ try {
190
+ store.buildEmbeddings?.();
191
+ }
192
+ catch (err) {
193
+ logger.warn({ err }, 'Periodic embedding build failed');
194
+ }
195
+ // 2b. Idle dense-embedding backfill — process up to PERIODIC_DENSE_BATCH
196
+ // chunks per cycle so coverage drifts toward 100% without anyone running
197
+ // the CLI. The first time the dense model loads inside this process it
198
+ // pulls ~440MB; subsequent cycles reuse the loaded model. Failures
199
+ // (network, missing model dir, etc.) are caught and logged as warnings because the
200
+ // backfill is best-effort — query-time still has TF-IDF as fallback.
201
+ if (typeof store.backfillDenseEmbeddings === 'function') {
166
202
  try {
167
- store.decaySalience?.();
203
+ const result = await store.backfillDenseEmbeddings({ limit: PERIODIC_DENSE_BATCH });
204
+ if (result.embedded > 0) {
205
+ logger.info(result, 'Periodic dense embedding backfill');
206
+ }
168
207
  }
169
208
  catch (err) {
170
- logger.warn({ err }, 'Periodic decay failed');
209
+ logger.warn({ err }, 'Periodic dense embedding backfill failed');
171
210
  }
211
+ }
212
+ // 3. Consolidation (dedup, summarize, extract principles)
213
+ if (llmCall) {
172
214
  try {
173
- store.pruneStaleData?.();
215
+ const { runConsolidation } = await import('./consolidation.js');
216
+ const result = await runConsolidation(store, llmCall);
217
+ logger.info(result, 'Consolidation cycle complete');
174
218
  }
175
219
  catch (err) {
176
- logger.warn({ err }, 'Periodic prune failed');
220
+ logger.warn({ err }, 'Consolidation failed');
177
221
  }
178
- // 2. Rebuild vocab + backfill embeddings
222
+ // 4. Re-backfill embeddings for any new summary chunks from consolidation
179
223
  try {
180
224
  store.buildEmbeddings?.();
181
225
  }
182
226
  catch (err) {
183
- logger.warn({ err }, 'Periodic embedding build failed');
184
- }
185
- // 3. Consolidation (dedup, summarize, extract principles)
186
- if (llmCall) {
187
- try {
188
- const { runConsolidation } = await import('./consolidation.js');
189
- const result = await runConsolidation(store, llmCall);
190
- logger.info(result, 'Consolidation cycle complete');
191
- }
192
- catch (err) {
193
- logger.warn({ err }, 'Consolidation failed');
194
- }
195
- // 4. Re-backfill embeddings for any new summary chunks from consolidation
196
- try {
197
- store.buildEmbeddings?.();
198
- }
199
- catch (err) {
200
- logger.warn({ err }, 'Post-consolidation embedding build failed');
201
- }
227
+ logger.warn({ err }, 'Post-consolidation embedding build failed');
202
228
  }
203
- // 5. Extraction log pruning (legacy 90-day rule retained alongside cap)
204
- try {
205
- const conn = store.conn;
206
- if (conn) {
207
- conn.prepare(`DELETE FROM memory_extractions
229
+ }
230
+ // 5. Extraction log pruning (legacy 90-day rule retained alongside cap)
231
+ try {
232
+ const conn = store.conn;
233
+ if (conn) {
234
+ conn.prepare(`DELETE FROM memory_extractions
208
235
  WHERE extracted_at < datetime('now', '-90 days')
209
236
  AND status != 'active'`).run();
210
- }
211
237
  }
212
- catch { /* non-fatal */ }
213
- // 6. Janitor bounded growth.
214
- try {
215
- const result = runJanitor(store);
216
- if (result.softDeleted || result.physicallyDeleted || result.outcomesPruned || result.extractionsCapped) {
217
- logger.info(result, 'Janitor pass complete');
218
- }
219
- }
220
- catch (err) {
221
- logger.warn({ err }, 'Periodic janitor failed');
238
+ }
239
+ catch { /* non-fatal */ }
240
+ // 6. Janitor — bounded growth.
241
+ try {
242
+ const result = runJanitor(store);
243
+ if (result.softDeleted || result.physicallyDeleted || result.outcomesPruned || result.extractionsCapped) {
244
+ logger.info(result, 'Janitor pass complete');
222
245
  }
223
- // 6b. Integrity probes — FTS health, orphan derived_from, embedding gaps.
246
+ }
247
+ catch (err) {
248
+ logger.warn({ err }, 'Periodic janitor failed');
249
+ }
250
+ // 6b. Integrity probes — FTS health, orphan derived_from, embedding gaps.
251
+ try {
252
+ const report = runIntegrityProbes(store);
253
+ // Persist for the dashboard so the "last integrity check" surface
254
+ // doesn't depend on log scraping.
224
255
  try {
225
- const report = runIntegrityProbes(store);
226
- // Persist for the dashboard so the "last integrity check" surface
227
- // doesn't depend on log scraping.
228
- try {
229
- store.setMaintenanceMeta?.('last_integrity_report', JSON.stringify({ ...report, ranAt: new Date().toISOString() }));
230
- }
231
- catch { /* meta write is best-effort */ }
232
- if (!report.ftsOk || report.ftsRebuilt || report.orphanRefsNulled > 0 || report.missingEmbeddings > 0) {
233
- logger.info(report, 'Integrity probes complete');
234
- }
256
+ store.setMaintenanceMeta?.('last_integrity_report', JSON.stringify({ ...report, ranAt: new Date().toISOString() }));
235
257
  }
236
- catch (err) {
237
- logger.warn({ err }, 'Integrity probes failed');
238
- }
239
- // 7. VACUUM — idle-gated, at most once per vacuumIntervalDays.
240
- try {
241
- const vac = maybeVacuum(store);
242
- if (vac) {
243
- logger.info({
244
- sizeBeforeBytes: vac.sizeBeforeBytes,
245
- sizeAfterBytes: vac.sizeAfterBytes,
246
- reclaimedBytes: vac.sizeBeforeBytes - vac.sizeAfterBytes,
247
- durationMs: vac.durationMs,
248
- }, 'VACUUM complete');
249
- }
258
+ catch { /* meta write is best-effort */ }
259
+ if (!report.ftsOk || report.ftsRebuilt || report.orphanRefsNulled > 0 || report.missingEmbeddings > 0) {
260
+ logger.info(report, 'Integrity probes complete');
250
261
  }
251
- catch (err) {
252
- logger.warn({ err }, 'Periodic VACUUM failed');
262
+ }
263
+ catch (err) {
264
+ logger.warn({ err }, 'Integrity probes failed');
265
+ }
266
+ // 7. VACUUM — idle-gated, at most once per vacuumIntervalDays.
267
+ try {
268
+ const vac = maybeVacuum(store);
269
+ if (vac) {
270
+ logger.info({
271
+ sizeBeforeBytes: vac.sizeBeforeBytes,
272
+ sizeAfterBytes: vac.sizeAfterBytes,
273
+ reclaimedBytes: vac.sizeBeforeBytes - vac.sizeAfterBytes,
274
+ durationMs: vac.durationMs,
275
+ }, 'VACUUM complete');
253
276
  }
254
- logger.info({ durationMs: Date.now() - start }, 'Periodic maintenance complete');
255
- };
256
- return setInterval(runCycle, PERIODIC_INTERVAL_MS);
277
+ }
278
+ catch (err) {
279
+ logger.warn({ err }, 'Periodic VACUUM failed');
280
+ }
281
+ logger.info({ durationMs: Date.now() - start }, 'Periodic maintenance complete');
282
+ }
283
+ /**
284
+ * Start periodic maintenance on a 6-hour interval. Returns the interval
285
+ * handle for cleanup on shutdown.
286
+ */
287
+ export function startPeriodicMaintenance(store, llmCall) {
288
+ return setInterval(() => {
289
+ runPeriodicCycle(store, llmCall).catch(err => logger.warn({ err }, 'Periodic maintenance cycle threw — continuing'));
290
+ }, PERIODIC_INTERVAL_MS);
257
291
  }
258
292
  //# sourceMappingURL=maintenance.js.map
@@ -1040,6 +1040,31 @@ export declare class MemoryStore {
1040
1040
  * skill retrieved in that session. Window: last 60 days.
1041
1041
  */
1042
1042
  getSkillsToSuppress(agentSlug?: string): Set<string>;
1043
+ /**
1044
+ * Get a compact "recent feedback signal" snapshot for prompt injection.
1045
+ * Closes the feedback → behavior loop: the agent sees the recent (default 14-day)
1046
+ * negative pattern in its system prompt instead of feedback being
1047
+ * write-only.
1048
+ *
1049
+ * - `negative` / `positive`: counts in the window
1050
+ * - `negativesWithComments`: up to `limit` most recent negatives that
1051
+ * carry a non-empty comment (these are the actionable ones — silent
1052
+ * 👎 reactions don't tell the agent what to fix)
1053
+ * - the `behavioral-correction` channel is excluded because those corrections are
1054
+ * already pushed to hotCorrections directly
1055
+ */
1056
+ getRecentFeedbackSignals(opts?: {
1057
+ days?: number;
1058
+ limit?: number;
1059
+ }): {
1060
+ negative: number;
1061
+ positive: number;
1062
+ negativesWithComments: Array<{
1063
+ comment: string;
1064
+ channel: string;
1065
+ createdAt: string;
1066
+ }>;
1067
+ };
1043
1068
  /**
1044
1069
  * Get aggregate feedback statistics.
1045
1070
  */
@@ -3995,6 +3995,62 @@ export class MemoryStore {
3995
3995
  }
3996
3996
  return suppressed;
3997
3997
  }
3998
+ /**
3999
+ * Get a compact "recent feedback signal" snapshot for prompt injection.
4000
+ * Closes the feedback → behavior loop: the agent sees the recent (default 14-day)
4001
+ * negative pattern in its system prompt instead of feedback being
4002
+ * write-only.
4003
+ *
4004
+ * - `negative` / `positive`: counts in the window
4005
+ * - `negativesWithComments`: up to `limit` most recent negatives that
4006
+ * carry a non-empty comment (these are the actionable ones — silent
4007
+ * 👎 reactions don't tell the agent what to fix)
4008
+ * - the `behavioral-correction` channel is excluded because those corrections are
4009
+ * already pushed to hotCorrections directly
4010
+ */
4011
+ getRecentFeedbackSignals(opts = {}) {
4012
+ const days = Math.max(1, opts.days ?? 14);
4013
+ const limit = Math.max(1, Math.min(opts.limit ?? 3, 10));
4014
+ const since = `datetime('now', '-${days} days')`;
4015
+ let negative = 0;
4016
+ let positive = 0;
4017
+ let negativesWithComments = [];
4018
+ try {
4019
+ const rows = this.conn
4020
+ .prepare(`SELECT rating, COUNT(*) as cnt FROM feedback
4021
+ WHERE created_at >= ${since}
4022
+ AND channel != 'behavioral-correction'
4023
+ AND channel != 'preference-learned'
4024
+ GROUP BY rating`)
4025
+ .all();
4026
+ for (const row of rows) {
4027
+ if (row.rating === 'negative')
4028
+ negative = row.cnt;
4029
+ else if (row.rating === 'positive')
4030
+ positive = row.cnt;
4031
+ }
4032
+ const commented = this.conn
4033
+ .prepare(`SELECT comment, channel, created_at
4034
+ FROM feedback
4035
+ WHERE rating = 'negative'
4036
+ AND comment IS NOT NULL
4037
+ AND TRIM(comment) != ''
4038
+ AND channel != 'behavioral-correction'
4039
+ AND created_at >= ${since}
4040
+ ORDER BY created_at DESC, id DESC
4041
+ LIMIT ?`)
4042
+ .all(limit);
4043
+ negativesWithComments = commented.map((r) => ({
4044
+ comment: r.comment,
4045
+ channel: r.channel,
4046
+ createdAt: r.created_at,
4047
+ }));
4048
+ }
4049
+ catch {
4050
+ // Empty / legacy schema — return zeros
4051
+ }
4052
+ return { negative, positive, negativesWithComments };
4053
+ }
3998
4054
  /**
3999
4055
  * Get aggregate feedback statistics.
4000
4056
  */
@@ -1873,5 +1873,13 @@ export function registerAdminTools(server) {
1873
1873
  logger.info({ jobName: job_name, runCount: updated.runCount }, 'Cron progress saved');
1874
1874
  return textResult(`Progress saved for "${job_name}" (run #${updated.runCount}). ${(completedItems?.length ?? 0)} items completed, ${(updated.pendingItems?.length ?? 0)} pending.`);
1875
1875
  });
1876
+ // ── Browser harness — chat-driven Chrome connect ────────────────────
1877
+ server.tool('browser_connect', 'Connect Chrome to the browser harness via CDP. Idempotent — if Chrome is already running with remote debugging on :9222 this is a no-op. If no Chrome is running, launches Chrome with --remote-debugging-port=9222. If Chrome is running normally without the flag, refuses unless force_quit=true (which closes the user\'s open tabs). Use this so the user can connect from any chat channel without dropping to the terminal.', {
1878
+ force_quit: z.boolean().optional().describe('If true, quit any running Chrome before relaunching with the debug flag. DESTRUCTIVE — closes the user\'s open tabs. Only set after the user has explicitly confirmed they want this. Defaults to false.'),
1879
+ }, async ({ force_quit }) => {
1880
+ const { runConnectNonInteractive } = await import('../cli/browser.js');
1881
+ const result = await runConnectNonInteractive({ allowQuitChrome: !!force_quit });
1882
+ return textResult(result.message);
1883
+ });
1876
1884
  }
1877
1885
  //# sourceMappingURL=admin-tools.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.8.1",
3
+ "version": "1.9.0",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",