clementine-agent 1.8.2 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/assistant.js +55 -1
- package/dist/agent/self-improve-loop.d.ts +26 -0
- package/dist/agent/self-improve-loop.js +135 -25
- package/dist/cli/dashboard.js +137 -7
- package/dist/gateway/cron-scheduler.js +10 -2
- package/dist/gateway/fix-verification.d.ts +10 -1
- package/dist/gateway/fix-verification.js +60 -3
- package/dist/memory/maintenance.d.ts +8 -2
- package/dist/memory/maintenance.js +111 -77
- package/dist/memory/store.d.ts +25 -0
- package/dist/memory/store.js +56 -0
- package/package.json +1 -1
package/dist/agent/assistant.js
CHANGED
|
@@ -379,13 +379,20 @@ const AUTO_MEMORY_PROMPT = `You are a memory extraction agent. Your ONLY job is
|
|
|
379
379
|
|
|
380
380
|
{current_memory}
|
|
381
381
|
|
|
382
|
+
## Current User Model (already known — DO NOT re-extract these)
|
|
383
|
+
|
|
384
|
+
{current_user_model}
|
|
385
|
+
|
|
382
386
|
## Where to save what (memory routing):
|
|
383
387
|
|
|
384
388
|
**Always-in-context core memory** (use the user_model tool — these stay top-of-mind in every future session):
|
|
385
389
|
- **Lasting facts about ${OWNER}** (role, location, identifiers, durable preferences, communication style) → user_model(action="append", slot="user_facts", content=...)
|
|
386
390
|
- **Active goals/intents** (what ${OWNER} is trying to accomplish right now) → user_model(action="append", slot="goals", content=...)
|
|
387
391
|
- **Key people/projects** (recurring relationships) → user_model(action="append", slot="relationships", content=...)
|
|
388
|
-
-
|
|
392
|
+
- **DEFAULT to action="append"** — it adds the new fact alongside what's already there.
|
|
393
|
+
- Only use action="replace" when CORRECTING an existing fact, and you MUST include the FULL slot content (everything from "Current User Model" above, with the correction applied). \`replace\` overwrites the entire slot — passing only the new fact wipes everything else.
|
|
394
|
+
- Never use action="clear" from this extractor. Clearing is a deliberate user action, not a memory-extraction outcome.
|
|
395
|
+
- Slots are capped at 2000 chars — older content rolls off on append automatically.
|
|
389
396
|
|
|
390
397
|
**Vault notes** (use memory_write/note_create — durable but retrieved on demand):
|
|
391
398
|
- **People mentioned** — names, relationships, context → create or update person notes in 02-People/
|
|
@@ -1340,6 +1347,32 @@ Never spawn a sub-agent with vague instructions like "handle this brief."
|
|
|
1340
1347
|
parts.push(`## Recent Corrections (apply immediately)\n\n${lines.join('\n')}`);
|
|
1341
1348
|
}
|
|
1342
1349
|
}
|
|
1350
|
+
// Inject recent feedback signals (closes the feedback → behavior loop).
|
|
1351
|
+
// Without this block, user thumbs-down + comments live in the feedback
|
|
1352
|
+
// table and never reach the agent's awareness — only the skill-suppress
|
|
1353
|
+
// filter consumed them. We surface aggregates + the last few commented
|
|
1354
|
+
// negatives so the agent can self-adjust on the next turn. Skipped when
|
|
1355
|
+
// there's nothing to report (no noise).
|
|
1356
|
+
if (this.memoryStore?.getRecentFeedbackSignals) {
|
|
1357
|
+
try {
|
|
1358
|
+
const sig = this.memoryStore.getRecentFeedbackSignals({ days: 14, limit: 3 });
|
|
1359
|
+
if (sig.negative > 0) {
|
|
1360
|
+
const lines = [];
|
|
1361
|
+
const total = sig.positive + sig.negative;
|
|
1362
|
+
const ratio = total > 0 ? Math.round((sig.negative / total) * 100) : 0;
|
|
1363
|
+
lines.push(`Last 14 days: ${sig.negative} negative / ${sig.positive} positive (${ratio}% negative).`);
|
|
1364
|
+
if (sig.negativesWithComments.length > 0) {
|
|
1365
|
+
lines.push('Recent negative comments — adjust accordingly:');
|
|
1366
|
+
for (const n of sig.negativesWithComments) {
|
|
1367
|
+
const comment = n.comment.length > 200 ? n.comment.slice(0, 200) + '…' : n.comment;
|
|
1368
|
+
lines.push(`- (${n.channel}) ${comment}`);
|
|
1369
|
+
}
|
|
1370
|
+
}
|
|
1371
|
+
parts.push(`## Recent feedback signals\n\n${lines.join('\n')}`);
|
|
1372
|
+
}
|
|
1373
|
+
}
|
|
1374
|
+
catch { /* non-fatal */ }
|
|
1375
|
+
}
|
|
1343
1376
|
// Proactive skill injection: match user message against skill triggers
|
|
1344
1377
|
if (this._lastUserMessage && !isAutonomous) {
|
|
1345
1378
|
try {
|
|
@@ -1612,6 +1645,7 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
1612
1645
|
'Read', 'Write', 'Edit', 'Bash', 'Glob', 'Grep',
|
|
1613
1646
|
'WebSearch', 'WebFetch',
|
|
1614
1647
|
mcpTool('working_memory'),
|
|
1648
|
+
mcpTool('user_model'),
|
|
1615
1649
|
mcpTool('memory_read'),
|
|
1616
1650
|
mcpTool('memory_write'),
|
|
1617
1651
|
mcpTool('memory_search'),
|
|
@@ -3657,10 +3691,23 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
3657
3691
|
// Non-fatal — proceed without corrections
|
|
3658
3692
|
}
|
|
3659
3693
|
}
|
|
3694
|
+
// Render current user_model state so the extractor can: (a) skip
|
|
3695
|
+
// re-extracting facts already there, (b) safely use action="replace"
|
|
3696
|
+
// by passing the full slot content with a correction applied. Scoped
|
|
3697
|
+
// to the active agent — Clementine sees global slots, hired agents
|
|
3698
|
+
// see their own per-agent slots.
|
|
3699
|
+
let currentUserModel = '(empty — no slots populated yet)';
|
|
3700
|
+
try {
|
|
3701
|
+
const rendered = this.memoryStore?.renderUserModel?.(profile?.slug ?? null);
|
|
3702
|
+
if (rendered && rendered.trim())
|
|
3703
|
+
currentUserModel = rendered;
|
|
3704
|
+
}
|
|
3705
|
+
catch { /* non-fatal */ }
|
|
3660
3706
|
const memPrompt = AUTO_MEMORY_PROMPT
|
|
3661
3707
|
.replace('{user_message}', userMessage)
|
|
3662
3708
|
.replace('{assistant_response}', truncatedResponse)
|
|
3663
3709
|
.replace('{current_memory}', currentMemory || '(empty — no existing memory yet)')
|
|
3710
|
+
.replace('{current_user_model}', currentUserModel)
|
|
3664
3711
|
.replace('{recent_corrections}', correctionsText);
|
|
3665
3712
|
const userMessageSnippet = userMessage.slice(0, 500);
|
|
3666
3713
|
const stream = query({
|
|
@@ -3680,6 +3727,13 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
3680
3727
|
mcpTool('task_add'),
|
|
3681
3728
|
mcpTool('note_take'),
|
|
3682
3729
|
mcpTool('memory_read'),
|
|
3730
|
+
// Auto-extractor needs user_model to populate the always-in-context
|
|
3731
|
+
// core slots (user_facts, goals, relationships, agent_persona).
|
|
3732
|
+
// The MCP server boots with CLEMENTINE_TEAM_AGENT=<slug>, so writes
|
|
3733
|
+
// are scoped to the active agent automatically — Clementine's
|
|
3734
|
+
// sessions populate global slots, hired-agent sessions populate
|
|
3735
|
+
// that agent's per-agent slots.
|
|
3736
|
+
mcpTool('user_model'),
|
|
3683
3737
|
],
|
|
3684
3738
|
mcpServers: {
|
|
3685
3739
|
[TOOLS_SERVER]: {
|
|
@@ -26,6 +26,19 @@
|
|
|
26
26
|
*/
|
|
27
27
|
export interface TriggerFile {
|
|
28
28
|
jobName: string;
|
|
29
|
+
/**
|
|
30
|
+
* Bare job name (without `{agentSlug}:` prefix). Set by cron-scheduler
|
|
31
|
+
* for agent-scoped jobs so the loop can look the job up in
|
|
32
|
+
* agents/{agentSlug}/CRON.md. Optional for backward compat with
|
|
33
|
+
* triggers written before this field existed.
|
|
34
|
+
*/
|
|
35
|
+
bareName?: string;
|
|
36
|
+
/**
|
|
37
|
+
* Owning agent slug, set by cron-scheduler. When present, the loop
|
|
38
|
+
* applies fixes to vault/00-System/agents/{agentSlug}/CRON.md instead
|
|
39
|
+
* of the central CRON.md. Falls back to scanning if absent (older triggers).
|
|
40
|
+
*/
|
|
41
|
+
agentSlug?: string;
|
|
29
42
|
consecutiveErrors: number;
|
|
30
43
|
recentErrors: string[];
|
|
31
44
|
triggeredAt: string;
|
|
@@ -35,6 +48,13 @@ export interface FixRecipe {
|
|
|
35
48
|
category: FixCategory;
|
|
36
49
|
/** Description of what this fix does, for DMs. */
|
|
37
50
|
description: string;
|
|
51
|
+
/**
|
|
52
|
+
* Frontmatter keys this recipe may touch. Used to snapshot prior values
|
|
53
|
+
* before apply() runs so an ineffective fix can be reverted by post-fix
|
|
54
|
+
* verification without restoring fields the recipe never owned. Required
|
|
55
|
+
* for safe-cron-config recipes that participate in autoApply verification.
|
|
56
|
+
*/
|
|
57
|
+
fields?: readonly string[];
|
|
38
58
|
/**
|
|
39
59
|
* For safe-cron-config: a function that mutates the job's frontmatter
|
|
40
60
|
* entry in-place. Returns true if any change was made (false = idempotent
|
|
@@ -61,6 +81,11 @@ export interface SelfImproveLoopOptions {
|
|
|
61
81
|
triggersDir?: string;
|
|
62
82
|
pendingDir?: string;
|
|
63
83
|
cronPath?: string;
|
|
84
|
+
/**
|
|
85
|
+
* Override the agents root (vault/00-System/agents). When a trigger
|
|
86
|
+
* has agentSlug, the loop reads/writes `${agentsDir}/${agentSlug}/CRON.md`.
|
|
87
|
+
*/
|
|
88
|
+
agentsDir?: string;
|
|
64
89
|
/**
|
|
65
90
|
* Disable the fs.watch event-driven path. Tests use this so they can
|
|
66
91
|
* call tick() directly without racing the watcher.
|
|
@@ -73,6 +98,7 @@ export declare class SelfImproveLoop {
|
|
|
73
98
|
private readonly triggersDir;
|
|
74
99
|
private readonly pendingDir;
|
|
75
100
|
private readonly cronPath;
|
|
101
|
+
private readonly agentsDir;
|
|
76
102
|
private readonly dispatcher;
|
|
77
103
|
private readonly watchEnabled;
|
|
78
104
|
private timer;
|
|
@@ -28,7 +28,7 @@ import { existsSync, mkdirSync, readdirSync, readFileSync, unlinkSync, watch, wr
|
|
|
28
28
|
import path from 'node:path';
|
|
29
29
|
import matter from 'gray-matter';
|
|
30
30
|
import pino from 'pino';
|
|
31
|
-
import { BASE_DIR, SYSTEM_DIR } from '../config.js';
|
|
31
|
+
import { AGENTS_DIR, BASE_DIR, SYSTEM_DIR } from '../config.js';
|
|
32
32
|
const logger = pino({ name: 'clementine.self-improve-loop' });
|
|
33
33
|
/**
|
|
34
34
|
* Fallback tick interval. The loop is primarily event-driven via fs.watch
|
|
@@ -46,6 +46,7 @@ const WATCH_DEBOUNCE_MS = 2000;
|
|
|
46
46
|
const TRIGGERS_DIR = path.join(BASE_DIR, 'self-improve', 'triggers');
|
|
47
47
|
const PENDING_CHANGES_DIR = path.join(BASE_DIR, 'self-improve', 'pending-changes');
|
|
48
48
|
const CRON_PATH = path.join(SYSTEM_DIR, 'CRON.md');
|
|
49
|
+
const AGENTS_ROOT = AGENTS_DIR;
|
|
49
50
|
// ── Pattern recognition ──────────────────────────────────────────────
|
|
50
51
|
const PATTERNS = [
|
|
51
52
|
{
|
|
@@ -54,6 +55,7 @@ const PATTERNS = [
|
|
|
54
55
|
recipe: () => ({
|
|
55
56
|
category: 'safe-cron-config',
|
|
56
57
|
description: 'Hit max-turns ceiling repeatedly. Switching to unleashed mode (multi-phase) so the job can complete its workflow.',
|
|
58
|
+
fields: ['mode', 'max_hours'],
|
|
57
59
|
apply: (job) => {
|
|
58
60
|
let changed = false;
|
|
59
61
|
if (job.mode !== 'unleashed') {
|
|
@@ -108,37 +110,118 @@ export function classifyFailure(recentErrors) {
|
|
|
108
110
|
description: 'Unrecognized failure pattern. Owner needs to inspect the trigger file.',
|
|
109
111
|
};
|
|
110
112
|
}
|
|
111
|
-
function
|
|
113
|
+
function readJobsFromFile(cronPath) {
|
|
112
114
|
if (!existsSync(cronPath))
|
|
113
115
|
return null;
|
|
114
116
|
const raw = readFileSync(cronPath, 'utf-8');
|
|
115
117
|
const parsed = matter(raw);
|
|
116
118
|
const jobs = (parsed.data.jobs ?? []);
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
119
|
+
return { raw, parsed, jobs };
|
|
120
|
+
}
|
|
121
|
+
function readAgentSlug(job) {
|
|
122
|
+
if (typeof job.agentSlug === 'string')
|
|
123
|
+
return job.agentSlug;
|
|
124
|
+
if (typeof job.agent_slug === 'string')
|
|
125
|
+
return job.agent_slug;
|
|
126
|
+
return undefined;
|
|
122
127
|
}
|
|
123
128
|
/**
|
|
124
|
-
*
|
|
125
|
-
*
|
|
129
|
+
* Locate a job's frontmatter entry in either the central CRON.md or an
|
|
130
|
+
* agent-scoped CRON.md. Search priority:
|
|
131
|
+
*
|
|
132
|
+
* 1. If trigger.agentSlug is set, look in agents/{slug}/CRON.md by bareName.
|
|
133
|
+
* 2. Otherwise look in central CRON.md by exact name.
|
|
134
|
+
* 3. Fall back to scanning agents/* for the bareName (covers older triggers
|
|
135
|
+
* that lack agentSlug — the cron-scheduler-prefixed jobName like
|
|
136
|
+
* `slug:name` lets us recover the slug).
|
|
126
137
|
*/
|
|
127
|
-
function
|
|
138
|
+
function loadCronJob(trigger, cronPath, agentsDir) {
|
|
139
|
+
const explicitSlug = trigger.agentSlug;
|
|
140
|
+
const bare = trigger.bareName ?? (explicitSlug && trigger.jobName.startsWith(`${explicitSlug}:`)
|
|
141
|
+
? trigger.jobName.slice(explicitSlug.length + 1)
|
|
142
|
+
: trigger.jobName);
|
|
143
|
+
// 1. Agent-scoped file when slug is known
|
|
144
|
+
if (explicitSlug) {
|
|
145
|
+
const agentCronPath = path.join(agentsDir, explicitSlug, 'CRON.md');
|
|
146
|
+
const file = readJobsFromFile(agentCronPath);
|
|
147
|
+
if (file) {
|
|
148
|
+
const job = file.jobs.find((j) => String(j.name ?? '') === bare);
|
|
149
|
+
if (job) {
|
|
150
|
+
return {
|
|
151
|
+
agentSlug: explicitSlug,
|
|
152
|
+
cronPath: agentCronPath,
|
|
153
|
+
bareName: bare,
|
|
154
|
+
job,
|
|
155
|
+
raw: file.raw,
|
|
156
|
+
parsed: file.parsed,
|
|
157
|
+
};
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
// 2. Central CRON.md by full jobName (handles globally-defined jobs and
|
|
162
|
+
// legacy jobs tagged with agentSlug field directly in the central file)
|
|
163
|
+
const central = readJobsFromFile(cronPath);
|
|
164
|
+
if (central) {
|
|
165
|
+
const job = central.jobs.find((j) => String(j.name ?? '') === trigger.jobName);
|
|
166
|
+
if (job) {
|
|
167
|
+
return {
|
|
168
|
+
agentSlug: explicitSlug ?? readAgentSlug(job),
|
|
169
|
+
cronPath,
|
|
170
|
+
bareName: String(job.name ?? ''),
|
|
171
|
+
job,
|
|
172
|
+
raw: central.raw,
|
|
173
|
+
parsed: central.parsed,
|
|
174
|
+
};
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
// 3. Recover via scan: trigger jobName follows `{slug}:{bareName}` for
|
|
178
|
+
// agent-scoped jobs even when older triggers omit agentSlug.
|
|
179
|
+
if (!explicitSlug && trigger.jobName.includes(':')) {
|
|
180
|
+
const [slug, ...rest] = trigger.jobName.split(':');
|
|
181
|
+
const inferredBare = rest.join(':');
|
|
182
|
+
if (slug && inferredBare) {
|
|
183
|
+
const agentCronPath = path.join(agentsDir, slug, 'CRON.md');
|
|
184
|
+
const file = readJobsFromFile(agentCronPath);
|
|
185
|
+
if (file) {
|
|
186
|
+
const job = file.jobs.find((j) => String(j.name ?? '') === inferredBare);
|
|
187
|
+
if (job) {
|
|
188
|
+
return {
|
|
189
|
+
agentSlug: slug,
|
|
190
|
+
cronPath: agentCronPath,
|
|
191
|
+
bareName: inferredBare,
|
|
192
|
+
job,
|
|
193
|
+
raw: file.raw,
|
|
194
|
+
parsed: file.parsed,
|
|
195
|
+
};
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
return null;
|
|
201
|
+
}
|
|
202
|
+
/**
|
|
203
|
+
* Apply the recipe's mutator to the job's frontmatter and write the CRON.md
|
|
204
|
+
* (central or agent-scoped, whichever the lookup resolved to) back atomically.
|
|
205
|
+
* Returns the captured prevFields snapshot when a change was written, or
|
|
206
|
+
* null when no change was needed (idempotent re-apply). prevFields uses
|
|
207
|
+
* `null` to represent "field was absent before the fix" — the revert path
|
|
208
|
+
* deletes the key in that case.
|
|
209
|
+
*/
|
|
210
|
+
function applyCronEdit(lookup, recipe) {
|
|
128
211
|
if (!recipe.apply)
|
|
129
|
-
return
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
212
|
+
return null;
|
|
213
|
+
// Snapshot only the fields the recipe declared it would touch — over-broad
|
|
214
|
+
// snapshots would clobber concurrent edits during a revert.
|
|
215
|
+
const prevFields = {};
|
|
216
|
+
for (const key of recipe.fields ?? []) {
|
|
217
|
+
prevFields[key] = key in lookup.job ? lookup.job[key] : null;
|
|
134
218
|
}
|
|
135
219
|
const changed = recipe.apply(lookup.job);
|
|
136
220
|
if (!changed)
|
|
137
|
-
return
|
|
138
|
-
// Re-stringify with the existing content body preserved.
|
|
221
|
+
return null;
|
|
139
222
|
const updated = matter.stringify(lookup.parsed.content, lookup.parsed.data);
|
|
140
|
-
writeFileSync(cronPath, updated);
|
|
141
|
-
return
|
|
223
|
+
writeFileSync(lookup.cronPath, updated);
|
|
224
|
+
return prevFields;
|
|
142
225
|
}
|
|
143
226
|
function writePendingChange(record, dir) {
|
|
144
227
|
mkdirSync(dir, { recursive: true });
|
|
@@ -152,6 +235,7 @@ export class SelfImproveLoop {
|
|
|
152
235
|
triggersDir;
|
|
153
236
|
pendingDir;
|
|
154
237
|
cronPath;
|
|
238
|
+
agentsDir;
|
|
155
239
|
dispatcher;
|
|
156
240
|
watchEnabled;
|
|
157
241
|
timer = null;
|
|
@@ -165,6 +249,7 @@ export class SelfImproveLoop {
|
|
|
165
249
|
this.triggersDir = opts.triggersDir ?? TRIGGERS_DIR;
|
|
166
250
|
this.pendingDir = opts.pendingDir ?? PENDING_CHANGES_DIR;
|
|
167
251
|
this.cronPath = opts.cronPath ?? CRON_PATH;
|
|
252
|
+
this.agentsDir = opts.agentsDir ?? AGENTS_ROOT;
|
|
168
253
|
this.watchEnabled = opts.disableWatch !== true;
|
|
169
254
|
}
|
|
170
255
|
start() {
|
|
@@ -286,23 +371,48 @@ export class SelfImproveLoop {
|
|
|
286
371
|
}
|
|
287
372
|
async processOne(trigger, counts) {
|
|
288
373
|
const recipe = classifyFailure(trigger.recentErrors);
|
|
289
|
-
const lookup = loadCronJob(trigger.
|
|
290
|
-
const agentSlug = lookup?.agentSlug;
|
|
374
|
+
const lookup = loadCronJob(trigger, this.cronPath, this.agentsDir);
|
|
375
|
+
const agentSlug = trigger.agentSlug ?? lookup?.agentSlug;
|
|
291
376
|
if (recipe.category === 'safe-cron-config') {
|
|
292
|
-
|
|
293
|
-
|
|
377
|
+
if (!lookup) {
|
|
378
|
+
// Job vanished from CRON files (renamed/deleted). Nothing to fix.
|
|
379
|
+
counts.noop++;
|
|
380
|
+
logger.warn({ jobName: trigger.jobName, agentSlug }, 'Job not found in any CRON.md — cannot apply fix');
|
|
381
|
+
return;
|
|
382
|
+
}
|
|
383
|
+
const prevFields = applyCronEdit(lookup, recipe);
|
|
384
|
+
if (prevFields) {
|
|
294
385
|
counts.applied++;
|
|
386
|
+
// Register the edit for post-fix verification. The verifier watches
|
|
387
|
+
// the next AUTOAPPLY_VERDICT_WINDOW non-skipped runs and reverts
|
|
388
|
+
// prevFields if 0 succeed. Lazy import avoids pulling the gateway
|
|
389
|
+
// graph into the agent layer at module-load time.
|
|
390
|
+
try {
|
|
391
|
+
const { recordAutoApplyForVerification } = await import('../gateway/fix-verification.js');
|
|
392
|
+
recordAutoApplyForVerification(trigger.jobName, {
|
|
393
|
+
kind: 'cron-config',
|
|
394
|
+
file: lookup.cronPath,
|
|
395
|
+
bareName: lookup.bareName,
|
|
396
|
+
prevFields,
|
|
397
|
+
});
|
|
398
|
+
}
|
|
399
|
+
catch (err) {
|
|
400
|
+
logger.warn({ err, jobName: trigger.jobName }, 'Failed to register cron-config autoApply for verification (non-fatal)');
|
|
401
|
+
}
|
|
402
|
+
const where = lookup.agentSlug
|
|
403
|
+
? `\`agents/${lookup.agentSlug}/CRON.md\``
|
|
404
|
+
: '`CRON.md`';
|
|
295
405
|
await this.notifyAgent(agentSlug, [
|
|
296
406
|
`🔧 **Auto-fixed** \`${trigger.jobName}\` after ${trigger.consecutiveErrors} consecutive failures.`,
|
|
297
407
|
'',
|
|
298
408
|
recipe.description,
|
|
299
409
|
'',
|
|
300
|
-
|
|
410
|
+
`Edit applied to ${where}. Verifying over the next 3 runs — I'll revert automatically if it doesn't help.`,
|
|
301
411
|
].join('\n'));
|
|
302
412
|
}
|
|
303
413
|
else {
|
|
304
414
|
counts.noop++;
|
|
305
|
-
logger.info({ jobName: trigger.jobName }, 'Fix recipe applied is already in place — trigger removed without further action');
|
|
415
|
+
logger.info({ jobName: trigger.jobName, agentSlug }, 'Fix recipe applied is already in place — trigger removed without further action');
|
|
306
416
|
}
|
|
307
417
|
return;
|
|
308
418
|
}
|
package/dist/cli/dashboard.js
CHANGED
|
@@ -6603,6 +6603,15 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
|
|
|
6603
6603
|
const stateFile = path.join(siDir, 'state.json');
|
|
6604
6604
|
const logFile = path.join(siDir, 'experiment-log.jsonl');
|
|
6605
6605
|
const pendingDir = path.join(siDir, 'pending-changes');
|
|
6606
|
+
// Active failure triggers — written by cron-scheduler when a job hits
|
|
6607
|
+
// 3+ consecutive errors; consumed by self-improve-loop on its next tick.
|
|
6608
|
+
// Surfacing them here gives the user a "work in progress" view between
|
|
6609
|
+
// tick boundaries (event-driven debounce + 1h fallback).
|
|
6610
|
+
const triggersDir = path.join(siDir, 'triggers');
|
|
6611
|
+
// Pending fix verifications — auto-applied fixes that are soaking
|
|
6612
|
+
// through the 3-run verdict window (cron-config / advisor-rule /
|
|
6613
|
+
// prompt-override). Reverts automatically if 0 succeed.
|
|
6614
|
+
const verificationsFile = path.join(BASE_DIR, 'cron', 'fix-verifications.json');
|
|
6606
6615
|
let state = null;
|
|
6607
6616
|
if (existsSync(stateFile)) {
|
|
6608
6617
|
try {
|
|
@@ -6632,7 +6641,29 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
|
|
|
6632
6641
|
}
|
|
6633
6642
|
catch { /* ignore */ }
|
|
6634
6643
|
}
|
|
6635
|
-
|
|
6644
|
+
let triggers = [];
|
|
6645
|
+
if (existsSync(triggersDir)) {
|
|
6646
|
+
try {
|
|
6647
|
+
triggers = readdirSync(triggersDir).filter(f => f.endsWith('.json'))
|
|
6648
|
+
.map(f => { try {
|
|
6649
|
+
return JSON.parse(readFileSync(path.join(triggersDir, f), 'utf-8'));
|
|
6650
|
+
}
|
|
6651
|
+
catch {
|
|
6652
|
+
return null;
|
|
6653
|
+
} })
|
|
6654
|
+
.filter(Boolean);
|
|
6655
|
+
}
|
|
6656
|
+
catch { /* ignore */ }
|
|
6657
|
+
}
|
|
6658
|
+
let verifications = [];
|
|
6659
|
+
if (existsSync(verificationsFile)) {
|
|
6660
|
+
try {
|
|
6661
|
+
const raw = JSON.parse(readFileSync(verificationsFile, 'utf-8'));
|
|
6662
|
+
verifications = Object.values(raw.pending ?? {});
|
|
6663
|
+
}
|
|
6664
|
+
catch { /* ignore */ }
|
|
6665
|
+
}
|
|
6666
|
+
res.json({ state, experiments, pending, triggers, verifications });
|
|
6636
6667
|
});
|
|
6637
6668
|
app.post('/api/self-improve/run', async (_req, res) => {
|
|
6638
6669
|
try {
|
|
@@ -12667,8 +12698,8 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
|
|
|
12667
12698
|
|
|
12668
12699
|
<!-- User Model — MemGPT-style core memory blocks always loaded into context -->
|
|
12669
12700
|
<div class="tab-pane" id="tab-intelligence-user-model">
|
|
12670
|
-
<div style="color:var(--muted,#888);margin-bottom:12px;font-size:13px">
|
|
12671
|
-
|
|
12701
|
+
<div style="color:var(--muted,#888);margin-bottom:12px;font-size:13px;max-width:760px">
|
|
12702
|
+
<strong style="color:var(--text)">Always-in-context core memory.</strong> Four small slots (capped at 2000 chars each) that load into <em>every</em> conversation — distinct from MEMORY.md and the chunk store. The agent appends here automatically as you talk; you can also edit directly to correct or steer. Use the Scope dropdown to view per-agent slots (each hired agent maintains their own).
|
|
12672
12703
|
</div>
|
|
12673
12704
|
<div style="display:flex;gap:8px;margin-bottom:12px;align-items:center;flex-wrap:wrap">
|
|
12674
12705
|
<label style="font-size:13px;color:var(--text-secondary)">Scope:</label>
|
|
@@ -12930,14 +12961,34 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
|
|
|
12930
12961
|
</div>
|
|
12931
12962
|
</div>
|
|
12932
12963
|
<div class="tab-pane" id="tab-intelligence-learning">
|
|
12933
|
-
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:12px">
|
|
12934
|
-
<div style="font-size:13px;color:var(--text-secondary)">
|
|
12964
|
+
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:12px;gap:12px;flex-wrap:wrap">
|
|
12965
|
+
<div style="font-size:13px;color:var(--text-secondary);max-width:680px">
|
|
12966
|
+
Self-improvement runs nightly at 1 AM. The autonomous loop also auto-fixes failing crons (3+ consecutive errors) and verifies each fix over the next 3 runs — reverting automatically if it doesn't help.
|
|
12967
|
+
</div>
|
|
12935
12968
|
<button class="btn-sm btn-primary" onclick="siRunCycle()" id="si-run-btn">Run Now</button>
|
|
12936
12969
|
</div>
|
|
12937
12970
|
<div class="grid-2" id="si-status-cards">
|
|
12938
12971
|
<div class="skel-block"><div class="skel-row med"></div><div class="skel-row short"></div></div>
|
|
12939
12972
|
<div class="skel-block"><div class="skel-row med"></div><div class="skel-row short"></div></div>
|
|
12940
12973
|
</div>
|
|
12974
|
+
<div class="card" style="margin-top:16px">
|
|
12975
|
+
<div class="card-header" style="display:flex;align-items:center;justify-content:space-between">
|
|
12976
|
+
<span>Active failures</span>
|
|
12977
|
+
<span class="tab-badge" id="tab-si-triggers" style="display:none;background:#ef4444;color:#fff">0</span>
|
|
12978
|
+
</div>
|
|
12979
|
+
<div class="card-body" id="si-triggers-list" style="padding:0">
|
|
12980
|
+
<div class="empty-state" style="padding:14px">No active failures — nothing has tripped 3+ consecutive errors.</div>
|
|
12981
|
+
</div>
|
|
12982
|
+
</div>
|
|
12983
|
+
<div class="card" style="margin-top:16px">
|
|
12984
|
+
<div class="card-header" style="display:flex;align-items:center;justify-content:space-between">
|
|
12985
|
+
<span>Verifying fixes</span>
|
|
12986
|
+
<span class="tab-badge" id="tab-si-verifying" style="display:none;background:#f59e0b;color:#000">0</span>
|
|
12987
|
+
</div>
|
|
12988
|
+
<div class="card-body" id="si-verifying-list" style="padding:0">
|
|
12989
|
+
<div class="empty-state" style="padding:14px">No fixes currently soaking. Auto-fixes are verified over 3 runs and reverted if 0 succeed.</div>
|
|
12990
|
+
</div>
|
|
12991
|
+
</div>
|
|
12941
12992
|
<div class="card" style="margin-top:16px">
|
|
12942
12993
|
<div class="card-header" style="display:flex;align-items:center;justify-content:space-between">
|
|
12943
12994
|
<span>Pending Proposals</span>
|
|
@@ -19109,7 +19160,20 @@ async function loadUserModel() {
|
|
|
19109
19160
|
relationships: 'People, projects, channels they regularly interact with.',
|
|
19110
19161
|
agent_persona: 'For multi-agent: this agent\\'s self-identity in its working relationship with the user.',
|
|
19111
19162
|
};
|
|
19163
|
+
// First-run hint: when every slot is empty, show a single explainer
|
|
19164
|
+
// banner above the (still editable) textareas so the user understands
|
|
19165
|
+
// both what this is and how to populate it. Suppressed once anything
|
|
19166
|
+
// is in place — at that point the metadata on each card is enough.
|
|
19167
|
+
var allEmpty = d.blocks.every(function(b) { return !(b.content || '').trim(); });
|
|
19112
19168
|
var html = '<div style="display:flex;flex-direction:column;gap:14px">';
|
|
19169
|
+
if (allEmpty) {
|
|
19170
|
+
html += '<div class="card" style="padding:14px;border-left:3px solid var(--accent,#f59e0b);background:var(--bg-input,#1a1a1a)">' +
|
|
19171
|
+
'<div style="font-weight:600;margin-bottom:6px">No core memory yet for this scope</div>' +
|
|
19172
|
+
'<div style="font-size:12px;color:var(--text-secondary);line-height:1.5">' +
|
|
19173
|
+
'These slots auto-populate as you chat — the agent extracts durable facts about you (your role, active goals, recurring people/projects) and appends them after each exchange. ' +
|
|
19174
|
+
'You can also seed from existing memory in one click, or type directly into any slot below and click Save.' +
|
|
19175
|
+
'</div></div>';
|
|
19176
|
+
}
|
|
19113
19177
|
for (var i = 0; i < d.blocks.length; i++) {
|
|
19114
19178
|
var b = d.blocks[i];
|
|
19115
19179
|
var label = labelMap[b.slot] || b.slot;
|
|
@@ -21291,6 +21355,7 @@ async function refreshMemoryHealth() {
|
|
|
21291
21355
|
html += '<div style="flex:1;min-width:240px">';
|
|
21292
21356
|
html += '<div style="font-weight:600;margin-bottom:4px">Retrieval running on sparse vectors for ' + missing.toLocaleString() + ' chunks</div>';
|
|
21293
21357
|
html += '<div style="font-size:12px;color:var(--text-muted)">Backfill builds 768-dim neural embeddings for semantic search. First run downloads ~440MB.</div>';
|
|
21358
|
+
html += '<div style="font-size:11px;color:var(--text-muted);margin-top:4px">Auto-backfill runs every 6h (~100 chunks/cycle). Use the buttons below to push faster.</div>';
|
|
21294
21359
|
html += '</div>';
|
|
21295
21360
|
html += '<button class="btn-sm" onclick="memoryHealthAction(\\'reembed-dense\\', { limit: 200 })" title="Embed up to 200 chunks now">Backfill 200</button>';
|
|
21296
21361
|
html += '<button class="btn-sm" onclick="memoryHealthAction(\\'reembed-dense\\', { limit: 2000 })" title="Embed up to 2000 chunks now (slower)">Backfill 2000</button>';
|
|
@@ -24409,12 +24474,77 @@ async function refreshSelfImprove() {
|
|
|
24409
24474
|
const state = d.state;
|
|
24410
24475
|
const experiments = d.experiments || [];
|
|
24411
24476
|
const pending = d.pending || [];
|
|
24477
|
+
const triggers = d.triggers || [];
|
|
24478
|
+
const verifications = d.verifications || [];
|
|
24412
24479
|
|
|
24413
|
-
// Update tab badge
|
|
24480
|
+
// Update tab badge — combine human-attention queues so the sidebar
|
|
24481
|
+
// count reflects "things that need you to look at", not just proposals.
|
|
24482
|
+
const attentionCount = pending.length + triggers.length;
|
|
24414
24483
|
const badge = document.getElementById('nav-si-pending');
|
|
24415
|
-
if (badge) badge.textContent =
|
|
24484
|
+
if (badge) badge.textContent = attentionCount || '0';
|
|
24416
24485
|
var _sib = document.getElementById('tab-si-pending');
|
|
24417
24486
|
if (_sib) { _sib.textContent = pending.length || '0'; _sib.style.display = pending.length > 0 ? '' : 'none'; }
|
|
24487
|
+
var _sit = document.getElementById('tab-si-triggers');
|
|
24488
|
+
if (_sit) { _sit.textContent = triggers.length || '0'; _sit.style.display = triggers.length > 0 ? '' : 'none'; }
|
|
24489
|
+
var _siv = document.getElementById('tab-si-verifying');
|
|
24490
|
+
if (_siv) { _siv.textContent = verifications.length || '0'; _siv.style.display = verifications.length > 0 ? '' : 'none'; }
|
|
24491
|
+
|
|
24492
|
+
// Active failure triggers — jobs at 3+ consecutive errors; the loop
|
|
24493
|
+
// will pick these up on its next tick (event-driven; ~2s debounce).
|
|
24494
|
+
const triggersEl = document.getElementById('si-triggers-list');
|
|
24495
|
+
if (triggersEl) {
|
|
24496
|
+
if (triggers.length === 0) {
|
|
24497
|
+
triggersEl.innerHTML = '<div class="empty-state" style="padding:14px">No active failures — nothing has tripped 3+ consecutive errors.</div>';
|
|
24498
|
+
} else {
|
|
24499
|
+
triggersEl.innerHTML = triggers.map(function(t) {
|
|
24500
|
+
var owner = t.agentSlug ? '@' + esc(t.agentSlug) : 'global';
|
|
24501
|
+
var when = t.triggeredAt ? new Date(t.triggeredAt).toLocaleString() : '—';
|
|
24502
|
+
var firstError = (t.recentErrors && t.recentErrors[0]) ? String(t.recentErrors[0]).slice(0, 200) : '';
|
|
24503
|
+
return '<div style="padding:12px;border-bottom:1px solid var(--border)">' +
|
|
24504
|
+
'<div style="display:flex;justify-content:space-between;align-items:baseline;gap:8px;flex-wrap:wrap">' +
|
|
24505
|
+
'<div><strong>' + esc(t.jobName || '—') + '</strong> ' +
|
|
24506
|
+
'<span style="font-size:11px;color:var(--text-muted)">· owner: ' + owner + '</span> ' +
|
|
24507
|
+
'<span style="font-size:11px;color:var(--danger,#ef4444)">· ' + (t.consecutiveErrors || 0) + ' consecutive errors</span></div>' +
|
|
24508
|
+
'<span style="font-size:11px;color:var(--text-muted)">' + esc(when) + '</span>' +
|
|
24509
|
+
'</div>' +
|
|
24510
|
+
(firstError ? '<div style="margin-top:6px;font-size:12px;color:var(--text-secondary);font-family:ui-monospace,monospace">' + esc(firstError) + '</div>' : '') +
|
|
24511
|
+
'</div>';
|
|
24512
|
+
}).join('');
|
|
24513
|
+
}
|
|
24514
|
+
}
|
|
24515
|
+
|
|
24516
|
+
// Pending fix verifications — auto-fixes soaking through the 3-run window.
|
|
24517
|
+
const verifyEl = document.getElementById('si-verifying-list');
|
|
24518
|
+
if (verifyEl) {
|
|
24519
|
+
if (verifications.length === 0) {
|
|
24520
|
+
verifyEl.innerHTML = '<div class="empty-state" style="padding:14px">No fixes currently soaking. Auto-fixes are verified over 3 runs and reverted if 0 succeed.</div>';
|
|
24521
|
+
} else {
|
|
24522
|
+
verifyEl.innerHTML = verifications.map(function(v) {
|
|
24523
|
+
var outcomes = v.postRunOutcomes || [];
|
|
24524
|
+
var dots = '';
|
|
24525
|
+
for (var i = 0; i < 3; i++) {
|
|
24526
|
+
var o = outcomes[i];
|
|
24527
|
+
var color = o === 'ok' ? 'var(--success,#10b981)' : o === 'error' || o === 'retried' ? 'var(--danger,#ef4444)' : 'var(--border)';
|
|
24528
|
+
dots += '<span title="' + (o || 'pending') + '" style="display:inline-block;width:10px;height:10px;border-radius:50%;background:' + color + ';margin-right:4px"></span>';
|
|
24529
|
+
}
|
|
24530
|
+
var kind = v.autoApply && v.autoApply.kind ? v.autoApply.kind : 'hand-edit';
|
|
24531
|
+
var when = v.recordedAt ? new Date(v.recordedAt).toLocaleString() : '—';
|
|
24532
|
+
var fileLabel = v.autoApply && v.autoApply.file ? v.autoApply.file.split('/').slice(-3).join('/') : '';
|
|
24533
|
+
return '<div style="padding:12px;border-bottom:1px solid var(--border)">' +
|
|
24534
|
+
'<div style="display:flex;justify-content:space-between;align-items:baseline;gap:8px;flex-wrap:wrap">' +
|
|
24535
|
+
'<div><strong>' + esc(v.jobName || '—') + '</strong> ' +
|
|
24536
|
+
'<span style="font-size:11px;color:var(--text-muted)">· ' + esc(kind) + '</span></div>' +
|
|
24537
|
+
'<div style="font-size:11px;color:var(--text-muted)">' + esc(when) + '</div>' +
|
|
24538
|
+
'</div>' +
|
|
24539
|
+
'<div style="margin-top:8px;display:flex;align-items:center;gap:10px;font-size:12px;color:var(--text-secondary)">' +
|
|
24540
|
+
'<span>' + dots + '</span>' +
|
|
24541
|
+
'<span>' + outcomes.length + ' / 3 runs sampled</span>' +
|
|
24542
|
+
(fileLabel ? '<span style="font-family:ui-monospace,monospace;color:var(--text-muted)">' + esc(fileLabel) + '</span>' : '') +
|
|
24543
|
+
'</div>' +
|
|
24544
|
+
'</div>';
|
|
24545
|
+
}).join('');
|
|
24546
|
+
}
|
|
24547
|
+
}
|
|
24418
24548
|
|
|
24419
24549
|
// Status cards
|
|
24420
24550
|
const cards = document.getElementById('si-status-cards');
|
|
@@ -1184,19 +1184,27 @@ export class CronScheduler {
|
|
|
1184
1184
|
if (advice.shouldEscalate) {
|
|
1185
1185
|
this.logAdvisorEvent('escalation', job.name, advice.escalationReason ?? 'Escalated to unleashed');
|
|
1186
1186
|
}
|
|
1187
|
-
// Write targeted self-improvement trigger when consecutive errors are high
|
|
1187
|
+
// Write targeted self-improvement trigger when consecutive errors are high.
|
|
1188
|
+
// Include agentSlug + bareName so the self-improve loop can locate jobs
|
|
1189
|
+
// defined in per-agent CRON.md files (vault/00-System/agents/{slug}/CRON.md)
|
|
1190
|
+
// rather than only the central one.
|
|
1188
1191
|
if (consErrors >= 3) {
|
|
1189
1192
|
try {
|
|
1190
1193
|
const triggerDir = path.join(BASE_DIR, 'self-improve', 'triggers');
|
|
1191
1194
|
mkdirSync(triggerDir, { recursive: true });
|
|
1192
1195
|
const triggerPath = path.join(triggerDir, `${job.name.replace(/[^a-zA-Z0-9_-]/g, '_')}.json`);
|
|
1196
|
+
const bareName = job.agentSlug && job.name.startsWith(`${job.agentSlug}:`)
|
|
1197
|
+
? job.name.slice(job.agentSlug.length + 1)
|
|
1198
|
+
: job.name;
|
|
1193
1199
|
writeFileSync(triggerPath, JSON.stringify({
|
|
1194
1200
|
jobName: job.name,
|
|
1201
|
+
bareName,
|
|
1202
|
+
agentSlug: job.agentSlug,
|
|
1195
1203
|
consecutiveErrors: consErrors,
|
|
1196
1204
|
recentErrors: this.runLog.readRecent(job.name, 3).map(e => e.error?.slice(0, 200)),
|
|
1197
1205
|
triggeredAt: new Date().toISOString(),
|
|
1198
1206
|
}, null, 2));
|
|
1199
|
-
logger.info({ job: job.name, consErrors }, 'Wrote self-improvement trigger for failing job');
|
|
1207
|
+
logger.info({ job: job.name, agentSlug: job.agentSlug, consErrors }, 'Wrote self-improvement trigger for failing job');
|
|
1200
1208
|
}
|
|
1201
1209
|
catch { /* non-fatal */ }
|
|
1202
1210
|
}
|
|
@@ -26,9 +26,13 @@ interface PendingVerification {
|
|
|
26
26
|
/**
|
|
27
27
|
* Tracks an autoApply that's currently being verified. When the verdict
|
|
28
28
|
* window closes negatively, revertFix() uses these fields to undo.
|
|
29
|
+
*
|
|
30
|
+
* - `advisor-rule` and `prompt-override` revert by deleting the written file.
|
|
31
|
+
* - `cron-config` reverts by re-applying the captured `prevFields` to the
|
|
32
|
+
* named job inside CRON.md (deleting CRON.md would be catastrophic).
|
|
29
33
|
*/
|
|
30
34
|
export interface AutoApplyTracker {
|
|
31
|
-
kind: 'advisor-rule' | 'prompt-override';
|
|
35
|
+
kind: 'advisor-rule' | 'prompt-override' | 'cron-config';
|
|
32
36
|
/** Absolute path of the file the apply wrote. */
|
|
33
37
|
file: string;
|
|
34
38
|
/** advisor-rule only: the rule's id, used by the loader's hot-reload. */
|
|
@@ -36,6 +40,11 @@ export interface AutoApplyTracker {
|
|
|
36
40
|
/** prompt-override only: scope label for the verdict message. */
|
|
37
41
|
scope?: 'global' | 'agent' | 'job';
|
|
38
42
|
scopeKey?: string;
|
|
43
|
+
/** cron-config only: bare job name as written in the CRON.md frontmatter. */
|
|
44
|
+
bareName?: string;
|
|
45
|
+
/** cron-config only: original values for the fields that were mutated.
|
|
46
|
+
* Use null for "field was absent (delete on revert)". */
|
|
47
|
+
prevFields?: Record<string, unknown>;
|
|
39
48
|
}
|
|
40
49
|
/**
|
|
41
50
|
* Compare an old and new jobs list and record verifications for any job that:
|
|
@@ -141,11 +141,19 @@ export function recordAutoApplyForVerification(jobName, tracker) {
|
|
|
141
141
|
logger.info({ job: jobName, kind: tracker.kind, file: tracker.file }, 'Recorded autoApply for verification — will track next runs');
|
|
142
142
|
}
|
|
143
143
|
/**
|
|
144
|
-
* Undo an autoApply
|
|
145
|
-
*
|
|
146
|
-
*
|
|
144
|
+
* Undo an autoApply. Dispatches on `tracker.kind`:
|
|
145
|
+
*
|
|
146
|
+
* - advisor-rule / prompt-override: delete the file the apply wrote.
|
|
147
|
+
* - cron-config: re-apply the captured `prevFields` to the named job
|
|
148
|
+
* in CRON.md (never delete CRON.md).
|
|
149
|
+
*
|
|
150
|
+
* Best-effort throughout: a missing file or vanished job is not an error.
|
|
151
|
+
* Returns true if a meaningful change was made.
|
|
147
152
|
*/
|
|
148
153
|
function revertAutoApply(tracker) {
|
|
154
|
+
if (tracker.kind === 'cron-config') {
|
|
155
|
+
return revertCronConfig(tracker);
|
|
156
|
+
}
|
|
149
157
|
try {
|
|
150
158
|
if (existsSync(tracker.file)) {
|
|
151
159
|
// Use unlinkSync from fs — kept dynamic to avoid a top-of-file import
|
|
@@ -161,6 +169,55 @@ function revertAutoApply(tracker) {
|
|
|
161
169
|
}
|
|
162
170
|
return false;
|
|
163
171
|
}
|
|
172
|
+
/**
|
|
173
|
+
* Restore the previous values of the fields the cron-config autoApply mutated.
|
|
174
|
+
* A `null` in `prevFields` means the field was absent before the fix and
|
|
175
|
+
* should be deleted on revert.
|
|
176
|
+
*/
|
|
177
|
+
function revertCronConfig(tracker) {
|
|
178
|
+
if (!tracker.bareName || !tracker.prevFields) {
|
|
179
|
+
logger.warn({ tracker }, 'cron-config revert missing bareName/prevFields — skipping');
|
|
180
|
+
return false;
|
|
181
|
+
}
|
|
182
|
+
try {
|
|
183
|
+
if (!existsSync(tracker.file)) {
|
|
184
|
+
logger.warn({ file: tracker.file }, 'cron-config revert: file missing — skipping');
|
|
185
|
+
return false;
|
|
186
|
+
}
|
|
187
|
+
const { readFileSync, writeFileSync } = require('node:fs');
|
|
188
|
+
const matter = require('gray-matter');
|
|
189
|
+
const raw = readFileSync(tracker.file, 'utf-8');
|
|
190
|
+
const parsed = matter(raw);
|
|
191
|
+
const jobs = (parsed.data.jobs ?? []);
|
|
192
|
+
const job = jobs.find((j) => String(j.name ?? '') === tracker.bareName);
|
|
193
|
+
if (!job) {
|
|
194
|
+
logger.warn({ file: tracker.file, bareName: tracker.bareName }, 'cron-config revert: job not found — already removed/renamed');
|
|
195
|
+
return false;
|
|
196
|
+
}
|
|
197
|
+
let mutated = false;
|
|
198
|
+
for (const [key, prev] of Object.entries(tracker.prevFields)) {
|
|
199
|
+
if (prev === null || prev === undefined) {
|
|
200
|
+
if (key in job) {
|
|
201
|
+
delete job[key];
|
|
202
|
+
mutated = true;
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
else if (job[key] !== prev) {
|
|
206
|
+
job[key] = prev;
|
|
207
|
+
mutated = true;
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
if (!mutated)
|
|
211
|
+
return false;
|
|
212
|
+
writeFileSync(tracker.file, matter.stringify(parsed.content, parsed.data));
|
|
213
|
+
logger.warn({ file: tracker.file, bareName: tracker.bareName }, 'Reverted cron-config autoApply — fix did not help');
|
|
214
|
+
return true;
|
|
215
|
+
}
|
|
216
|
+
catch (err) {
|
|
217
|
+
logger.warn({ err, file: tracker.file }, 'Failed to revert cron-config autoApply');
|
|
218
|
+
return false;
|
|
219
|
+
}
|
|
220
|
+
}
|
|
164
221
|
/**
|
|
165
222
|
* After a cron run completes, check whether we were waiting on a fix
|
|
166
223
|
* verification for this job. Two flows:
|
|
@@ -34,8 +34,14 @@ export declare function maybeVacuum(store: any): {
|
|
|
34
34
|
*/
|
|
35
35
|
export declare function runStartupMaintenance(store: any): Promise<void>;
|
|
36
36
|
/**
|
|
37
|
-
*
|
|
38
|
-
*
|
|
37
|
+
* Run one full periodic-maintenance cycle. Exported so tests can drive it
|
|
38
|
+
* without waiting on setInterval. `startPeriodicMaintenance` schedules
|
|
39
|
+
* this on the 6h cadence.
|
|
40
|
+
*/
|
|
41
|
+
export declare function runPeriodicCycle(store: any, llmCall?: (prompt: string) => Promise<string>): Promise<void>;
|
|
42
|
+
/**
|
|
43
|
+
* Start periodic maintenance on a 6-hour interval. Returns the interval
|
|
44
|
+
* handle for cleanup on shutdown.
|
|
39
45
|
*/
|
|
40
46
|
export declare function startPeriodicMaintenance(store: any, llmCall?: (prompt: string) => Promise<string>): ReturnType<typeof setInterval>;
|
|
41
47
|
//# sourceMappingURL=maintenance.d.ts.map
|
|
@@ -14,6 +14,16 @@ import { runIntegrityProbes } from './integrity.js';
|
|
|
14
14
|
const logger = pino({ name: 'clementine.maintenance' });
|
|
15
15
|
const PERIODIC_INTERVAL_MS = 6 * 60 * 60 * 1000; // 6 hours
|
|
16
16
|
const VACUUM_META_KEY = 'last_vacuum_at';
|
|
17
|
+
/**
|
|
18
|
+
* Number of chunks to dense-embed per periodic cycle. With 4 cycles/day
|
|
19
|
+
* that's 400 chunks/day — fast enough to cover a 3,500-chunk vault in
|
|
20
|
+
* ~9 days, slow enough that the GPU/CPU load barely registers. Override
|
|
21
|
+
* via env for power users with very large vaults.
|
|
22
|
+
*/
|
|
23
|
+
const PERIODIC_DENSE_BATCH = (() => {
|
|
24
|
+
const raw = parseInt(process.env.CLEMENTINE_DENSE_BATCH ?? '', 10);
|
|
25
|
+
return Number.isFinite(raw) && raw > 0 ? raw : 100;
|
|
26
|
+
})();
|
|
17
27
|
/**
|
|
18
28
|
* Janitor pass — keeps the store bounded. Safe to call repeatedly.
|
|
19
29
|
* Idempotent within a single run; surfaces totals for logging.
|
|
@@ -155,104 +165,128 @@ export async function runStartupMaintenance(store) {
|
|
|
155
165
|
logger.info({ durationMs: Date.now() - start }, 'Startup maintenance complete');
|
|
156
166
|
}
|
|
157
167
|
/**
|
|
158
|
-
*
|
|
159
|
-
*
|
|
168
|
+
* Run one full periodic-maintenance cycle. Exported so tests can drive it
|
|
169
|
+
* without waiting on setInterval. `startPeriodicMaintenance` schedules
|
|
170
|
+
* this on the 6h cadence.
|
|
160
171
|
*/
|
|
161
|
-
export function
|
|
162
|
-
const
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
172
|
+
export async function runPeriodicCycle(store, llmCall) {
|
|
173
|
+
const start = Date.now();
|
|
174
|
+
logger.info('Starting periodic memory maintenance');
|
|
175
|
+
// 1. Decay + prune
|
|
176
|
+
try {
|
|
177
|
+
store.decaySalience?.();
|
|
178
|
+
}
|
|
179
|
+
catch (err) {
|
|
180
|
+
logger.warn({ err }, 'Periodic decay failed');
|
|
181
|
+
}
|
|
182
|
+
try {
|
|
183
|
+
store.pruneStaleData?.();
|
|
184
|
+
}
|
|
185
|
+
catch (err) {
|
|
186
|
+
logger.warn({ err }, 'Periodic prune failed');
|
|
187
|
+
}
|
|
188
|
+
// 2. Rebuild vocab + backfill embeddings
|
|
189
|
+
try {
|
|
190
|
+
store.buildEmbeddings?.();
|
|
191
|
+
}
|
|
192
|
+
catch (err) {
|
|
193
|
+
logger.warn({ err }, 'Periodic embedding build failed');
|
|
194
|
+
}
|
|
195
|
+
// 2b. Idle dense-embedding backfill — process up to PERIODIC_DENSE_BATCH
|
|
196
|
+
// chunks per cycle so coverage drifts toward 100% without anyone running
|
|
197
|
+
// the CLI. The first time the dense model loads inside this process it
|
|
198
|
+
// pulls ~440MB; subsequent cycles reuse the loaded model. Failures
|
|
199
|
+
// (network, missing model dir, etc.) are logged as warnings and skipped because the
|
|
200
|
+
// backfill is best-effort — query-time still has TF-IDF as fallback.
|
|
201
|
+
if (typeof store.backfillDenseEmbeddings === 'function') {
|
|
166
202
|
try {
|
|
167
|
-
store.
|
|
203
|
+
const result = await store.backfillDenseEmbeddings({ limit: PERIODIC_DENSE_BATCH });
|
|
204
|
+
if (result.embedded > 0) {
|
|
205
|
+
logger.info(result, 'Periodic dense embedding backfill');
|
|
206
|
+
}
|
|
168
207
|
}
|
|
169
208
|
catch (err) {
|
|
170
|
-
logger.warn({ err }, 'Periodic
|
|
209
|
+
logger.warn({ err }, 'Periodic dense embedding backfill failed');
|
|
171
210
|
}
|
|
211
|
+
}
|
|
212
|
+
// 3. Consolidation (dedup, summarize, extract principles)
|
|
213
|
+
if (llmCall) {
|
|
172
214
|
try {
|
|
173
|
-
|
|
215
|
+
const { runConsolidation } = await import('./consolidation.js');
|
|
216
|
+
const result = await runConsolidation(store, llmCall);
|
|
217
|
+
logger.info(result, 'Consolidation cycle complete');
|
|
174
218
|
}
|
|
175
219
|
catch (err) {
|
|
176
|
-
logger.warn({ err }, '
|
|
220
|
+
logger.warn({ err }, 'Consolidation failed');
|
|
177
221
|
}
|
|
178
|
-
//
|
|
222
|
+
// 4. Re-backfill embeddings for any new summary chunks from consolidation
|
|
179
223
|
try {
|
|
180
224
|
store.buildEmbeddings?.();
|
|
181
225
|
}
|
|
182
226
|
catch (err) {
|
|
183
|
-
logger.warn({ err }, '
|
|
184
|
-
}
|
|
185
|
-
// 3. Consolidation (dedup, summarize, extract principles)
|
|
186
|
-
if (llmCall) {
|
|
187
|
-
try {
|
|
188
|
-
const { runConsolidation } = await import('./consolidation.js');
|
|
189
|
-
const result = await runConsolidation(store, llmCall);
|
|
190
|
-
logger.info(result, 'Consolidation cycle complete');
|
|
191
|
-
}
|
|
192
|
-
catch (err) {
|
|
193
|
-
logger.warn({ err }, 'Consolidation failed');
|
|
194
|
-
}
|
|
195
|
-
// 4. Re-backfill embeddings for any new summary chunks from consolidation
|
|
196
|
-
try {
|
|
197
|
-
store.buildEmbeddings?.();
|
|
198
|
-
}
|
|
199
|
-
catch (err) {
|
|
200
|
-
logger.warn({ err }, 'Post-consolidation embedding build failed');
|
|
201
|
-
}
|
|
227
|
+
logger.warn({ err }, 'Post-consolidation embedding build failed');
|
|
202
228
|
}
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
229
|
+
}
|
|
230
|
+
// 5. Extraction log pruning (legacy 90-day rule retained alongside cap)
|
|
231
|
+
try {
|
|
232
|
+
const conn = store.conn;
|
|
233
|
+
if (conn) {
|
|
234
|
+
conn.prepare(`DELETE FROM memory_extractions
|
|
208
235
|
WHERE extracted_at < datetime('now', '-90 days')
|
|
209
236
|
AND status != 'active'`).run();
|
|
210
|
-
}
|
|
211
237
|
}
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
}
|
|
220
|
-
catch (err) {
|
|
221
|
-
logger.warn({ err }, 'Periodic janitor failed');
|
|
238
|
+
}
|
|
239
|
+
catch { /* non-fatal */ }
|
|
240
|
+
// 6. Janitor — bounded growth.
|
|
241
|
+
try {
|
|
242
|
+
const result = runJanitor(store);
|
|
243
|
+
if (result.softDeleted || result.physicallyDeleted || result.outcomesPruned || result.extractionsCapped) {
|
|
244
|
+
logger.info(result, 'Janitor pass complete');
|
|
222
245
|
}
|
|
223
|
-
|
|
246
|
+
}
|
|
247
|
+
catch (err) {
|
|
248
|
+
logger.warn({ err }, 'Periodic janitor failed');
|
|
249
|
+
}
|
|
250
|
+
// 6b. Integrity probes — FTS health, orphan derived_from, embedding gaps.
|
|
251
|
+
try {
|
|
252
|
+
const report = runIntegrityProbes(store);
|
|
253
|
+
// Persist for the dashboard so the "last integrity check" surface
|
|
254
|
+
// doesn't depend on log scraping.
|
|
224
255
|
try {
|
|
225
|
-
|
|
226
|
-
// Persist for the dashboard so the "last integrity check" surface
|
|
227
|
-
// doesn't depend on log scraping.
|
|
228
|
-
try {
|
|
229
|
-
store.setMaintenanceMeta?.('last_integrity_report', JSON.stringify({ ...report, ranAt: new Date().toISOString() }));
|
|
230
|
-
}
|
|
231
|
-
catch { /* meta write is best-effort */ }
|
|
232
|
-
if (!report.ftsOk || report.ftsRebuilt || report.orphanRefsNulled > 0 || report.missingEmbeddings > 0) {
|
|
233
|
-
logger.info(report, 'Integrity probes complete');
|
|
234
|
-
}
|
|
256
|
+
store.setMaintenanceMeta?.('last_integrity_report', JSON.stringify({ ...report, ranAt: new Date().toISOString() }));
|
|
235
257
|
}
|
|
236
|
-
catch
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
// 7. VACUUM — idle-gated, at most once per vacuumIntervalDays.
|
|
240
|
-
try {
|
|
241
|
-
const vac = maybeVacuum(store);
|
|
242
|
-
if (vac) {
|
|
243
|
-
logger.info({
|
|
244
|
-
sizeBeforeBytes: vac.sizeBeforeBytes,
|
|
245
|
-
sizeAfterBytes: vac.sizeAfterBytes,
|
|
246
|
-
reclaimedBytes: vac.sizeBeforeBytes - vac.sizeAfterBytes,
|
|
247
|
-
durationMs: vac.durationMs,
|
|
248
|
-
}, 'VACUUM complete');
|
|
249
|
-
}
|
|
258
|
+
catch { /* meta write is best-effort */ }
|
|
259
|
+
if (!report.ftsOk || report.ftsRebuilt || report.orphanRefsNulled > 0 || report.missingEmbeddings > 0) {
|
|
260
|
+
logger.info(report, 'Integrity probes complete');
|
|
250
261
|
}
|
|
251
|
-
|
|
252
|
-
|
|
262
|
+
}
|
|
263
|
+
catch (err) {
|
|
264
|
+
logger.warn({ err }, 'Integrity probes failed');
|
|
265
|
+
}
|
|
266
|
+
// 7. VACUUM — idle-gated, at most once per vacuumIntervalDays.
|
|
267
|
+
try {
|
|
268
|
+
const vac = maybeVacuum(store);
|
|
269
|
+
if (vac) {
|
|
270
|
+
logger.info({
|
|
271
|
+
sizeBeforeBytes: vac.sizeBeforeBytes,
|
|
272
|
+
sizeAfterBytes: vac.sizeAfterBytes,
|
|
273
|
+
reclaimedBytes: vac.sizeBeforeBytes - vac.sizeAfterBytes,
|
|
274
|
+
durationMs: vac.durationMs,
|
|
275
|
+
}, 'VACUUM complete');
|
|
253
276
|
}
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
277
|
+
}
|
|
278
|
+
catch (err) {
|
|
279
|
+
logger.warn({ err }, 'Periodic VACUUM failed');
|
|
280
|
+
}
|
|
281
|
+
logger.info({ durationMs: Date.now() - start }, 'Periodic maintenance complete');
|
|
282
|
+
}
|
|
283
|
+
/**
|
|
284
|
+
* Start periodic maintenance on a 6-hour interval. Returns the interval
|
|
285
|
+
* handle for cleanup on shutdown.
|
|
286
|
+
*/
|
|
287
|
+
export function startPeriodicMaintenance(store, llmCall) {
|
|
288
|
+
return setInterval(() => {
|
|
289
|
+
runPeriodicCycle(store, llmCall).catch(err => logger.warn({ err }, 'Periodic maintenance cycle threw — continuing'));
|
|
290
|
+
}, PERIODIC_INTERVAL_MS);
|
|
257
291
|
}
|
|
258
292
|
//# sourceMappingURL=maintenance.js.map
|
package/dist/memory/store.d.ts
CHANGED
|
@@ -1040,6 +1040,31 @@ export declare class MemoryStore {
|
|
|
1040
1040
|
* skill retrieved in that session. Window: last 60 days.
|
|
1041
1041
|
*/
|
|
1042
1042
|
getSkillsToSuppress(agentSlug?: string): Set<string>;
|
|
1043
|
+
/**
|
|
1044
|
+
* Get a compact "recent feedback signal" snapshot for prompt injection.
|
|
1045
|
+
* Closes the feedback → behavior loop: the agent sees the last week's
|
|
1046
|
+
* negative pattern in its system prompt instead of feedback being
|
|
1047
|
+
* write-only.
|
|
1048
|
+
*
|
|
1049
|
+
* - `negative` / `positive`: counts in the window
|
|
1050
|
+
* - `negativesWithComments`: up to `limit` most recent negatives that
|
|
1051
|
+
* carry a non-empty comment (these are the actionable ones — silent
|
|
1052
|
+
* 👎 reactions don't tell the agent what to fix)
|
|
1053
|
+
* - feedback on the 'behavioral-correction' channel is excluded because behavioral-corrections are
|
|
1054
|
+
* already pushed to hotCorrections directly
|
|
1055
|
+
*/
|
|
1056
|
+
getRecentFeedbackSignals(opts?: {
|
|
1057
|
+
days?: number;
|
|
1058
|
+
limit?: number;
|
|
1059
|
+
}): {
|
|
1060
|
+
negative: number;
|
|
1061
|
+
positive: number;
|
|
1062
|
+
negativesWithComments: Array<{
|
|
1063
|
+
comment: string;
|
|
1064
|
+
channel: string;
|
|
1065
|
+
createdAt: string;
|
|
1066
|
+
}>;
|
|
1067
|
+
};
|
|
1043
1068
|
/**
|
|
1044
1069
|
* Get aggregate feedback statistics.
|
|
1045
1070
|
*/
|
package/dist/memory/store.js
CHANGED
|
@@ -3995,6 +3995,62 @@ export class MemoryStore {
|
|
|
3995
3995
|
}
|
|
3996
3996
|
return suppressed;
|
|
3997
3997
|
}
|
|
3998
|
+
/**
|
|
3999
|
+
* Get a compact "recent feedback signal" snapshot for prompt injection.
|
|
4000
|
+
* Closes the feedback → behavior loop: the agent sees the last week's
|
|
4001
|
+
* negative pattern in its system prompt instead of feedback being
|
|
4002
|
+
* write-only.
|
|
4003
|
+
*
|
|
4004
|
+
* - `negative` / `positive`: counts in the window
|
|
4005
|
+
* - `negativesWithComments`: up to `limit` most recent negatives that
|
|
4006
|
+
* carry a non-empty comment (these are the actionable ones — silent
|
|
4007
|
+
* 👎 reactions don't tell the agent what to fix)
|
|
4008
|
+
* - feedback on the 'behavioral-correction' channel is excluded because behavioral-corrections are
|
|
4009
|
+
* already pushed to hotCorrections directly
|
|
4010
|
+
*/
|
|
4011
|
+
getRecentFeedbackSignals(opts = {}) {
|
|
4012
|
+
const days = Math.max(1, opts.days ?? 14);
|
|
4013
|
+
const limit = Math.max(1, Math.min(opts.limit ?? 3, 10));
|
|
4014
|
+
const since = `datetime('now', '-${days} days')`;
|
|
4015
|
+
let negative = 0;
|
|
4016
|
+
let positive = 0;
|
|
4017
|
+
let negativesWithComments = [];
|
|
4018
|
+
try {
|
|
4019
|
+
const rows = this.conn
|
|
4020
|
+
.prepare(`SELECT rating, COUNT(*) as cnt FROM feedback
|
|
4021
|
+
WHERE created_at >= ${since}
|
|
4022
|
+
AND channel != 'behavioral-correction'
|
|
4023
|
+
AND channel != 'preference-learned'
|
|
4024
|
+
GROUP BY rating`)
|
|
4025
|
+
.all();
|
|
4026
|
+
for (const row of rows) {
|
|
4027
|
+
if (row.rating === 'negative')
|
|
4028
|
+
negative = row.cnt;
|
|
4029
|
+
else if (row.rating === 'positive')
|
|
4030
|
+
positive = row.cnt;
|
|
4031
|
+
}
|
|
4032
|
+
const commented = this.conn
|
|
4033
|
+
.prepare(`SELECT comment, channel, created_at
|
|
4034
|
+
FROM feedback
|
|
4035
|
+
WHERE rating = 'negative'
|
|
4036
|
+
AND comment IS NOT NULL
|
|
4037
|
+
AND TRIM(comment) != ''
|
|
4038
|
+
AND channel != 'behavioral-correction'
|
|
4039
|
+
AND created_at >= ${since}
|
|
4040
|
+
ORDER BY created_at DESC, id DESC
|
|
4041
|
+
LIMIT ?`)
|
|
4042
|
+
.all(limit);
|
|
4043
|
+
negativesWithComments = commented.map((r) => ({
|
|
4044
|
+
comment: r.comment,
|
|
4045
|
+
channel: r.channel,
|
|
4046
|
+
createdAt: r.created_at,
|
|
4047
|
+
}));
|
|
4048
|
+
}
|
|
4049
|
+
catch {
|
|
4050
|
+
// Empty / legacy schema — return zeros
|
|
4051
|
+
}
|
|
4052
|
+
return { negative, positive, negativesWithComments };
|
|
4053
|
+
}
|
|
3998
4054
|
/**
|
|
3999
4055
|
* Get aggregate feedback statistics.
|
|
4000
4056
|
*/
|