clementine-agent 1.18.136 → 1.18.137
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/self-improve.js +16 -52
- package/dist/types.d.ts +2 -4
- package/package.json +1 -1
|
@@ -14,7 +14,7 @@ import matter from 'gray-matter';
|
|
|
14
14
|
import { load as yamlLoad } from 'js-yaml';
|
|
15
15
|
import path from 'node:path';
|
|
16
16
|
import pino from 'pino';
|
|
17
|
-
import { BASE_DIR, SELF_IMPROVE_DIR, SOUL_FILE,
|
|
17
|
+
import { BASE_DIR, SELF_IMPROVE_DIR, SOUL_FILE, CRON_FILE, WORKFLOWS_DIR, VAULT_DIR, MEMORY_DB_PATH, AGENTS_DIR, CRON_REFLECTIONS_DIR, GOALS_DIR, } from '../config.js';
|
|
18
18
|
import { listAllGoals } from '../tools/shared.js';
|
|
19
19
|
import { MemoryStore } from '../memory/store.js';
|
|
20
20
|
const logger = pino({ name: 'clementine.self-improve' });
|
|
@@ -26,15 +26,18 @@ const DEFAULT_CONFIG = {
|
|
|
26
26
|
acceptThreshold: 0.7,
|
|
27
27
|
surfaceThreshold: 0.85,
|
|
28
28
|
plateauLimit: 3,
|
|
29
|
-
//
|
|
30
|
-
//
|
|
31
|
-
//
|
|
29
|
+
// Areas the autoresearch loop targets. Removed in 1.18.137 cleanup:
|
|
30
|
+
// 'source' (engine TS edits — quarantined since Phase 1, every layer rejected)
|
|
31
|
+
// 'communication' (AGENTS.md — interactive-only; loop fires from cron so
|
|
32
|
+
// improvements are invisible to the next evaluation pass)
|
|
33
|
+
// 'memory' (MEMORY.md is rewritten by the extraction pipeline; proposals
|
|
34
|
+
// get clobbered within days)
|
|
35
|
+
// Each remaining area maps to a file the runtime actually consumes.
|
|
32
36
|
areas: [
|
|
33
|
-
'soul', 'cron', 'workflow', '
|
|
37
|
+
'soul', 'cron', 'workflow', 'agent', 'goal',
|
|
34
38
|
'advisor-rule', 'prompt-override', 'skill',
|
|
35
39
|
],
|
|
36
40
|
autoApply: true,
|
|
37
|
-
sourceMode: 'skip',
|
|
38
41
|
};
|
|
39
42
|
// ── Paths ────────────────────────────────────────────────────────────
|
|
40
43
|
const EXPERIMENT_LOG = path.join(SELF_IMPROVE_DIR, 'experiment-log.jsonl');
|
|
@@ -139,9 +142,10 @@ function checkDrift(proposedContent) {
|
|
|
139
142
|
return { ok: similarity >= DRIFT_SIMILARITY_THRESHOLD, similarity };
|
|
140
143
|
}
|
|
141
144
|
/** Classify the risk level of a proposed change.
|
|
142
|
-
* - low: agent prompts, individual cron job prompts,
|
|
143
|
-
*
|
|
144
|
-
* -
|
|
145
|
+
* - low: agent prompts, individual cron job prompts, workflows, advisor
|
|
146
|
+
* rules, prompt overrides — small-blast-radius config files
|
|
147
|
+
* - medium: SOUL.md, goals, skills — fan out across many runs; the owner
|
|
148
|
+
* should review before they ship
|
|
145
149
|
*/
|
|
146
150
|
function classifyRisk(area) {
|
|
147
151
|
switch (area) {
|
|
@@ -150,7 +154,7 @@ function classifyRisk(area) {
|
|
|
150
154
|
case 'workflow': return 'low';
|
|
151
155
|
case 'advisor-rule': return 'low'; // YAML files, hot-reloaded, easily deleted
|
|
152
156
|
case 'prompt-override': return 'low'; // Markdown files, hot-reloaded, easily deleted
|
|
153
|
-
// 1.18.136 — 'skill' is
|
|
157
|
+
// 1.18.136 — 'skill' is a first-class self-improve target.
|
|
154
158
|
// Skills are user-facing recipes that fire across many tasks; a
|
|
155
159
|
// bad change can cascade into every cron that pins them. Medium
|
|
156
160
|
// tier so changes always surface for owner approval, never auto-
|
|
@@ -158,11 +162,8 @@ function classifyRisk(area) {
|
|
|
158
162
|
// proposal can even reach the queue (see validateProposal).
|
|
159
163
|
case 'skill': return 'medium';
|
|
160
164
|
case 'soul': return 'medium'; // Core personality — needs approval
|
|
161
|
-
case 'communication': return 'medium'; // Global operating instructions
|
|
162
|
-
case 'memory': return 'medium'; // Memory config
|
|
163
165
|
case 'goal': return 'medium'; // New goals need owner review before activating
|
|
164
|
-
|
|
165
|
-
default: return 'high';
|
|
166
|
+
default: return 'medium'; // Unknown area → safest default
|
|
166
167
|
}
|
|
167
168
|
}
|
|
168
169
|
export const USER_MODEL_SLOT_KEYS = ['user_facts', 'goals', 'relationships', 'agent_persona'];
|
|
@@ -567,23 +568,6 @@ export class SelfImproveLoop {
|
|
|
567
568
|
state.pendingApprovals++;
|
|
568
569
|
}
|
|
569
570
|
}
|
|
570
|
-
else if (this.config.autoApply && risk === 'high') {
|
|
571
|
-
// High-risk: behavior depends on sourceMode config
|
|
572
|
-
if (this.config.sourceMode === 'skip') {
|
|
573
|
-
logger.info({ id, area: proposal.area, risk }, 'Skipped high-risk proposal in auto mode');
|
|
574
|
-
experiment.approvalStatus = 'denied';
|
|
575
|
-
experiment.reason = 'High-risk area blocked in autonomous mode (sourceMode=skip)';
|
|
576
|
-
}
|
|
577
|
-
else {
|
|
578
|
-
// propose-only: save for human review, never auto-apply
|
|
579
|
-
await this.savePendingChange(experiment, before);
|
|
580
|
-
state.pendingApprovals++;
|
|
581
|
-
if (onProposal) {
|
|
582
|
-
await onProposal(experiment);
|
|
583
|
-
}
|
|
584
|
-
logger.info({ id, area: proposal.area, risk }, 'Saved high-risk proposal for human review');
|
|
585
|
-
}
|
|
586
|
-
}
|
|
587
571
|
else {
|
|
588
572
|
// Medium-risk or manual mode: save as pending for approval
|
|
589
573
|
await this.savePendingChange(experiment, before);
|
|
@@ -1254,12 +1238,6 @@ export class SelfImproveLoop {
|
|
|
1254
1238
|
const agentFile = path.join(AGENTS_DIR, target, 'agent.md');
|
|
1255
1239
|
return existsSync(agentFile) ? readFileSync(agentFile, 'utf-8') : '';
|
|
1256
1240
|
}
|
|
1257
|
-
case 'communication':
|
|
1258
|
-
return existsSync(AGENTS_FILE) ? readFileSync(AGENTS_FILE, 'utf-8') : '';
|
|
1259
|
-
case 'memory': {
|
|
1260
|
-
const memoryFile = path.join(VAULT_DIR, '00-System', 'MEMORY.md');
|
|
1261
|
-
return existsSync(memoryFile) ? readFileSync(memoryFile, 'utf-8') : '';
|
|
1262
|
-
}
|
|
1263
1241
|
case 'goal': {
|
|
1264
1242
|
// target = "{owner}" e.g. "clementine" or an agent slug
|
|
1265
1243
|
const owner = target.split('/')[0];
|
|
@@ -1363,11 +1341,6 @@ export class SelfImproveLoop {
|
|
|
1363
1341
|
if (!targetPath) {
|
|
1364
1342
|
return `Cannot resolve target path for area=${pending.area}, target=${pending.target}`;
|
|
1365
1343
|
}
|
|
1366
|
-
// 'source' area is deprecated (Phase 1 quarantine). Reject up-front so a
|
|
1367
|
-
// misbehaving proposal cannot reach the safeSourceEdit primitive.
|
|
1368
|
-
if (pending.area === 'source') {
|
|
1369
|
-
return 'source area is deprecated — propose advisor-rule or prompt-override instead';
|
|
1370
|
-
}
|
|
1371
1344
|
// Goal area: parse JSON, inject required fields, ensure parent dir exists
|
|
1372
1345
|
if (pending.area === 'goal') {
|
|
1373
1346
|
try {
|
|
@@ -2124,10 +2097,6 @@ export class SelfImproveLoop {
|
|
|
2124
2097
|
case 'agent': {
|
|
2125
2098
|
return path.join(AGENTS_DIR, target, 'agent.md');
|
|
2126
2099
|
}
|
|
2127
|
-
case 'communication':
|
|
2128
|
-
return AGENTS_FILE;
|
|
2129
|
-
case 'memory':
|
|
2130
|
-
return path.join(VAULT_DIR, '00-System', 'MEMORY.md');
|
|
2131
2100
|
case 'goal': {
|
|
2132
2101
|
// target = "{owner}/{goalSlug}" e.g. "clementine/<goal-slug>" or "<agent-slug>/<goal-slug>"
|
|
2133
2102
|
const [owner, goalSlug] = target.split('/');
|
|
@@ -2184,7 +2153,7 @@ export function validateProposal(area, target, proposedChange) {
|
|
|
2184
2153
|
if (!proposedChange.trim()) {
|
|
2185
2154
|
return { valid: false, error: 'Proposed change is empty' };
|
|
2186
2155
|
}
|
|
2187
|
-
if (['soul', 'cron', 'workflow', 'agent'
|
|
2156
|
+
if (['soul', 'cron', 'workflow', 'agent'].includes(area)) {
|
|
2188
2157
|
try {
|
|
2189
2158
|
matter(proposedChange);
|
|
2190
2159
|
}
|
|
@@ -2220,11 +2189,6 @@ export function validateProposal(area, target, proposedChange) {
|
|
|
2220
2189
|
return { valid: false, error: `CRON.md validation failed: ${err}` };
|
|
2221
2190
|
}
|
|
2222
2191
|
}
|
|
2223
|
-
if (area === 'source') {
|
|
2224
|
-
// Deprecated — Phase 1 quarantined source self-edit. Reject up front so
|
|
2225
|
-
// a misbehaving LLM proposal doesn't even get cached.
|
|
2226
|
-
return { valid: false, error: 'source area is deprecated; propose advisor-rule or prompt-override instead' };
|
|
2227
|
-
}
|
|
2228
2192
|
if (area === 'skill') {
|
|
2229
2193
|
// 1.18.136 — skill body validation. The proposedChange is the FULL
|
|
2230
2194
|
// SKILL.md (frontmatter + body). Anthropic spec rules: name must
|
package/dist/types.d.ts
CHANGED
|
@@ -957,7 +957,7 @@ export interface SelfImproveExperiment {
|
|
|
957
957
|
startedAt: string;
|
|
958
958
|
finishedAt: string;
|
|
959
959
|
durationMs: number;
|
|
960
|
-
area: 'soul' | 'cron' | 'workflow' | '
|
|
960
|
+
area: 'soul' | 'cron' | 'workflow' | 'agent' | 'goal' | 'advisor-rule' | 'prompt-override' | 'skill';
|
|
961
961
|
target: string;
|
|
962
962
|
hypothesis: string;
|
|
963
963
|
proposedChange: string;
|
|
@@ -1014,13 +1014,11 @@ export interface SelfImproveConfig {
|
|
|
1014
1014
|
*/
|
|
1015
1015
|
surfaceThreshold?: number;
|
|
1016
1016
|
plateauLimit: number;
|
|
1017
|
-
areas: ('soul' | 'cron' | 'workflow' | '
|
|
1017
|
+
areas: ('soul' | 'cron' | 'workflow' | 'agent' | 'goal' | 'advisor-rule' | 'prompt-override' | 'skill')[];
|
|
1018
1018
|
/** Enable tiered auto-apply: low-risk changes apply without approval. Note: DEFAULT_CONFIG in agent/self-improve.js sets this to true. */
|
|
1019
1019
|
autoApply?: boolean;
|
|
1020
1020
|
/** Target a specific agent slug (for per-agent improvement cycles). */
|
|
1021
1021
|
agentSlug?: string;
|
|
1022
|
-
/** How to handle source code proposals. 'skip' = drop silently, 'propose-only' = save for human review. Default: 'propose-only'. */
|
|
1023
|
-
sourceMode?: 'skip' | 'propose-only';
|
|
1024
1022
|
}
|
|
1025
1023
|
export interface RestartSentinel {
|
|
1026
1024
|
previousPid: number;
|