clementine-agent 1.18.160 → 1.18.161
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/approval-signals.d.ts +58 -0
- package/dist/agent/approval-signals.js +105 -0
- package/dist/agent/self-improve.d.ts +1 -1
- package/dist/agent/self-improve.js +37 -1
- package/dist/cli/dashboard.js +19 -2
- package/dist/gateway/router.d.ts +1 -0
- package/dist/gateway/router.js +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Owner-approval feedback loop for self-improve proposals (1.18.161).
|
|
3
|
+
*
|
|
4
|
+
* Background: the self-improve hypothesizer generates 1-3 proposals each
|
|
5
|
+
* cycle. The owner approves or denies each one in the dashboard. Today
|
|
6
|
+
* that decision is recorded only as a status change on the experiment row
|
|
7
|
+
* — the *implicit signal* ("this kind of fix is good / bad") is lost.
|
|
8
|
+
*
|
|
9
|
+
* This module captures the signal as an append-only JSONL log
|
|
10
|
+
* (`~/.clementine/self-improve/approval-signals.jsonl`) and exposes
|
|
11
|
+
* `formatApprovalSignalsForHypothesizer()` so the next cycle's prompt includes:
|
|
12
|
+
*
|
|
13
|
+
* ### Owner approval signals (recent)
|
|
14
|
+
* APPROVED (do more like these):
|
|
15
|
+
* - cron/insight-check: "Apply lean mode to reduce prompt size"
|
|
16
|
+
* - agent/sasha-the-cmo: "Add explicit citation requirement to system prompt"
|
|
17
|
+
*
|
|
18
|
+
* DENIED (avoid these patterns):
|
|
19
|
+
* - workflow/email-gen: "Replace template with LLM generation" ← owner note: "too generic; loses voice"
|
|
20
|
+
*
|
|
21
|
+
* The hypothesizer reads this and biases future proposals — favoring
|
|
22
|
+
* patterns the owner has approved, avoiding patterns they've denied.
|
|
23
|
+
*
|
|
24
|
+
* Closed-loop autonomy: the system learns from human feedback without
|
|
25
|
+
* needing the human to write rules. Just react to proposals as usual.
|
|
26
|
+
*/
|
|
27
|
+
/**
 * One owner decision on a self-improve proposal, stored as a single line
 * of the approval-signals JSONL log.
 */
export interface ApprovalSignal {
    /** When the decision was recorded, as an ISO timestamp string. */
    ts: string;
    /** ID of the self-improve experiment the decision belongs to. */
    experimentId: string;
    /** Area the proposal targeted (cron, agent, skill, soul, etc.). */
    area: string;
    /** Specific target inside that area (e.g., "insight-check", "sasha-the-cmo"). */
    target: string;
    /** One-sentence hypothesis of the proposal (truncated to 200 chars). */
    hypothesis: string;
    /** What the owner decided. */
    decision: 'approved' | 'denied';
    /** Optional free-text explanation the owner gave for the decision. */
    noteFromOwner?: string;
}
|
|
43
|
+
/** Append a new signal to the log. Best-effort — never throws to the caller. */
|
|
44
|
+
export declare function recordApprovalSignal(signal: Omit<ApprovalSignal, 'ts'>): void;
|
|
45
|
+
/**
|
|
46
|
+
* Read the most recent N signals from the log. Returns newest-first.
|
|
47
|
+
* Defaults to 50 — enough for the hypothesizer to see patterns, not so
|
|
48
|
+
* many that we bloat its prompt.
|
|
49
|
+
*/
|
|
50
|
+
export declare function getRecentApprovalSignals(limit?: number): ApprovalSignal[];
|
|
51
|
+
/**
|
|
52
|
+
* Render a recent-signals prompt block for the hypothesizer. Returns the
|
|
53
|
+
* empty string when there are no signals (so the prompt stays clean for
|
|
54
|
+
* fresh installs). Caps at the most recent 8 of each kind to keep the
|
|
55
|
+
* block compact.
|
|
56
|
+
*/
|
|
57
|
+
export declare function formatApprovalSignalsForHypothesizer(): string;
|
|
58
|
+
//# sourceMappingURL=approval-signals.d.ts.map
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Owner-approval feedback loop for self-improve proposals (1.18.161).
|
|
3
|
+
*
|
|
4
|
+
* Background: the self-improve hypothesizer generates 1-3 proposals each
|
|
5
|
+
* cycle. The owner approves or denies each one in the dashboard. Today
|
|
6
|
+
* that decision is recorded only as a status change on the experiment row
|
|
7
|
+
* — the *implicit signal* ("this kind of fix is good / bad") is lost.
|
|
8
|
+
*
|
|
9
|
+
* This module captures the signal as an append-only JSONL log
|
|
10
|
+
* (`~/.clementine/self-improve/approval-signals.jsonl`) and exposes
|
|
11
|
+
* `formatApprovalSignalsForHypothesizer()` so the next cycle's prompt includes:
|
|
12
|
+
*
|
|
13
|
+
* ### Owner approval signals (recent)
|
|
14
|
+
* APPROVED (do more like these):
|
|
15
|
+
* - cron/insight-check: "Apply lean mode to reduce prompt size"
|
|
16
|
+
* - agent/sasha-the-cmo: "Add explicit citation requirement to system prompt"
|
|
17
|
+
*
|
|
18
|
+
* DENIED (avoid these patterns):
|
|
19
|
+
* - workflow/email-gen: "Replace template with LLM generation" ← owner note: "too generic; loses voice"
|
|
20
|
+
*
|
|
21
|
+
* The hypothesizer reads this and biases future proposals — favoring
|
|
22
|
+
* patterns the owner has approved, avoiding patterns they've denied.
|
|
23
|
+
*
|
|
24
|
+
* Closed-loop autonomy: the system learns from human feedback without
|
|
25
|
+
* needing the human to write rules. Just react to proposals as usual.
|
|
26
|
+
*/
|
|
27
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync } from 'node:fs';
|
|
28
|
+
import path from 'node:path';
|
|
29
|
+
import { BASE_DIR } from '../config.js';
|
|
30
|
+
/**
 * Resolve the location of the append-only approval-signals log:
 * `<BASE_DIR>/self-improve/approval-signals.jsonl`.
 */
function signalsLogPath() {
    const logDir = path.join(BASE_DIR, 'self-improve');
    return path.join(logDir, 'approval-signals.jsonl');
}
|
|
34
|
+
/** Append a new signal to the log. Best-effort — never throws to the caller. */
|
|
35
|
+
export function recordApprovalSignal(signal) {
|
|
36
|
+
try {
|
|
37
|
+
const file = signalsLogPath();
|
|
38
|
+
mkdirSync(path.dirname(file), { recursive: true });
|
|
39
|
+
const entry = {
|
|
40
|
+
ts: new Date().toISOString(),
|
|
41
|
+
...signal,
|
|
42
|
+
// Truncate hypothesis to keep the log compact + searchable.
|
|
43
|
+
hypothesis: (signal.hypothesis || '').slice(0, 200),
|
|
44
|
+
};
|
|
45
|
+
appendFileSync(file, JSON.stringify(entry) + '\n');
|
|
46
|
+
}
|
|
47
|
+
catch { /* never block the apply/deny path on telemetry */ }
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Read the most recent N signals from the log. Returns newest-first.
|
|
51
|
+
* Defaults to 50 — enough for the hypothesizer to see patterns, not so
|
|
52
|
+
* many that we bloat its prompt.
|
|
53
|
+
*/
|
|
54
|
+
export function getRecentApprovalSignals(limit = 50) {
|
|
55
|
+
const file = signalsLogPath();
|
|
56
|
+
if (!existsSync(file))
|
|
57
|
+
return [];
|
|
58
|
+
try {
|
|
59
|
+
const lines = readFileSync(file, 'utf-8').trim().split('\n').filter(Boolean);
|
|
60
|
+
const recent = [];
|
|
61
|
+
for (let i = lines.length - 1; i >= 0 && recent.length < limit; i--) {
|
|
62
|
+
try {
|
|
63
|
+
recent.push(JSON.parse(lines[i]));
|
|
64
|
+
}
|
|
65
|
+
catch { /* skip malformed lines */ }
|
|
66
|
+
}
|
|
67
|
+
return recent;
|
|
68
|
+
}
|
|
69
|
+
catch {
|
|
70
|
+
return [];
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Render a recent-signals prompt block for the hypothesizer. Returns the
|
|
75
|
+
* empty string when there are no signals (so the prompt stays clean for
|
|
76
|
+
* fresh installs). Caps at the most recent 8 of each kind to keep the
|
|
77
|
+
* block compact.
|
|
78
|
+
*/
|
|
79
|
+
export function formatApprovalSignalsForHypothesizer() {
|
|
80
|
+
const signals = getRecentApprovalSignals(40);
|
|
81
|
+
if (signals.length === 0)
|
|
82
|
+
return '';
|
|
83
|
+
const approved = signals.filter(s => s.decision === 'approved').slice(0, 8);
|
|
84
|
+
const denied = signals.filter(s => s.decision === 'denied').slice(0, 8);
|
|
85
|
+
if (approved.length === 0 && denied.length === 0)
|
|
86
|
+
return '';
|
|
87
|
+
const fmt = (s) => {
|
|
88
|
+
const note = s.noteFromOwner ? ` ← owner note: "${s.noteFromOwner.slice(0, 120)}"` : '';
|
|
89
|
+
return `- ${s.area}/${s.target}: "${s.hypothesis}"${note}`;
|
|
90
|
+
};
|
|
91
|
+
const parts = ['### Owner approval signals (recent)'];
|
|
92
|
+
if (approved.length > 0) {
|
|
93
|
+
parts.push('APPROVED (do more like these):');
|
|
94
|
+
parts.push(approved.map(fmt).join('\n'));
|
|
95
|
+
}
|
|
96
|
+
if (denied.length > 0) {
|
|
97
|
+
parts.push('DENIED (avoid these patterns):');
|
|
98
|
+
parts.push(denied.map(fmt).join('\n'));
|
|
99
|
+
}
|
|
100
|
+
parts.push('Bias today\'s proposals toward the approved patterns and away from the denied ones. ' +
|
|
101
|
+
'If a denied pattern reflects a misunderstanding (e.g. you proposed the wrong target), ' +
|
|
102
|
+
'reframe — don\'t just avoid the area entirely.');
|
|
103
|
+
return parts.join('\n') + '\n\n';
|
|
104
|
+
}
|
|
105
|
+
//# sourceMappingURL=approval-signals.js.map
|
|
@@ -58,7 +58,7 @@ export declare class SelfImproveLoop {
|
|
|
58
58
|
private savePendingChange;
|
|
59
59
|
applyApprovedChange(experimentId: string): Promise<string>;
|
|
60
60
|
/** Deny a pending change without applying it. */
|
|
61
|
-
denyChange(experimentId: string): string;
|
|
61
|
+
denyChange(experimentId: string, noteFromOwner?: string): string;
|
|
62
62
|
private runMemoryCleanup;
|
|
63
63
|
private synthesizeFeedbackPatterns;
|
|
64
64
|
/** Update the structured user model from interaction data. */
|
|
@@ -18,6 +18,7 @@ import { BASE_DIR, SELF_IMPROVE_DIR, SOUL_FILE, CRON_FILE, WORKFLOWS_DIR, VAULT_
|
|
|
18
18
|
import { listAllGoals } from '../tools/shared.js';
|
|
19
19
|
import { MemoryStore } from '../memory/store.js';
|
|
20
20
|
import { ANTHROPIC_SKILL_NAME_PATTERN } from './skill-store.js';
|
|
21
|
+
import { recordApprovalSignal, formatApprovalSignalsForHypothesizer } from './approval-signals.js';
|
|
21
22
|
const logger = pino({ name: 'clementine.self-improve' });
|
|
22
23
|
// ── Defaults ─────────────────────────────────────────────────────────
|
|
23
24
|
const DEFAULT_CONFIG = {
|
|
@@ -1097,6 +1098,10 @@ export class SelfImproveLoop {
|
|
|
1097
1098
|
}
|
|
1098
1099
|
}
|
|
1099
1100
|
catch { /* non-fatal */ }
|
|
1101
|
+
// Owner-approval feedback (1.18.161) — bias hypotheses toward patterns the
|
|
1102
|
+
// owner has approved, away from those they've denied. Empty string for
|
|
1103
|
+
// fresh installs, which keeps the prompt clean.
|
|
1104
|
+
const approvalSignalsText = formatApprovalSignalsForHypothesizer();
|
|
1100
1105
|
// ── Step 1: Analysis — identify top opportunities from metrics (no config dumps) ──
|
|
1101
1106
|
const analysisPrompt = `You are Clementine's self-improvement strategist. Analyze the performance data below and identify the top 3 improvement opportunities.\n\n` +
|
|
1102
1107
|
`## Recent Performance Data (last 7 days)\n` +
|
|
@@ -1114,6 +1119,7 @@ export class SelfImproveLoop {
|
|
|
1114
1119
|
diversityConstraint +
|
|
1115
1120
|
agentFocusText +
|
|
1116
1121
|
soulCandidatesText +
|
|
1122
|
+
(approvalSignalsText ? `\n${approvalSignalsText}` : '') +
|
|
1117
1123
|
`\n## Instructions\n` +
|
|
1118
1124
|
`Propose **1-3 concrete, high-impact improvements** the owner should review today — no fewer (aim for at least one actionable suggestion when data warrants it), no more (the owner reads each proposal manually and you'll overwhelm them). Rank by expected impact; drop anything below "solid idea".\n\n` +
|
|
1119
1125
|
`For each opportunity, specify:\n` +
|
|
@@ -1486,14 +1492,33 @@ export class SelfImproveLoop {
|
|
|
1486
1492
|
catch (err) {
|
|
1487
1493
|
logger.warn({ err }, 'Failed to schedule impact check');
|
|
1488
1494
|
}
|
|
1495
|
+
// 1.18.161 — record the implicit owner-approval signal so future
|
|
1496
|
+
// hypothesizer cycles can see "the owner approved fixes like this"
|
|
1497
|
+
// and bias proposals accordingly. Best-effort, never blocks apply.
|
|
1498
|
+
recordApprovalSignal({
|
|
1499
|
+
experimentId,
|
|
1500
|
+
area: pending.area,
|
|
1501
|
+
target: pending.target,
|
|
1502
|
+
hypothesis: pending.hypothesis,
|
|
1503
|
+
decision: 'approved',
|
|
1504
|
+
});
|
|
1489
1505
|
return `Applied change to ${pending.area}/${pending.target}`;
|
|
1490
1506
|
}
|
|
1491
1507
|
/** Deny a pending change without applying it. */
|
|
1492
|
-
denyChange(experimentId) {
|
|
1508
|
+
denyChange(experimentId, noteFromOwner) {
|
|
1493
1509
|
const pendingFile = path.join(PENDING_DIR, `${experimentId}.json`);
|
|
1494
1510
|
if (!existsSync(pendingFile)) {
|
|
1495
1511
|
return `Pending change not found: ${experimentId}`;
|
|
1496
1512
|
}
|
|
1513
|
+
// 1.18.161 — capture the area/target/hypothesis BEFORE we delete the
|
|
1514
|
+
// pending file so the approval-signal log gets a meaningful entry
|
|
1515
|
+
// (not just an experiment ID with no context).
|
|
1516
|
+
let signalContext = null;
|
|
1517
|
+
try {
|
|
1518
|
+
const pending = JSON.parse(readFileSync(pendingFile, 'utf-8'));
|
|
1519
|
+
signalContext = { area: pending.area, target: pending.target, hypothesis: pending.hypothesis };
|
|
1520
|
+
}
|
|
1521
|
+
catch { /* file may be malformed; record a minimal signal below */ }
|
|
1497
1522
|
this.updateExperimentStatus(experimentId, 'denied');
|
|
1498
1523
|
try {
|
|
1499
1524
|
unlinkSync(pendingFile);
|
|
@@ -1502,6 +1527,17 @@ export class SelfImproveLoop {
|
|
|
1502
1527
|
const state = this.loadState();
|
|
1503
1528
|
state.pendingApprovals = Math.max(0, state.pendingApprovals - 1);
|
|
1504
1529
|
this.saveState(state);
|
|
1530
|
+
// 1.18.161 — record the denial signal. Owner can pass an optional note
|
|
1531
|
+
// (via the dashboard Reason field, or via Discord) explaining why so
|
|
1532
|
+
// the hypothesizer learns more than just "no."
|
|
1533
|
+
recordApprovalSignal({
|
|
1534
|
+
experimentId,
|
|
1535
|
+
area: signalContext?.area ?? 'unknown',
|
|
1536
|
+
target: signalContext?.target ?? 'unknown',
|
|
1537
|
+
hypothesis: signalContext?.hypothesis ?? '(pending file unreadable at deny time)',
|
|
1538
|
+
decision: 'denied',
|
|
1539
|
+
...(noteFromOwner ? { noteFromOwner } : {}),
|
|
1540
|
+
});
|
|
1505
1541
|
return `Denied change: ${experimentId}`;
|
|
1506
1542
|
}
|
|
1507
1543
|
// ── Memory cleanup ───────────────────────────────────────────────
|
package/dist/cli/dashboard.js
CHANGED
|
@@ -11502,7 +11502,14 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
|
|
|
11502
11502
|
app.post('/api/self-improve/deny/:id', async (req, res) => {
|
|
11503
11503
|
try {
|
|
11504
11504
|
const gw = await getGateway();
|
|
11505
|
-
|
|
11505
|
+
// 1.18.161 — accept an optional `noteFromOwner` in the body so the
|
|
11506
|
+
// approval-signal log captures the *reason* for denial (the
|
|
11507
|
+
// hypothesizer learns more from "too generic — loses voice" than
|
|
11508
|
+
// from a bare "no").
|
|
11509
|
+
const noteFromOwner = typeof req.body?.noteFromOwner === 'string'
|
|
11510
|
+
? req.body.noteFromOwner.slice(0, 500)
|
|
11511
|
+
: undefined;
|
|
11512
|
+
const result = await gw.handleSelfImprove('deny', { experimentId: req.params.id, noteFromOwner });
|
|
11506
11513
|
res.json({ ok: true, message: result });
|
|
11507
11514
|
}
|
|
11508
11515
|
catch (err) {
|
|
@@ -40687,7 +40694,17 @@ async function siApply(id) {
|
|
|
40687
40694
|
|
|
40688
40695
|
async function siDeny(id) {
|
|
40689
40696
|
try {
|
|
40690
|
-
|
|
40697
|
+
// 1.18.161 — invite an optional one-line reason. Cancel = abort (no deny);
|
|
40698
|
+
// empty string = bare deny; non-empty = sent to the hypothesizer's
|
|
40699
|
+
// approval-signal log so future cycles avoid the rejected pattern.
|
|
40700
|
+
const note = window.prompt('Optional reason for denying (helps the hypothesizer learn — leave blank to skip):', '');
|
|
40701
|
+
if (note === null) return;
|
|
40702
|
+
const body = note.trim() ? JSON.stringify({ noteFromOwner: note.trim() }) : undefined;
|
|
40703
|
+
const r = await apiFetch('/api/self-improve/deny/' + id, {
|
|
40704
|
+
method: 'POST',
|
|
40705
|
+
headers: body ? { 'Content-Type': 'application/json' } : undefined,
|
|
40706
|
+
body,
|
|
40707
|
+
});
|
|
40691
40708
|
const d = await r.json();
|
|
40692
40709
|
if (d.ok) toast(d.message, 'success');
|
|
40693
40710
|
else toast(d.message || 'Failed', 'error');
|
package/dist/gateway/router.d.ts
CHANGED
|
@@ -308,6 +308,7 @@ export declare class Gateway {
|
|
|
308
308
|
getAllProvenance(): Map<string, SessionProvenance>;
|
|
309
309
|
handleSelfImprove(action: string, args?: {
|
|
310
310
|
experimentId?: string;
|
|
311
|
+
noteFromOwner?: string;
|
|
311
312
|
config?: Partial<SelfImproveConfig>;
|
|
312
313
|
}, onProposal?: (experiment: SelfImproveExperiment) => Promise<void>): Promise<string>;
|
|
313
314
|
/** Extract a procedural skill from a successful cron execution (fire-and-forget). */
|
package/dist/gateway/router.js
CHANGED
|
@@ -2437,7 +2437,7 @@ export class Gateway {
|
|
|
2437
2437
|
case 'deny': {
|
|
2438
2438
|
if (!args?.experimentId)
|
|
2439
2439
|
return 'Missing experiment ID.';
|
|
2440
|
-
return loop.denyChange(args.experimentId);
|
|
2440
|
+
return loop.denyChange(args.experimentId, args.noteFromOwner);
|
|
2441
2441
|
}
|
|
2442
2442
|
case 'run-agent': {
|
|
2443
2443
|
const slug = args?.experimentId; // Reuse experimentId field for agent slug
|