chainlesschain 0.45.75 → 0.45.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -15
- package/package.json +1 -1
- package/src/commands/learning.js +273 -0
- package/src/commands/lowcode.js +23 -8
- package/src/gateways/discord/discord-formatter.js +89 -0
- package/src/gateways/gateway-base.js +189 -0
- package/src/gateways/telegram/telegram-formatter.js +93 -0
- package/src/index.js +2 -0
- package/src/lib/app-builder.js +136 -8
- package/src/lib/autonomous-agent.js +8 -1
- package/src/lib/cli-context-engineering.js +15 -0
- package/src/lib/execution-backend.js +239 -0
- package/src/lib/hook-manager.js +2 -0
- package/src/lib/iteration-budget.js +175 -0
- package/src/lib/learning/learning-hooks.js +117 -0
- package/src/lib/learning/learning-tables.js +66 -0
- package/src/lib/learning/outcome-feedback.js +243 -0
- package/src/lib/learning/reflection-engine.js +323 -0
- package/src/lib/learning/skill-improver.js +536 -0
- package/src/lib/learning/skill-synthesizer.js +315 -0
- package/src/lib/learning/trajectory-store.js +409 -0
- package/src/lib/plugin-autodiscovery.js +224 -0
- package/src/lib/session-search.js +193 -0
- package/src/lib/sub-agent-context.js +7 -2
- package/src/lib/user-profile.js +172 -0
- package/src/lib/web-ui-server.js +1 -1
- package/src/repl/agent-repl.js +109 -0
- package/src/runtime/agent-core.js +75 -4
- package/src/runtime/coding-agent-contract-shared.cjs +35 -0
- package/src/runtime/coding-agent-policy.cjs +10 -0
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OutcomeFeedback — Connects tool execution results to quality scores,
|
|
3
|
+
* producing reward signals for the learning loop.
|
|
4
|
+
*
|
|
5
|
+
* Three signal sources:
|
|
6
|
+
* 1. Auto-score — heuristic based on error rate, retries, final status
|
|
7
|
+
* 2. User feedback — explicit 👍/👎 or /feedback command
|
|
8
|
+
* 3. Correction detection — user redoes/corrects within a time window
|
|
9
|
+
*
|
|
10
|
+
* Propagation:
|
|
11
|
+
* - TrajectoryStore (backfill outcome_score)
|
|
12
|
+
* - InstinctManager (tool preference / workflow patterns)
|
|
13
|
+
* - EvolutionSystem (capability assessment)
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { INSTINCT_CATEGORIES, recordInstinct } from "../instinct-manager.js";
|
|
17
|
+
import { assessCapability } from "../evolution-system.js";
|
|
18
|
+
|
|
19
|
+
// ── Auto-scoring ────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
/**
 * Detect retries: the same tool appearing in two adjacent steps of the chain.
 * @param {Array<{tool:string}>} chain
 * @returns {boolean} true when any step uses the same tool as the step before it
 */
export function hasRetries(chain) {
  return chain.some(
    (step, index) => index > 0 && step.tool === chain[index - 1].tool,
  );
}

/**
 * Auto-score a trajectory based on execution quality heuristics.
 *
 * Starts at 0.5 and applies these adjustments:
 *   - no step with status "error"/"failed"      → +0.2
 *   - more than half of the steps errored       → -0.3
 *   - the same tool was called back-to-back     → -0.1
 *   - the final step has status "completed"     → +0.1
 *
 * An empty or missing tool chain yields the neutral score 0.5.
 *
 * @param {{toolChain:Array<{status:string, tool:string}>}} trajectory
 * @returns {number} score 0-1
 */
export function autoScore(trajectory) {
  const chain = trajectory.toolChain || [];
  if (chain.length === 0) return 0.5;

  // Accumulate in integer tenths. Summing 0.5 + 0.2 + 0.1 in floating point
  // yields 0.7999999999999999, which silently fails a `score >= 0.8` check;
  // likewise 0.5 - 0.3 + 0.1 yields 0.30000000000000004 and fails `<= 0.3`.
  let tenths = 5;

  const errorCount = chain.filter(
    (step) => step.status === "error" || step.status === "failed",
  ).length;

  // No errors → +0.2
  if (errorCount === 0) tenths += 2;

  // Error rate > 50% → -0.3
  if (errorCount / chain.length > 0.5) tenths -= 3;

  // Has retries → -0.1
  if (hasRetries(chain)) tenths -= 1;

  // Final tool succeeded → +0.1
  const lastStep = chain[chain.length - 1];
  if (lastStep && lastStep.status === "completed") tenths += 1;

  return Math.max(0, Math.min(10, tenths)) / 10;
}
|
|
64
|
+
|
|
65
|
+
// ── Correction detection ────────────────────────────────
|
|
66
|
+
|
|
67
|
+
/**
 * Negation patterns for correction detection (Chinese + English).
 *
 * The short English verbs are anchored on word boundaries so that they do
 * not fire inside unrelated words ("predominantly" contains "redo",
 * "undoubtedly" contains "undo"). Contractions accept both the ASCII and the
 * typographic apostrophe.
 */
const NEGATION_PATTERNS = [
  /不[是对]/,
  /错了/,
  /重[新做来]/,
  /别这样/,
  /不要这样/,
  /not\s+right/i,
  /wrong/i,
  /\bredo(?:ing|ne)?\b/i,
  /don['’]t/i,
  /\bundo(?:ing|ne)?\b/i,
  /try\s+again/i,
  /that['’]s\s+not/i,
  /incorrect/i,
];

/**
 * Words that, together with a reference to a previous tool argument, mark a
 * message as a correction. Matched as whole words so that e.g. "button" does
 * not count as "but".
 */
const CORRECTION_CUE_PATTERN = /\b(?:instead|rather|actually|but|however)\b/i;

/**
 * Detect if a user message is correcting the previous agent action.
 *
 * Two strategies are tried in order:
 *   1. the message matches one of NEGATION_PATTERNS;
 *   2. the message quotes a string argument (longer than 3 characters) of a
 *      tool call from the previous trajectory AND contains a correction cue
 *      word.
 *
 * @param {string} currentMessage — the user's latest input
 * @param {{toolChain:Array<{tool:string}>}} [previousTrajectory] — previous turn's trajectory
 * @returns {{isCorrection:boolean, detail:string}}
 */
export function detectCorrection(currentMessage, previousTrajectory) {
  // Non-string input cannot be inspected; treat it like an empty message
  // instead of throwing on `.trim()`.
  if (
    typeof currentMessage !== "string" ||
    !currentMessage ||
    !previousTrajectory
  ) {
    return { isCorrection: false, detail: "" };
  }

  const msg = currentMessage.trim();

  // Strategy 1: negation pattern match
  if (NEGATION_PATTERNS.some((pattern) => pattern.test(msg))) {
    return {
      isCorrection: true,
      detail: `Negation detected: "${msg.slice(0, 80)}"`,
    };
  }

  // Strategy 2: user references an argument from the previous trajectory.
  // A bare reference is not enough; it only counts alongside a cue word, so
  // skip the argument scan entirely when no cue is present.
  if (CORRECTION_CUE_PATTERN.test(msg)) {
    const referencedArg = (previousTrajectory.toolChain || [])
      .flatMap((step) =>
        step?.args
          ? Object.values(step.args).filter((v) => typeof v === "string")
          : [],
      )
      .find((arg) => arg.length > 3 && msg.includes(arg));

    if (referencedArg !== undefined) {
      return {
        isCorrection: true,
        detail: `References previous arg "${referencedArg}" with correction language`,
      };
    }
  }

  return { isCorrection: false, detail: "" };
}
|
|
134
|
+
|
|
135
|
+
// ── OutcomeFeedback class ───────────────────────────────
|
|
136
|
+
|
|
137
|
+
export class OutcomeFeedback {
  /**
   * Bridges trajectory outcome scores to the instinct and evolution stores.
   * @param {import("better-sqlite3").Database} db
   * @param {import("./trajectory-store.js").TrajectoryStore} trajectoryStore
   */
  constructor(db, trajectoryStore) {
    this.db = db;
    this.trajectoryStore = trajectoryStore;
  }

  /**
   * Auto-score a trajectory and backfill the score with source "auto".
   * @param {string} trajectoryId
   * @returns {number} the computed score, or 0.5 when the trajectory is not found
   */
  scoreTrajectory(trajectoryId) {
    const traj = this.trajectoryStore.getTrajectory(trajectoryId);
    if (!traj) return 0.5;

    const score = autoScore(traj);
    this.trajectoryStore.setOutcomeScore(trajectoryId, score, "auto");
    return score;
  }

  /**
   * Record explicit user feedback, overriding auto-score.
   *
   * Numeric ratings are clamped to [0, 1]. The string "positive" maps to 1;
   * any other non-numeric rating maps to 0. A NaN rating cannot be clamped
   * (Math.min/Math.max propagate NaN), so it is rejected instead of being
   * persisted as the trajectory's score.
   *
   * @param {string} trajectoryId
   * @param {{rating:"positive"|"negative"|number, comment?:string}} feedback
   * @returns {number|null} the score that was recorded, or null when the
   *   rating was NaN and nothing was written
   */
  recordUserFeedback(trajectoryId, feedback) {
    const { rating } = feedback;

    let score;
    if (typeof rating === "number") {
      if (Number.isNaN(rating)) return null;
      score = Math.max(0, Math.min(1, rating));
    } else {
      score = rating === "positive" ? 1.0 : 0.0;
    }

    this.trajectoryStore.setOutcomeScore(trajectoryId, score, "user");
    return score;
  }

  /**
   * Check if a new user message is a correction of the previous turn,
   * and if so, downgrade the previous trajectory's score by 0.3 (floored at
   * 0, starting from 0.5 when the trajectory has no score yet). The lowered
   * score is written with source "user".
   * @param {string} currentMessage
   * @param {string} previousTrajectoryId
   * @returns {{isCorrection:boolean, detail:string}}
   */
  checkCorrection(currentMessage, previousTrajectoryId) {
    const prevTraj = this.trajectoryStore.getTrajectory(previousTrajectoryId);
    if (!prevTraj) return { isCorrection: false, detail: "" };

    const result = detectCorrection(currentMessage, prevTraj);
    if (result.isCorrection) {
      const currentScore = prevTraj.outcomeScore ?? 0.5;
      const newScore = Math.max(0, currentScore - 0.3);
      this.trajectoryStore.setOutcomeScore(
        previousTrajectoryId,
        newScore,
        "user",
      );
    }
    return result;
  }

  /**
   * Propagate a trajectory's feedback signal to Instinct and Evolution.
   * Call this after scoring is finalized. Does nothing when the trajectory
   * is missing or has no outcome score.
   * @param {string} trajectoryId
   */
  propagateFeedback(trajectoryId) {
    const traj = this.trajectoryStore.getTrajectory(trajectoryId);
    if (!traj || traj.outcomeScore == null) return;

    const score = traj.outcomeScore;
    // Distinct tool names in first-seen order.
    const uniqueTools = [
      ...new Set((traj.toolChain || []).map((step) => step.tool)),
    ];

    // Propagate to Instinct
    try {
      if (uniqueTools.length > 0) {
        const sequence = uniqueTools.join(" → ");
        if (score >= 0.8) {
          // High score → record tool preference
          recordInstinct(
            this.db,
            INSTINCT_CATEGORIES.TOOL_PREFERENCE,
            sequence,
          );
        } else if (score <= 0.3) {
          // Low score → record as workflow to avoid
          recordInstinct(
            this.db,
            INSTINCT_CATEGORIES.WORKFLOW,
            `avoid: ${sequence}`,
          );
        }
      }
    } catch (_err) {
      // Instinct propagation failure is non-critical
    }

    // Propagate to Evolution
    try {
      for (const tool of uniqueTools) {
        assessCapability(this.db, tool, score, "tool");
      }
    } catch (_err) {
      // Evolution propagation failure is non-critical
    }
  }
}
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ReflectionEngine — Periodic self-review of accumulated trajectory data.
|
|
3
|
+
*
|
|
4
|
+
* Generates structured reflection reports:
|
|
5
|
+
* - Tool usage patterns (most used, error-prone)
|
|
6
|
+
* - Score trends (improving / declining)
|
|
7
|
+
* - Skill coverage gaps
|
|
8
|
+
* - Improvement recommendations
|
|
9
|
+
*
|
|
10
|
+
* Trigger: manual via CLI command or scheduled (cron-style)
|
|
11
|
+
* Output: JSON report stored in DB + optional SKILL.md improvements
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { extractToolNames } from "./skill-synthesizer.js";
|
|
15
|
+
|
|
16
|
+
// ── _deps for test injection ────────────────────────
|
|
17
|
+
/**
 * Replaceable dependencies. `now` supplies the current time in epoch
 * milliseconds and can be reassigned to get a deterministic clock.
 */
const _deps = {
  now() {
    return Date.now();
  },
};
|
|
20
|
+
|
|
21
|
+
// ── Helpers ─────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
/**
 * Compute tool usage statistics from trajectories.
 *
 * Every step of every trajectory's `toolChain` is counted once; a step
 * counts as an error when its status is "error" or "failed". Trajectories
 * without a `toolChain` contribute nothing.
 *
 * @param {Array<{toolChain:Array<{tool:string, status:string}>}>} trajectories
 * @returns {{toolUsage:Record<string, {count:number, errorRate:number}>, totalTools:number}}
 */
export function computeToolStats(trajectories) {
  // Tally in a Map rather than a plain object: tool names are arbitrary
  // strings, and a name such as "constructor" or "__proto__" would otherwise
  // resolve to an inherited Object.prototype member and be miscounted.
  const tallies = new Map();
  let totalTools = 0;

  for (const traj of trajectories) {
    for (const step of traj.toolChain || []) {
      totalTools++;

      const name = String(step.tool);
      let tally = tallies.get(name);
      if (!tally) {
        tally = { count: 0, errors: 0 };
        tallies.set(name, tally);
      }

      tally.count++;
      if (step.status === "error" || step.status === "failed") {
        tally.errors++;
      }
    }
  }

  // Object.fromEntries defines own data properties, so even "__proto__"
  // ends up as a regular key on the returned plain object.
  const toolUsage = Object.fromEntries(
    [...tallies].map(([tool, { count, errors }]) => [
      tool,
      { count, errorRate: count > 0 ? errors / count : 0 },
    ]),
  );

  return { toolUsage, totalTools };
}
|
|
55
|
+
|
|
56
|
+
/**
 * Classify how outcome scores evolve across a list of trajectories.
 *
 * Unscored trajectories (null/undefined `outcomeScore`) are ignored. The
 * remaining scores are split at the midpoint; when the later half's mean
 * exceeds the earlier half's by more than 0.05 the trend is "improving",
 * when it trails by more than 0.05 it is "declining", otherwise "stable".
 * With fewer than two scored entries the trend is "stable" and both
 * averages equal the single score (or 0 when there is none).
 *
 * @param {Array<{outcomeScore:number|null}>} trajectories — ordered oldest→newest
 * @returns {{trend:"improving"|"declining"|"stable", avgScore:number, recentAvg:number}}
 */
export function computeScoreTrend(trajectories) {
  const scores = trajectories
    .filter((entry) => entry.outcomeScore != null)
    .map((entry) => entry.outcomeScore);

  if (scores.length < 2) {
    const lone = scores.length === 1 ? scores[0] : 0;
    return { trend: "stable", avgScore: lone, recentAvg: lone };
  }

  const mean = (values) =>
    values.reduce((total, value) => total + value, 0) / values.length;

  const splitAt = Math.floor(scores.length / 2);
  const earlierMean = mean(scores.slice(0, splitAt));
  const laterMean = mean(scores.slice(splitAt));
  const change = laterMean - earlierMean;

  let trend;
  if (change > 0.05) {
    trend = "improving";
  } else if (change < -0.05) {
    trend = "declining";
  } else {
    trend = "stable";
  }

  return { trend, avgScore: mean(scores), recentAvg: laterMean };
}
|
|
89
|
+
|
|
90
|
+
/**
 * List the tools whose error rate is strictly above `threshold`.
 *
 * Tools seen fewer than two times are skipped. The result is ordered from
 * the highest error rate to the lowest.
 *
 * @param {Record<string, {count:number, errorRate:number}>} toolUsage
 * @param {number} [threshold=0.3]
 * @returns {Array<{tool:string, errorRate:number, count:number}>}
 */
export function findErrorProneTools(toolUsage, threshold = 0.3) {
  const flagged = [];

  for (const [tool, { errorRate, count }] of Object.entries(toolUsage)) {
    const tooFewCalls = count < 2;
    if (tooFewCalls || !(errorRate > threshold)) continue;
    flagged.push({ tool, errorRate, count });
  }

  flagged.sort((left, right) => right.errorRate - left.errorRate);
  return flagged;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Assemble the chat messages sent to the LLM for a reflection pass.
 *
 * The system message fixes the analyst role and the JSON shape expected in
 * the reply; the user message renders the pre-computed statistics as
 * Markdown sections. Missing fields fall back to placeholder text
 * ("N/A", "unknown", "No data", "None", 0).
 *
 * @param {object} reportData — pre-computed stats
 * @returns {Array<{role:string, content:string}>}
 */
export function buildReflectionPrompt(reportData) {
  const percent = (rate) => (rate * 100).toFixed(0);

  const systemContent = `You are a self-improvement analyst for an AI coding assistant.
Analyze execution statistics and provide actionable recommendations.
Output ONLY valid JSON:
{
"summary": "2-3 sentence overview",
"strengths": ["strength 1", ...],
"weaknesses": ["weakness 1", ...],
"recommendations": [
{"action": "what to do", "priority": "high|medium|low", "reason": "why"}
]
}`;

  const averageText = reportData.avgScore?.toFixed(2) || "N/A";
  const trendText = reportData.trend || "unknown";
  const recentText = reportData.recentAvg?.toFixed(2) || "N/A";

  // An absent list and an empty list both collapse to the placeholder,
  // because joining zero lines yields an empty (falsy) string.
  const topToolLines =
    reportData.topTools
      ?.map((t) => `- ${t.tool}: ${t.count}x (error rate: ${percent(t.errorRate)}%)`)
      .join("\n") || "No data";
  const errorProneLines =
    reportData.errorProneTools
      ?.map((t) => `- ${t.tool}: ${percent(t.errorRate)}% error rate (${t.count} calls)`)
      .join("\n") || "None";

  const userContent = `## Reflection Period Stats
Total trajectories: ${reportData.totalTrajectories}
Scored trajectories: ${reportData.scoredCount}
Average score: ${averageText}
Score trend: ${trendText}
Recent average: ${recentText}

## Tool Usage (top 10)
${topToolLines}

## Error-prone Tools
${errorProneLines}

## Synthesized Skills
${reportData.synthesizedCount || 0} skills auto-generated`;

  return [
    { role: "system", content: systemContent },
    { role: "user", content: userContent },
  ];
}
|
|
148
|
+
|
|
149
|
+
// ── ReflectionEngine class ─────────────────────────
|
|
150
|
+
|
|
151
|
+
export class ReflectionEngine {
  /**
   * @param {import("better-sqlite3").Database} db
   * @param {function|null} llmChat — async (messages) => string
   * @param {import("./trajectory-store.js").TrajectoryStore} trajectoryStore
   * @param {{reflectionInterval?:number}} [config] — interval in milliseconds
   *   between reflections; a missing or falsy value selects the 24h default
   */
  constructor(db, llmChat, trajectoryStore, config = {}) {
    this.db = db;
    this.llmChat = llmChat;
    this.trajectoryStore = trajectoryStore;
    this.reflectionInterval = config.reflectionInterval || 24 * 60 * 60 * 1000; // 24h default
  }

  /**
   * Run a reflection cycle: gather stats, analyze, produce report.
   *
   * The report is persisted via `_saveReport` before being returned. When
   * there are no trajectories an empty report is returned and nothing is
   * persisted.
   *
   * @param {{since?:string, limit?:number}} [options] — `limit` caps the
   *   number of trajectories fetched (default 200)
   * @returns {Promise<object>} reflection report
   */
  async reflect(options = {}) {
    const limit = options.limit || 200;

    // NOTE(review): `options.since` is documented but not read here; only
    // `limit` reaches the store. Confirm whether getRecent should receive it.
    const trajectories = this.trajectoryStore.getRecent({ limit });
    if (trajectories.length === 0) {
      return this._emptyReport("No trajectories to reflect on");
    }

    // Compute stats
    const { toolUsage, totalTools } = computeToolStats(trajectories);
    // NOTE(review): computeScoreTrend documents an oldest→newest input;
    // verify that getRecent returns trajectories in that order.
    const scoreTrend = computeScoreTrend(trajectories);
    const errorProneTools = findErrorProneTools(toolUsage);

    // Top tools by usage
    const topTools = Object.entries(toolUsage)
      .map(([tool, stats]) => ({ tool, ...stats }))
      .sort((a, b) => b.count - a.count)
      .slice(0, 10);

    const dbStats = this.trajectoryStore.getStats();

    const reportData = {
      totalTrajectories: trajectories.length,
      scoredCount: trajectories.filter((t) => t.outcomeScore != null).length,
      avgScore: scoreTrend.avgScore,
      recentAvg: scoreTrend.recentAvg,
      trend: scoreTrend.trend,
      topTools,
      errorProneTools,
      totalToolCalls: totalTools,
      synthesizedCount: dbStats.synthesized,
    };

    // LLM analysis (optional)
    let llmAnalysis = null;
    if (this.llmChat) {
      llmAnalysis = await this._getLLMAnalysis(reportData);
    }

    const report = {
      timestamp: new Date(_deps.now()).toISOString(),
      ...reportData,
      llmAnalysis,
    };

    // Persist report
    this._saveReport(report);

    return report;
  }

  /**
   * Get the most recent reflection report.
   * @returns {object|null} the parsed report, or null when none is stored,
   *   the stored detail is not valid JSON, or the query fails
   */
  getLatestReport() {
    try {
      const row = this.db
        .prepare(
          `SELECT * FROM skill_improvement_log
WHERE trigger_type = 'reflection'
ORDER BY created_at DESC
LIMIT 1`,
        )
        .get();

      if (!row) return null;
      return JSON.parse(row.detail);
    } catch {
      // Covers both a failed query and an unparseable `detail` column.
      return null;
    }
  }

  /**
   * Check if a reflection is due based on interval.
   * @returns {boolean} true when there is no usable previous report or the
   *   configured interval has elapsed since its timestamp
   */
  isReflectionDue() {
    const latest = this.getLatestReport();
    if (!latest || !latest.timestamp) return true;

    const lastTime = new Date(latest.timestamp).getTime();
    // An unparseable timestamp yields NaN, and every comparison with NaN is
    // false — without this guard a corrupt record would block reflection
    // forever.
    if (Number.isNaN(lastTime)) return true;

    return _deps.now() - lastTime >= this.reflectionInterval;
  }

  // ── Internal ────────────────────────────────────

  /**
   * Get LLM analysis of the report data.
   *
   * Extracts the span from the first "{" to the last "}" of the reply and
   * parses it as JSON.
   *
   * @param {object} reportData
   * @returns {Promise<object|null>} parsed analysis, or null when the call
   *   fails or the reply holds no parseable JSON object
   */
  async _getLLMAnalysis(reportData) {
    try {
      const messages = buildReflectionPrompt(reportData);
      const response = await this.llmChat(messages);
      const jsonMatch = response.match(/\{[\s\S]*\}/);
      if (!jsonMatch) return null;
      return JSON.parse(jsonMatch[0]);
    } catch {
      return null;
    }
  }

  /**
   * Save report to skill_improvement_log.
   *
   * Persistence is best-effort: serialization or database errors are
   * swallowed so that a reflection run still returns its report.
   *
   * @param {object} report
   */
  _saveReport(report) {
    try {
      this.db
        .prepare(
          `INSERT INTO skill_improvement_log (skill_name, trigger_type, detail)
VALUES (?, ?, ?)`,
        )
        .run("_reflection", "reflection", this._serializeReport(report));
    } catch {
      // Non-critical
    }
  }

  /**
   * Serialize a report to JSON that fits in `maxLength` characters.
   *
   * Cutting the JSON text at a fixed length would leave a string that
   * JSON.parse rejects, which makes the stored report unreadable. Instead,
   * progressively smaller variants of the report are tried, each of which
   * is complete JSON; reduced variants carry `truncated: true`.
   *
   * @param {object} report
   * @param {number} [maxLength=5000]
   * @returns {string} valid JSON; the smallest variant is returned even if
   *   it still exceeds `maxLength`
   */
  _serializeReport(report, maxLength = 5000) {
    const full = JSON.stringify(report);
    if (full.length <= maxLength) return full;

    const fewerErrorProne = Array.isArray(report.errorProneTools)
      ? report.errorProneTools.slice(0, 10)
      : report.errorProneTools;

    const reduced = [
      // 1. Bound the only list that reflect() does not already cap.
      { ...report, errorProneTools: fewerErrorProne, truncated: true },
      // 2. Drop the free-form LLM text, the least bounded field.
      {
        ...report,
        errorProneTools: fewerErrorProne,
        llmAnalysis: null,
        truncated: true,
      },
    ];

    for (const candidate of reduced) {
      const json = JSON.stringify(candidate);
      if (json.length <= maxLength) return json;
    }

    // 3. Last resort: scalar summary only.
    return JSON.stringify({
      timestamp: report.timestamp,
      totalTrajectories: report.totalTrajectories,
      scoredCount: report.scoredCount,
      avgScore: report.avgScore,
      recentAvg: report.recentAvg,
      trend: report.trend,
      totalToolCalls: report.totalToolCalls,
      synthesizedCount: report.synthesizedCount,
      truncated: true,
    });
  }

  /**
   * Build an empty report for edge cases.
   * @param {string} reason — stored in the report's `note` field
   * @returns {object}
   */
  _emptyReport(reason) {
    return {
      timestamp: new Date(_deps.now()).toISOString(),
      totalTrajectories: 0,
      scoredCount: 0,
      avgScore: 0,
      recentAvg: 0,
      trend: "stable",
      topTools: [],
      errorProneTools: [],
      totalToolCalls: 0,
      synthesizedCount: 0,
      llmAnalysis: null,
      note: reason,
    };
  }
}
|
|
322
|
+
|
|
323
|
+
// Exposed for test injection: tests can replace `_deps.now` with a fixed clock.
export { _deps };
|