selftune 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +156 -0
- package/.claude/agents/evolution-reviewer.md +180 -0
- package/.claude/agents/integration-guide.md +212 -0
- package/.claude/agents/pattern-analyst.md +160 -0
- package/CHANGELOG.md +46 -1
- package/README.md +105 -257
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +99 -0
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +103 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +1049 -0
- package/cli/selftune/dashboard.ts +43 -156
- package/cli/selftune/eval/baseline.ts +248 -0
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +101 -16
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +586 -0
- package/cli/selftune/evolution/evolve.ts +825 -116
- package/cli/selftune/evolution/extract-patterns.ts +105 -16
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +21 -4
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +513 -42
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +415 -48
- package/cli/selftune/ingestors/claude-replay.ts +377 -0
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +376 -16
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +90 -16
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +59 -33
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +525 -5
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +103 -19
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +307 -26
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +28 -11
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +180 -33
- package/skill/Workflows/AutoActivation.md +145 -0
- package/skill/Workflows/Badge.md +124 -0
- package/skill/Workflows/Baseline.md +144 -0
- package/skill/Workflows/Composability.md +107 -0
- package/skill/Workflows/Contribute.md +94 -0
- package/skill/Workflows/Cron.md +132 -0
- package/skill/Workflows/Dashboard.md +214 -0
- package/skill/Workflows/Doctor.md +63 -14
- package/skill/Workflows/Evals.md +110 -18
- package/skill/Workflows/EvolutionMemory.md +154 -0
- package/skill/Workflows/Evolve.md +181 -21
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +117 -0
- package/skill/Workflows/Ingest.md +142 -21
- package/skill/Workflows/Initialize.md +91 -23
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +91 -0
- package/skill/Workflows/Rollback.md +23 -4
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +150 -0
- package/skill/Workflows/Watch.md +33 -1
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
- package/dashboard/index.html +0 -1119
|
@@ -2,9 +2,11 @@
|
|
|
2
2
|
* Transcript parsing utilities shared by hooks and grading.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import { existsSync, readFileSync } from "node:fs";
|
|
5
|
+
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
|
|
6
6
|
import { basename, dirname } from "node:path";
|
|
7
|
-
import
|
|
7
|
+
import { CLAUDE_CODE_PROJECTS_DIR } from "../constants.js";
|
|
8
|
+
import type { SessionTelemetryRecord, TranscriptMetrics } from "../types.js";
|
|
9
|
+
import { isActionableQueryText } from "./query-filter.js";
|
|
8
10
|
|
|
9
11
|
/**
|
|
10
12
|
* Parse a Claude Code transcript JSONL and extract process metrics.
|
|
@@ -23,9 +25,14 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
23
25
|
const toolCalls: Record<string, number> = {};
|
|
24
26
|
const bashCommands: string[] = [];
|
|
25
27
|
const skillsTriggered: string[] = [];
|
|
28
|
+
const skillsInvoked: string[] = [];
|
|
26
29
|
let errors = 0;
|
|
27
30
|
let assistantTurns = 0;
|
|
28
31
|
let lastUserQuery = "";
|
|
32
|
+
let inputTokens = 0;
|
|
33
|
+
let outputTokens = 0;
|
|
34
|
+
let firstTimestamp: string | null = null;
|
|
35
|
+
let lastTimestamp: string | null = null;
|
|
29
36
|
|
|
30
37
|
for (const raw of lines) {
|
|
31
38
|
const line = raw.trim();
|
|
@@ -38,6 +45,22 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
38
45
|
continue;
|
|
39
46
|
}
|
|
40
47
|
|
|
48
|
+
// Track timestamps for duration calculation
|
|
49
|
+
const ts = entry.timestamp as string | undefined;
|
|
50
|
+
if (ts) {
|
|
51
|
+
if (!firstTimestamp) firstTimestamp = ts;
|
|
52
|
+
lastTimestamp = ts;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// Accumulate token usage from usage objects
|
|
56
|
+
const usage = (entry.usage ?? (entry.message as Record<string, unknown>)?.usage) as
|
|
57
|
+
| Record<string, unknown>
|
|
58
|
+
| undefined;
|
|
59
|
+
if (usage && typeof usage === "object") {
|
|
60
|
+
if (typeof usage.input_tokens === "number") inputTokens += usage.input_tokens;
|
|
61
|
+
if (typeof usage.output_tokens === "number") outputTokens += usage.output_tokens;
|
|
62
|
+
}
|
|
63
|
+
|
|
41
64
|
// Normalise: unwrap nested message if present
|
|
42
65
|
const msg = (entry.message as Record<string, unknown>) ?? entry;
|
|
43
66
|
const role = (msg.role as string) ?? (entry.role as string) ?? "";
|
|
@@ -45,19 +68,8 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
45
68
|
|
|
46
69
|
// Track last user query
|
|
47
70
|
if (role === "user") {
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
} else if (Array.isArray(content)) {
|
|
51
|
-
const texts = content
|
|
52
|
-
.filter(
|
|
53
|
-
(p): p is Record<string, unknown> =>
|
|
54
|
-
typeof p === "object" && p !== null && (p as Record<string, unknown>).type === "text",
|
|
55
|
-
)
|
|
56
|
-
.map((p) => (p.text as string) ?? "")
|
|
57
|
-
.filter(Boolean);
|
|
58
|
-
const text = texts.join(" ").trim();
|
|
59
|
-
if (text) lastUserQuery = text;
|
|
60
|
-
}
|
|
71
|
+
const text = extractActionableUserText(content);
|
|
72
|
+
if (text) lastUserQuery = text;
|
|
61
73
|
}
|
|
62
74
|
|
|
63
75
|
// Count assistant turns and parse tool use
|
|
@@ -72,7 +84,7 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
72
84
|
toolCalls[toolName] = (toolCalls[toolName] ?? 0) + 1;
|
|
73
85
|
const inp = (b.input as Record<string, unknown>) ?? {};
|
|
74
86
|
|
|
75
|
-
// Track SKILL.md reads
|
|
87
|
+
// Track SKILL.md reads (may be browsing — kept for backwards compat)
|
|
76
88
|
const filePath = (inp.file_path as string) ?? "";
|
|
77
89
|
if (basename(filePath).toUpperCase() === "SKILL.MD") {
|
|
78
90
|
const skillName = basename(dirname(filePath));
|
|
@@ -81,6 +93,14 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
81
93
|
}
|
|
82
94
|
}
|
|
83
95
|
|
|
96
|
+
// Track actual Skill tool invocations (high-confidence signal)
|
|
97
|
+
if (toolName === "Skill") {
|
|
98
|
+
const skillArg = (inp.skill as string) ?? (inp.name as string) ?? "";
|
|
99
|
+
if (skillArg && !skillsInvoked.includes(skillArg)) {
|
|
100
|
+
skillsInvoked.push(skillArg);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
84
104
|
// Track bash commands
|
|
85
105
|
if (toolName === "Bash") {
|
|
86
106
|
const cmd = ((inp.command as string) ?? "").trim();
|
|
@@ -110,15 +130,167 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
110
130
|
}
|
|
111
131
|
}
|
|
112
132
|
|
|
133
|
+
// Compute duration from first to last timestamp
|
|
134
|
+
let durationMs: number | undefined;
|
|
135
|
+
if (firstTimestamp && lastTimestamp && firstTimestamp !== lastTimestamp) {
|
|
136
|
+
const start = new Date(firstTimestamp).getTime();
|
|
137
|
+
const end = new Date(lastTimestamp).getTime();
|
|
138
|
+
if (!Number.isNaN(start) && !Number.isNaN(end) && end > start) {
|
|
139
|
+
durationMs = end - start;
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
113
143
|
return {
|
|
114
144
|
tool_calls: toolCalls,
|
|
115
145
|
total_tool_calls: Object.values(toolCalls).reduce((a, b) => a + b, 0),
|
|
116
146
|
bash_commands: bashCommands,
|
|
117
147
|
skills_triggered: skillsTriggered,
|
|
148
|
+
skills_invoked: skillsInvoked,
|
|
118
149
|
assistant_turns: assistantTurns,
|
|
119
150
|
errors_encountered: errors,
|
|
120
151
|
transcript_chars: totalChars,
|
|
121
152
|
last_user_query: lastUserQuery,
|
|
153
|
+
...(inputTokens > 0 ? { input_tokens: inputTokens } : {}),
|
|
154
|
+
...(outputTokens > 0 ? { output_tokens: outputTokens } : {}),
|
|
155
|
+
...(durationMs !== undefined ? { duration_ms: durationMs } : {}),
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/**
|
|
160
|
+
* Extract actionable user queries from a Claude transcript.
|
|
161
|
+
*/
|
|
162
|
+
export function extractActionableUserQueries(
|
|
163
|
+
transcriptPath: string,
|
|
164
|
+
): Array<{ query: string; timestamp: string }> {
|
|
165
|
+
if (!existsSync(transcriptPath)) return [];
|
|
166
|
+
|
|
167
|
+
let content: string;
|
|
168
|
+
try {
|
|
169
|
+
content = readFileSync(transcriptPath, "utf-8");
|
|
170
|
+
} catch {
|
|
171
|
+
return [];
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
const results: Array<{ query: string; timestamp: string }> = [];
|
|
175
|
+
|
|
176
|
+
for (const raw of content.split("\n")) {
|
|
177
|
+
const line = raw.trim();
|
|
178
|
+
if (!line) continue;
|
|
179
|
+
|
|
180
|
+
let entry: Record<string, unknown>;
|
|
181
|
+
try {
|
|
182
|
+
entry = JSON.parse(line);
|
|
183
|
+
} catch {
|
|
184
|
+
continue;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
const msg = (entry.message as Record<string, unknown>) ?? entry;
|
|
188
|
+
const role = (msg.role as string) ?? (entry.role as string) ?? "";
|
|
189
|
+
if (role !== "user") continue;
|
|
190
|
+
|
|
191
|
+
const text = extractActionableUserText(msg.content ?? entry.content ?? "");
|
|
192
|
+
if (!text || text.length < 4) continue;
|
|
193
|
+
|
|
194
|
+
const timestamp = (entry.timestamp as string) ?? (msg.timestamp as string) ?? "";
|
|
195
|
+
results.push({ query: text, timestamp });
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
return results;
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
/**
|
|
202
|
+
* Recursively find Claude transcript JSONL files under a projects directory.
|
|
203
|
+
*/
|
|
204
|
+
export function findTranscriptFiles(projectsDir: string, since?: Date): string[] {
|
|
205
|
+
if (!existsSync(projectsDir)) return [];
|
|
206
|
+
|
|
207
|
+
const files: string[] = [];
|
|
208
|
+
|
|
209
|
+
const walk = (dir: string): void => {
|
|
210
|
+
let entries: string[];
|
|
211
|
+
try {
|
|
212
|
+
entries = readdirSync(dir).sort();
|
|
213
|
+
} catch {
|
|
214
|
+
return;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
for (const entry of entries) {
|
|
218
|
+
const entryPath = `${dir}/${entry}`;
|
|
219
|
+
try {
|
|
220
|
+
const stats = statSync(entryPath);
|
|
221
|
+
|
|
222
|
+
if (stats.isDirectory()) {
|
|
223
|
+
walk(entryPath);
|
|
224
|
+
continue;
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
if (!stats.isFile() || !entry.endsWith(".jsonl")) continue;
|
|
228
|
+
if (since && stats.mtime < since) continue;
|
|
229
|
+
|
|
230
|
+
files.push(entryPath);
|
|
231
|
+
} catch {
|
|
232
|
+
// Ignore unreadable files and keep scanning.
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
};
|
|
236
|
+
|
|
237
|
+
walk(projectsDir);
|
|
238
|
+
return files.sort();
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
/**
|
|
242
|
+
* Find a Claude transcript path by session ID.
|
|
243
|
+
*/
|
|
244
|
+
export function findTranscriptPathForSession(
|
|
245
|
+
sessionId: string,
|
|
246
|
+
projectsDir: string = CLAUDE_CODE_PROJECTS_DIR,
|
|
247
|
+
): string | null {
|
|
248
|
+
const filename = `${sessionId}.jsonl`;
|
|
249
|
+
for (const transcriptPath of findTranscriptFiles(projectsDir)) {
|
|
250
|
+
if (basename(transcriptPath) === filename) return transcriptPath;
|
|
251
|
+
}
|
|
252
|
+
return null;
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
/**
|
|
256
|
+
* Build a SessionTelemetryRecord directly from a transcript file.
|
|
257
|
+
*/
|
|
258
|
+
export function buildTelemetryFromTranscript(
|
|
259
|
+
sessionId: string,
|
|
260
|
+
transcriptPath: string,
|
|
261
|
+
source = "claude_code_transcript_fallback",
|
|
262
|
+
): SessionTelemetryRecord | null {
|
|
263
|
+
if (!existsSync(transcriptPath)) return null;
|
|
264
|
+
|
|
265
|
+
const metrics = parseTranscript(transcriptPath);
|
|
266
|
+
const userQueries = extractActionableUserQueries(transcriptPath);
|
|
267
|
+
|
|
268
|
+
let timestamp = userQueries[0]?.timestamp ?? "";
|
|
269
|
+
if (!timestamp) {
|
|
270
|
+
try {
|
|
271
|
+
timestamp = statSync(transcriptPath).mtime.toISOString();
|
|
272
|
+
} catch {
|
|
273
|
+
timestamp = new Date().toISOString();
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
return {
|
|
278
|
+
timestamp,
|
|
279
|
+
session_id: sessionId,
|
|
280
|
+
cwd: "",
|
|
281
|
+
transcript_path: transcriptPath,
|
|
282
|
+
tool_calls: metrics.tool_calls,
|
|
283
|
+
total_tool_calls: metrics.total_tool_calls,
|
|
284
|
+
bash_commands: metrics.bash_commands,
|
|
285
|
+
skills_triggered: metrics.skills_triggered,
|
|
286
|
+
skills_invoked: metrics.skills_invoked,
|
|
287
|
+
assistant_turns: metrics.assistant_turns,
|
|
288
|
+
errors_encountered: metrics.errors_encountered,
|
|
289
|
+
transcript_chars: metrics.transcript_chars,
|
|
290
|
+
last_user_query: metrics.last_user_query,
|
|
291
|
+
source,
|
|
292
|
+
input_tokens: metrics.input_tokens,
|
|
293
|
+
output_tokens: metrics.output_tokens,
|
|
122
294
|
};
|
|
123
295
|
}
|
|
124
296
|
|
|
@@ -142,14 +314,14 @@ export function getLastUserMessage(transcriptPath: string): string | null {
|
|
|
142
314
|
|
|
143
315
|
// Format 1: top-level role field
|
|
144
316
|
if (entry.role === "user") {
|
|
145
|
-
const text =
|
|
317
|
+
const text = extractActionableUserText(entry.content);
|
|
146
318
|
if (text) return text;
|
|
147
319
|
}
|
|
148
320
|
|
|
149
321
|
// Format 2: nested message object
|
|
150
322
|
const msg = entry.message as Record<string, unknown> | undefined;
|
|
151
323
|
if (msg && typeof msg === "object" && msg.role === "user") {
|
|
152
|
-
const text =
|
|
324
|
+
const text = extractActionableUserText(msg.content);
|
|
153
325
|
if (text) return text;
|
|
154
326
|
}
|
|
155
327
|
}
|
|
@@ -160,6 +332,40 @@ export function getLastUserMessage(transcriptPath: string): string | null {
|
|
|
160
332
|
return null;
|
|
161
333
|
}
|
|
162
334
|
|
|
335
|
+
function extractTextParts(content: unknown): string {
|
|
336
|
+
if (!Array.isArray(content)) return "";
|
|
337
|
+
|
|
338
|
+
return content
|
|
339
|
+
.filter(
|
|
340
|
+
(part): part is Record<string, unknown> =>
|
|
341
|
+
typeof part === "object" &&
|
|
342
|
+
part !== null &&
|
|
343
|
+
(part as Record<string, unknown>).type === "text",
|
|
344
|
+
)
|
|
345
|
+
.map((part) => (part.text as string) ?? "")
|
|
346
|
+
.filter(Boolean)
|
|
347
|
+
.join(" ")
|
|
348
|
+
.trim();
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
function summarizeCodexFunctionArguments(argumentsText: unknown): string {
|
|
352
|
+
if (typeof argumentsText !== "string" || !argumentsText.trim()) return "";
|
|
353
|
+
|
|
354
|
+
try {
|
|
355
|
+
const parsed = JSON.parse(argumentsText) as Record<string, unknown>;
|
|
356
|
+
return (
|
|
357
|
+
(typeof parsed.cmd === "string" && parsed.cmd.trim()) ||
|
|
358
|
+
(typeof parsed.command === "string" && parsed.command.trim()) ||
|
|
359
|
+
(typeof parsed.file_path === "string" && parsed.file_path.trim()) ||
|
|
360
|
+
(typeof parsed.path === "string" && parsed.path.trim()) ||
|
|
361
|
+
(typeof parsed.query === "string" && parsed.query.trim()) ||
|
|
362
|
+
argumentsText.trim()
|
|
363
|
+
).slice(0, 200);
|
|
364
|
+
} catch {
|
|
365
|
+
return argumentsText.trim().slice(0, 200);
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
|
|
163
369
|
/**
|
|
164
370
|
* Parse a transcript into a human-readable excerpt for the grader.
|
|
165
371
|
*/
|
|
@@ -184,19 +390,13 @@ export function readExcerpt(transcriptPath: string, maxChars = 8000): string {
|
|
|
184
390
|
const msg = (entry.message as Record<string, unknown>) ?? entry;
|
|
185
391
|
const role = (msg.role as string) ?? (entry.role as string) ?? "";
|
|
186
392
|
const entryContent = msg.content ?? entry.content ?? "";
|
|
393
|
+
const eventType = (entry.type as string) ?? "";
|
|
187
394
|
|
|
188
395
|
if (role === "user") {
|
|
189
396
|
if (typeof entryContent === "string") {
|
|
190
397
|
readable.push(`[USER] ${entryContent.slice(0, 200)}`);
|
|
191
398
|
} else if (Array.isArray(entryContent)) {
|
|
192
|
-
const
|
|
193
|
-
.filter(
|
|
194
|
-
(p): p is Record<string, unknown> =>
|
|
195
|
-
typeof p === "object" && p !== null && (p as Record<string, unknown>).type === "text",
|
|
196
|
-
)
|
|
197
|
-
.map((p) => (p.text as string) ?? "")
|
|
198
|
-
.filter(Boolean);
|
|
199
|
-
const text = texts.join(" ").trim().slice(0, 200);
|
|
399
|
+
const text = extractTextParts(entryContent).slice(0, 200);
|
|
200
400
|
if (text) readable.push(`[USER] ${text}`);
|
|
201
401
|
}
|
|
202
402
|
} else if (role === "assistant") {
|
|
@@ -218,6 +418,45 @@ export function readExcerpt(transcriptPath: string, maxChars = 8000): string {
|
|
|
218
418
|
}
|
|
219
419
|
}
|
|
220
420
|
}
|
|
421
|
+
} else if (eventType === "event_msg") {
|
|
422
|
+
const payload = (entry.payload as Record<string, unknown>) ?? {};
|
|
423
|
+
if (payload.type === "user_message") {
|
|
424
|
+
const text = extractActionableUserText(payload.message)?.slice(0, 200) ?? "";
|
|
425
|
+
if (text) readable.push(`[USER] ${text}`);
|
|
426
|
+
}
|
|
427
|
+
} else if (eventType === "turn.completed") {
|
|
428
|
+
const text = extractActionableUserText(entry.user_message)?.slice(0, 200) ?? "";
|
|
429
|
+
if (text) readable.push(`[USER] ${text}`);
|
|
430
|
+
} else if (eventType === "response_item") {
|
|
431
|
+
const payload = (entry.payload as Record<string, unknown>) ?? {};
|
|
432
|
+
const itemType = (payload.type as string) ?? "";
|
|
433
|
+
|
|
434
|
+
if (itemType === "function_call") {
|
|
435
|
+
const name = (payload.name as string) ?? "function_call";
|
|
436
|
+
const detail = summarizeCodexFunctionArguments(payload.arguments);
|
|
437
|
+
if (detail) readable.push(`[TOOL:${name}] ${detail}`);
|
|
438
|
+
} else if (itemType === "agent_reasoning") {
|
|
439
|
+
const text = ((payload.text as string) ?? "").trim().slice(0, 200);
|
|
440
|
+
if (text) readable.push(`[ASSISTANT] ${text}`);
|
|
441
|
+
} else if (itemType === "message" && (payload.role as string) === "assistant") {
|
|
442
|
+
const text = extractTextParts(payload.content).slice(0, 200);
|
|
443
|
+
if (text) readable.push(`[ASSISTANT] ${text}`);
|
|
444
|
+
}
|
|
445
|
+
} else if (
|
|
446
|
+
eventType === "item.completed" ||
|
|
447
|
+
eventType === "item.started" ||
|
|
448
|
+
eventType === "item.updated"
|
|
449
|
+
) {
|
|
450
|
+
const item = (entry.item as Record<string, unknown>) ?? {};
|
|
451
|
+
const itemType = (item.item_type as string) ?? (item.type as string) ?? "";
|
|
452
|
+
|
|
453
|
+
if (itemType === "command_execution") {
|
|
454
|
+
const command = ((item.command as string) ?? "").trim().slice(0, 200);
|
|
455
|
+
if (command) readable.push(`[TOOL:command_execution] ${command}`);
|
|
456
|
+
} else {
|
|
457
|
+
const text = ((item.text as string) ?? "").trim().slice(0, 200);
|
|
458
|
+
if (text) readable.push(`[ASSISTANT] ${text}`);
|
|
459
|
+
}
|
|
221
460
|
}
|
|
222
461
|
}
|
|
223
462
|
|
|
@@ -228,12 +467,48 @@ export function readExcerpt(transcriptPath: string, maxChars = 8000): string {
|
|
|
228
467
|
return `${full.slice(0, head)}\n\n... [truncated] ...\n\n${full.slice(-tail)}`;
|
|
229
468
|
}
|
|
230
469
|
|
|
470
|
+
/**
|
|
471
|
+
* Extract token usage from a transcript JSONL by summing usage fields.
|
|
472
|
+
*
|
|
473
|
+
* Scans for entries with a `usage` object containing `input_tokens` and
|
|
474
|
+
* `output_tokens` (the format Claude Code transcripts use).
|
|
475
|
+
*/
|
|
476
|
+
export function extractTokenUsage(transcriptPath: string): { input: number; output: number } {
|
|
477
|
+
if (!existsSync(transcriptPath)) return { input: 0, output: 0 };
|
|
478
|
+
|
|
479
|
+
const content = readFileSync(transcriptPath, "utf-8");
|
|
480
|
+
const lines = content.split("\n");
|
|
481
|
+
let input = 0;
|
|
482
|
+
let output = 0;
|
|
483
|
+
|
|
484
|
+
for (const raw of lines) {
|
|
485
|
+
const line = raw.trim();
|
|
486
|
+
if (!line) continue;
|
|
487
|
+
|
|
488
|
+
let entry: Record<string, unknown>;
|
|
489
|
+
try {
|
|
490
|
+
entry = JSON.parse(line);
|
|
491
|
+
} catch {
|
|
492
|
+
continue;
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
const usage = entry.usage as Record<string, unknown> | undefined;
|
|
496
|
+
if (usage && typeof usage === "object") {
|
|
497
|
+
if (typeof usage.input_tokens === "number") input += usage.input_tokens;
|
|
498
|
+
if (typeof usage.output_tokens === "number") output += usage.output_tokens;
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
return { input, output };
|
|
503
|
+
}
|
|
504
|
+
|
|
231
505
|
function emptyMetrics(): TranscriptMetrics {
|
|
232
506
|
return {
|
|
233
507
|
tool_calls: {},
|
|
234
508
|
total_tool_calls: 0,
|
|
235
509
|
bash_commands: [],
|
|
236
510
|
skills_triggered: [],
|
|
511
|
+
skills_invoked: [],
|
|
237
512
|
assistant_turns: 0,
|
|
238
513
|
errors_encountered: 0,
|
|
239
514
|
transcript_chars: 0,
|
|
@@ -258,3 +533,9 @@ function extractUserText(content: unknown): string | null {
|
|
|
258
533
|
}
|
|
259
534
|
return null;
|
|
260
535
|
}
|
|
536
|
+
|
|
537
|
+
function extractActionableUserText(content: unknown): string | null {
|
|
538
|
+
const text = extractUserText(content);
|
|
539
|
+
if (!text) return null;
|
|
540
|
+
return isActionableQueryText(text) ? text : null;
|
|
541
|
+
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared trigger-check utilities.
|
|
3
|
+
*
|
|
4
|
+
* Extracted from validate-proposal.ts so other modules (e.g. body validation,
|
|
5
|
+
* routing validation) can reuse the same prompt-building and response-parsing
|
|
6
|
+
* logic without depending on the evolution layer.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
// Prompt building
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
|
|
13
|
+
/** Build the trigger check prompt for the LLM. */
|
|
14
|
+
export function buildTriggerCheckPrompt(description: string, query: string): string {
|
|
15
|
+
return [
|
|
16
|
+
"Given this skill description, would the following user query trigger this skill?",
|
|
17
|
+
"Respond YES or NO only.",
|
|
18
|
+
"",
|
|
19
|
+
"Skill description:",
|
|
20
|
+
description,
|
|
21
|
+
"",
|
|
22
|
+
"User query:",
|
|
23
|
+
query,
|
|
24
|
+
].join("\n");
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Response parsing
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
/** Parse YES/NO from LLM response. */
|
|
32
|
+
export function parseTriggerResponse(response: string): boolean {
|
|
33
|
+
const normalized = response.trim().toUpperCase();
|
|
34
|
+
if (normalized.startsWith("YES")) return true;
|
|
35
|
+
if (normalized.startsWith("NO")) return false;
|
|
36
|
+
return false; // conservative default
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
// Batch prompt building
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
|
|
43
|
+
/** Build a batch trigger check prompt for multiple queries at once. */
|
|
44
|
+
export function buildBatchTriggerCheckPrompt(description: string, queries: string[]): string {
|
|
45
|
+
const numbered = queries.map((q, i) => `${i + 1}. "${q}"`).join("\n");
|
|
46
|
+
return [
|
|
47
|
+
"Given this skill description, would each query trigger this skill?",
|
|
48
|
+
"Respond with the query number followed by YES or NO, one per line.",
|
|
49
|
+
"",
|
|
50
|
+
"Skill description:",
|
|
51
|
+
description,
|
|
52
|
+
"",
|
|
53
|
+
"Queries:",
|
|
54
|
+
numbered,
|
|
55
|
+
].join("\n");
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
// Batch response parsing
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Parse a batch YES/NO response. Returns a boolean array aligned to the
|
|
64
|
+
* original query order. Defaults to false for unparseable or missing lines.
|
|
65
|
+
*/
|
|
66
|
+
export function parseBatchTriggerResponse(response: string, queryCount: number): boolean[] {
|
|
67
|
+
const results: boolean[] = new Array(queryCount).fill(false);
|
|
68
|
+
const lines = response.trim().split("\n");
|
|
69
|
+
|
|
70
|
+
for (const line of lines) {
|
|
71
|
+
const trimmed = line.trim();
|
|
72
|
+
if (!trimmed) continue;
|
|
73
|
+
|
|
74
|
+
// Try to extract a number prefix: "1. YES", "1: YES", "1 YES", "1) YES"
|
|
75
|
+
const match = trimmed.match(/^(\d+)[.):\s]+\s*(.*)/);
|
|
76
|
+
if (!match) continue;
|
|
77
|
+
|
|
78
|
+
const idx = parseInt(match[1], 10) - 1; // 1-based to 0-based
|
|
79
|
+
if (idx < 0 || idx >= queryCount) continue;
|
|
80
|
+
|
|
81
|
+
const answer = match[2].trim().toUpperCase();
|
|
82
|
+
if (answer.startsWith("YES")) {
|
|
83
|
+
results[idx] = true;
|
|
84
|
+
}
|
|
85
|
+
// NO or anything else stays false (the default)
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return results;
|
|
89
|
+
}
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* tui.ts
|
|
3
|
+
*
|
|
4
|
+
* Zero-dependency TUI primitives for the selftune evolve pipeline.
|
|
5
|
+
* Uses raw ANSI escape codes for spinners, timers, and step progression.
|
|
6
|
+
* All output goes to stderr to keep stdout clean for JSON results.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
const SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
|
|
10
|
+
const TICK_MS = 80;
|
|
11
|
+
|
|
12
|
+
/**
 * Progress-reporting handle for the evolve pipeline.
 *
 * Callers drive it step by step and must end with either `finish` (normal
 * completion) or `destroy` (error paths).
 */
export interface EvolveTUI {
  /**
   * Begin a new step shown with a spinner. A step that is still running is
   * first marked as completed with a check mark.
   */
  step(label: string): void;

  /** Mark the current step as completed (check mark), displaying `label`. */
  done(label: string): void;

  /** Mark the current step as failed (cross mark), displaying `label`. */
  fail(label: string): void;

  /** Stop every timer and print `summary` as the closing line. */
  finish(summary: string): void;

  /** Stop every timer without printing anything. Intended for error paths. */
  destroy(): void;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Build an `EvolveTUI` whose methods all do nothing and print nothing.
 * Used when the interactive display should be suppressed.
 */
function createNoopTUI(): EvolveTUI {
  const ignore = (): void => {};
  return {
    step: ignore,
    done: ignore,
    fail: ignore,
    finish: ignore,
    destroy: ignore,
  };
}
|
|
28
|
+
|
|
29
|
+
/**
 * Create the stderr progress display for one `selftune evolve` run.
 *
 * When stderr is not a TTY and the SELFTUNE_TUI_FORCE environment variable is
 * unset, a no-op implementation is returned and nothing is printed. Otherwise
 * a header line is written immediately and the returned handle renders one
 * spinner line at a time, redrawn every TICK_MS milliseconds with the time
 * elapsed since the step started.
 *
 * When NO_COLOR is set to a non-empty value, ASCII markers ("+", "x", ">")
 * replace the check mark, cross mark and spinner glyphs. Cursor-control
 * escape sequences are still emitted in that mode.
 *
 * The redraw timer is unref'd (where the runtime supports it), so a run that
 * ends without calling `finish()` or `destroy()` is not kept alive by the
 * spinner alone.
 *
 * @param opts - `skillName` and `model` are shown in the header line.
 * @returns A handle for advancing, completing and tearing down the display.
 */
export function createEvolveTUI(opts: { skillName: string; model: string }): EvolveTUI {
  const noColor = !!process.env.NO_COLOR;
  const isTTY = !!process.stderr.isTTY;

  // If not a TTY, return no-op to avoid ANSI noise in pipes/tests
  if (!isTTY && !process.env.SELFTUNE_TUI_FORCE) {
    return createNoopTUI();
  }

  const write = (s: string) => process.stderr.write(s);

  let spinnerFrame = 0;
  let stepStartTime = Date.now();
  let currentLabel = "";
  // True while a spinner line is the last line on screen and may be erased.
  let hasActiveSpinner = false;
  let intervalId: ReturnType<typeof setInterval> | null = null;
  // Once set, every method on the returned handle becomes a no-op.
  let destroyed = false;

  const checkMark = noColor ? "+" : "\u2713";
  const crossMark = noColor ? "x" : "\u2717";

  // Print header
  write(`\n selftune evolve \u2500\u2500 ${opts.skillName} \u2500\u2500 ${opts.model}\n\n`);

  /** Render a duration in seconds with one decimal, e.g. "1.2s". */
  function formatTime(ms: number): string {
    return `${(ms / 1000).toFixed(1)}s`;
  }

  /** Shared layout for spinner and completed lines: marker, label padded to 48 columns, time. */
  function formatLine(marker: string, label: string, elapsedMs: number): string {
    const padding = Math.max(1, 48 - label.length);
    return ` ${marker} ${label}${" ".repeat(padding)}${formatTime(elapsedMs)}\n`;
  }

  /** Erase the spinner line: move the cursor up one row, then clear that row. */
  function clearSpinnerLine(): void {
    if (hasActiveSpinner) {
      write("\x1b[A\x1b[2K");
    }
  }

  /** Draw the spinner line for the current step with its running elapsed time. */
  function writeSpinnerLine(): void {
    const frame = noColor ? ">" : SPINNER_FRAMES[spinnerFrame % SPINNER_FRAMES.length];
    write(formatLine(frame, currentLabel, Date.now() - stepStartTime));
    hasActiveSpinner = true;
  }

  /** Let the process exit even if the timer is still scheduled (no-op where unsupported). */
  function unrefTimer(timer: unknown): void {
    if (
      typeof timer === "object" &&
      timer !== null &&
      "unref" in timer &&
      typeof timer.unref === "function"
    ) {
      timer.unref();
    }
  }

  /** Reset per-step state, draw the first frame and schedule periodic redraws. */
  function startSpinner(label: string): void {
    currentLabel = label;
    stepStartTime = Date.now();
    spinnerFrame = 0;
    writeSpinnerLine();
    intervalId = setInterval(() => {
      spinnerFrame++;
      clearSpinnerLine();
      writeSpinnerLine();
    }, TICK_MS);
    unrefTimer(intervalId);
  }

  /** Cancel the redraw timer, if one is scheduled. */
  function stopSpinner(): void {
    if (intervalId !== null) {
      clearInterval(intervalId);
      intervalId = null;
    }
  }

  /** Print a permanent (never erased) line for a finished step. */
  function writeCompletedLine(marker: string, label: string, elapsed: number): void {
    write(formatLine(marker, label, elapsed));
  }

  /** Replace the spinner line with a completed line carrying the step's total time. */
  function completeCurrentStep(marker: string, label: string): void {
    // Capture the duration before any teardown work.
    const elapsed = Date.now() - stepStartTime;
    stopSpinner();
    clearSpinnerLine();
    hasActiveSpinner = false;
    writeCompletedLine(marker, label, elapsed);
  }

  return {
    step(label: string): void {
      if (destroyed) return;
      // Complete previous step if there was one
      if (hasActiveSpinner) {
        completeCurrentStep(checkMark, currentLabel);
      }
      startSpinner(label);
    },

    done(label: string): void {
      if (destroyed) return;
      if (hasActiveSpinner) {
        // Complete active spinner with custom label
        completeCurrentStep(checkMark, label);
      } else {
        // No active spinner — instant step
        writeCompletedLine(checkMark, label, 0);
      }
      currentLabel = "";
    },

    fail(label: string): void {
      if (destroyed) return;
      if (hasActiveSpinner) {
        completeCurrentStep(crossMark, label);
      } else {
        // No active spinner — report the failure as an instant step
        writeCompletedLine(crossMark, label, 0);
      }
      currentLabel = "";
    },

    finish(summary: string): void {
      if (destroyed) return;
      if (hasActiveSpinner) {
        completeCurrentStep(checkMark, currentLabel);
      }
      stopSpinner();
      write(`\n ${summary}\n`);
      destroyed = true;
    },

    destroy(): void {
      if (destroyed) return;
      stopSpinner();
      if (hasActiveSpinner) {
        clearSpinnerLine();
        hasActiveSpinner = false;
      }
      destroyed = true;
    },
  };
}
|