selftune 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +20 -10
- package/.claude/agents/evolution-reviewer.md +14 -1
- package/.claude/agents/integration-guide.md +18 -6
- package/.claude/agents/pattern-analyst.md +18 -5
- package/CHANGELOG.md +12 -4
- package/README.md +43 -35
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/cli/selftune/badge/badge-data.ts +1 -1
- package/cli/selftune/badge/badge.ts +4 -8
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +28 -0
- package/cli/selftune/contribute/contribute.ts +1 -1
- package/cli/selftune/cron/setup.ts +17 -17
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +653 -186
- package/cli/selftune/dashboard.ts +41 -176
- package/cli/selftune/eval/baseline.ts +5 -4
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/hooks-to-evals.ts +34 -15
- package/cli/selftune/eval/unit-test-cli.ts +1 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +105 -11
- package/cli/selftune/evolution/evolve.ts +371 -25
- package/cli/selftune/evolution/extract-patterns.ts +87 -29
- package/cli/selftune/evolution/rollback.ts +2 -2
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +448 -97
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +395 -116
- package/cli/selftune/ingestors/claude-replay.ts +140 -114
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +141 -8
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +227 -14
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/monitoring/watch.ts +66 -15
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +48 -26
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +148 -0
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +78 -20
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +272 -26
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +21 -8
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +84 -53
- package/skill/Workflows/AutoActivation.md +17 -16
- package/skill/Workflows/Badge.md +6 -0
- package/skill/Workflows/Baseline.md +46 -23
- package/skill/Workflows/Composability.md +12 -5
- package/skill/Workflows/Contribute.md +17 -14
- package/skill/Workflows/Cron.md +56 -79
- package/skill/Workflows/Dashboard.md +45 -34
- package/skill/Workflows/Doctor.md +30 -17
- package/skill/Workflows/Evals.md +64 -40
- package/skill/Workflows/EvolutionMemory.md +2 -0
- package/skill/Workflows/Evolve.md +102 -47
- package/skill/Workflows/EvolveBody.md +6 -6
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +11 -5
- package/skill/Workflows/Ingest.md +43 -36
- package/skill/Workflows/Initialize.md +44 -30
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +39 -18
- package/skill/Workflows/Rollback.md +3 -3
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +34 -22
- package/skill/Workflows/Watch.md +14 -4
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +1 -1
- package/templates/multi-skill-settings.json +7 -7
- package/templates/single-skill-settings.json +6 -6
- package/dashboard/index.html +0 -1680
|
@@ -2,9 +2,11 @@
|
|
|
2
2
|
* Transcript parsing utilities shared by hooks and grading.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import { existsSync, readFileSync } from "node:fs";
|
|
5
|
+
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
|
|
6
6
|
import { basename, dirname } from "node:path";
|
|
7
|
-
import
|
|
7
|
+
import { CLAUDE_CODE_PROJECTS_DIR } from "../constants.js";
|
|
8
|
+
import type { SessionTelemetryRecord, TranscriptMetrics } from "../types.js";
|
|
9
|
+
import { isActionableQueryText } from "./query-filter.js";
|
|
8
10
|
|
|
9
11
|
/**
|
|
10
12
|
* Parse a Claude Code transcript JSONL and extract process metrics.
|
|
@@ -23,9 +25,14 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
23
25
|
const toolCalls: Record<string, number> = {};
|
|
24
26
|
const bashCommands: string[] = [];
|
|
25
27
|
const skillsTriggered: string[] = [];
|
|
28
|
+
const skillsInvoked: string[] = [];
|
|
26
29
|
let errors = 0;
|
|
27
30
|
let assistantTurns = 0;
|
|
28
31
|
let lastUserQuery = "";
|
|
32
|
+
let inputTokens = 0;
|
|
33
|
+
let outputTokens = 0;
|
|
34
|
+
let firstTimestamp: string | null = null;
|
|
35
|
+
let lastTimestamp: string | null = null;
|
|
29
36
|
|
|
30
37
|
for (const raw of lines) {
|
|
31
38
|
const line = raw.trim();
|
|
@@ -38,6 +45,22 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
38
45
|
continue;
|
|
39
46
|
}
|
|
40
47
|
|
|
48
|
+
// Track timestamps for duration calculation
|
|
49
|
+
const ts = entry.timestamp as string | undefined;
|
|
50
|
+
if (ts) {
|
|
51
|
+
if (!firstTimestamp) firstTimestamp = ts;
|
|
52
|
+
lastTimestamp = ts;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// Accumulate token usage from usage objects
|
|
56
|
+
const usage = (entry.usage ?? (entry.message as Record<string, unknown>)?.usage) as
|
|
57
|
+
| Record<string, unknown>
|
|
58
|
+
| undefined;
|
|
59
|
+
if (usage && typeof usage === "object") {
|
|
60
|
+
if (typeof usage.input_tokens === "number") inputTokens += usage.input_tokens;
|
|
61
|
+
if (typeof usage.output_tokens === "number") outputTokens += usage.output_tokens;
|
|
62
|
+
}
|
|
63
|
+
|
|
41
64
|
// Normalise: unwrap nested message if present
|
|
42
65
|
const msg = (entry.message as Record<string, unknown>) ?? entry;
|
|
43
66
|
const role = (msg.role as string) ?? (entry.role as string) ?? "";
|
|
@@ -45,19 +68,8 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
45
68
|
|
|
46
69
|
// Track last user query
|
|
47
70
|
if (role === "user") {
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
} else if (Array.isArray(content)) {
|
|
51
|
-
const texts = content
|
|
52
|
-
.filter(
|
|
53
|
-
(p): p is Record<string, unknown> =>
|
|
54
|
-
typeof p === "object" && p !== null && (p as Record<string, unknown>).type === "text",
|
|
55
|
-
)
|
|
56
|
-
.map((p) => (p.text as string) ?? "")
|
|
57
|
-
.filter(Boolean);
|
|
58
|
-
const text = texts.join(" ").trim();
|
|
59
|
-
if (text) lastUserQuery = text;
|
|
60
|
-
}
|
|
71
|
+
const text = extractActionableUserText(content);
|
|
72
|
+
if (text) lastUserQuery = text;
|
|
61
73
|
}
|
|
62
74
|
|
|
63
75
|
// Count assistant turns and parse tool use
|
|
@@ -72,7 +84,7 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
72
84
|
toolCalls[toolName] = (toolCalls[toolName] ?? 0) + 1;
|
|
73
85
|
const inp = (b.input as Record<string, unknown>) ?? {};
|
|
74
86
|
|
|
75
|
-
// Track SKILL.md reads
|
|
87
|
+
// Track SKILL.md reads (may be browsing — kept for backwards compat)
|
|
76
88
|
const filePath = (inp.file_path as string) ?? "";
|
|
77
89
|
if (basename(filePath).toUpperCase() === "SKILL.MD") {
|
|
78
90
|
const skillName = basename(dirname(filePath));
|
|
@@ -81,6 +93,14 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
81
93
|
}
|
|
82
94
|
}
|
|
83
95
|
|
|
96
|
+
// Track actual Skill tool invocations (high-confidence signal)
|
|
97
|
+
if (toolName === "Skill") {
|
|
98
|
+
const skillArg = (inp.skill as string) ?? (inp.name as string) ?? "";
|
|
99
|
+
if (skillArg && !skillsInvoked.includes(skillArg)) {
|
|
100
|
+
skillsInvoked.push(skillArg);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
84
104
|
// Track bash commands
|
|
85
105
|
if (toolName === "Bash") {
|
|
86
106
|
const cmd = ((inp.command as string) ?? "").trim();
|
|
@@ -110,15 +130,167 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
110
130
|
}
|
|
111
131
|
}
|
|
112
132
|
|
|
133
|
+
// Compute duration from first to last timestamp
|
|
134
|
+
let durationMs: number | undefined;
|
|
135
|
+
if (firstTimestamp && lastTimestamp && firstTimestamp !== lastTimestamp) {
|
|
136
|
+
const start = new Date(firstTimestamp).getTime();
|
|
137
|
+
const end = new Date(lastTimestamp).getTime();
|
|
138
|
+
if (!Number.isNaN(start) && !Number.isNaN(end) && end > start) {
|
|
139
|
+
durationMs = end - start;
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
113
143
|
return {
|
|
114
144
|
tool_calls: toolCalls,
|
|
115
145
|
total_tool_calls: Object.values(toolCalls).reduce((a, b) => a + b, 0),
|
|
116
146
|
bash_commands: bashCommands,
|
|
117
147
|
skills_triggered: skillsTriggered,
|
|
148
|
+
skills_invoked: skillsInvoked,
|
|
118
149
|
assistant_turns: assistantTurns,
|
|
119
150
|
errors_encountered: errors,
|
|
120
151
|
transcript_chars: totalChars,
|
|
121
152
|
last_user_query: lastUserQuery,
|
|
153
|
+
...(inputTokens > 0 ? { input_tokens: inputTokens } : {}),
|
|
154
|
+
...(outputTokens > 0 ? { output_tokens: outputTokens } : {}),
|
|
155
|
+
...(durationMs !== undefined ? { duration_ms: durationMs } : {}),
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/**
 * Extract actionable user queries from a Claude transcript.
 *
 * Reads the JSONL file at `transcriptPath` line by line and collects every
 * user-role message whose text survives `extractActionableUserText` and is at
 * least four characters long. Both transcript shapes are supported: a
 * top-level `role`/`content` pair and a nested `message` object.
 *
 * @param transcriptPath - Path to the transcript JSONL file.
 * @returns Queries in file order, each paired with the entry's timestamp
 *   (empty string when neither the entry nor the message carries one).
 *   Returns an empty array when the file is missing or unreadable.
 */
export function extractActionableUserQueries(
  transcriptPath: string,
): Array<{ query: string; timestamp: string }> {
  if (!existsSync(transcriptPath)) return [];

  let content: string;
  try {
    content = readFileSync(transcriptPath, "utf-8");
  } catch {
    return [];
  }

  const results: Array<{ query: string; timestamp: string }> = [];

  for (const raw of content.split("\n")) {
    const line = raw.trim();
    if (!line) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue;
    }

    // A line such as `null` or `42` is valid JSON but not a transcript entry.
    // Without this guard the property reads below throw on `null` and abort
    // the whole scan instead of skipping the one bad line.
    if (typeof parsed !== "object" || parsed === null) continue;
    const entry = parsed as Record<string, unknown>;

    const msg = (entry.message as Record<string, unknown>) ?? entry;
    const role = (msg.role as string) ?? (entry.role as string) ?? "";
    if (role !== "user") continue;

    const text = extractActionableUserText(msg.content ?? entry.content ?? "");
    if (!text || text.length < 4) continue;

    const timestamp = (entry.timestamp as string) ?? (msg.timestamp as string) ?? "";
    results.push({ query: text, timestamp });
  }

  return results;
}
|
|
200
|
+
|
|
201
|
+
/**
 * Collect every `.jsonl` file found anywhere beneath `projectsDir`.
 *
 * Directories that cannot be listed and entries that cannot be stat'ed are
 * skipped silently so one unreadable path does not abort the scan.
 *
 * @param projectsDir - Root directory to search.
 * @param since - When given, files whose mtime is earlier than this are omitted.
 * @returns Matching file paths, sorted; empty when `projectsDir` does not exist.
 */
export function findTranscriptFiles(projectsDir: string, since?: Date): string[] {
  if (!existsSync(projectsDir)) return [];

  const found: string[] = [];
  // Explicit work stack instead of recursion; visiting order is irrelevant
  // because the result is sorted before it is returned.
  const pending: string[] = [projectsDir];

  while (pending.length > 0) {
    const current = pending.pop() as string;

    let names: string[];
    try {
      names = readdirSync(current);
    } catch {
      continue;
    }

    for (const name of names) {
      const candidate = `${current}/${name}`;

      let info;
      try {
        info = statSync(candidate);
      } catch {
        // Ignore unreadable entries and keep scanning.
        continue;
      }

      if (info.isDirectory()) {
        pending.push(candidate);
        continue;
      }

      const isTranscript = info.isFile() && name.endsWith(".jsonl");
      if (!isTranscript) continue;
      if (since && info.mtime < since) continue;

      found.push(candidate);
    }
  }

  return found.sort();
}
|
|
240
|
+
|
|
241
|
+
/**
 * Locate the transcript file belonging to a session.
 *
 * Searches the files returned by `findTranscriptFiles(projectsDir)` for one
 * whose base name is `<sessionId>.jsonl`.
 *
 * @param sessionId - Session identifier used as the transcript file stem.
 * @param projectsDir - Directory to search; defaults to `CLAUDE_CODE_PROJECTS_DIR`.
 * @returns The first matching path, or `null` when no file matches.
 */
export function findTranscriptPathForSession(
  sessionId: string,
  projectsDir: string = CLAUDE_CODE_PROJECTS_DIR,
): string | null {
  const wanted = `${sessionId}.jsonl`;
  const match = findTranscriptFiles(projectsDir).find(
    (candidate) => basename(candidate) === wanted,
  );
  return match ?? null;
}
|
|
254
|
+
|
|
255
|
+
/**
 * Assemble a SessionTelemetryRecord from nothing but a transcript file.
 *
 * Metrics come from `parseTranscript`. The record timestamp is the timestamp
 * of the first actionable user query; when that is missing or empty the
 * file's mtime is used, and the current time if the file cannot be stat'ed.
 *
 * @param sessionId - Session identifier stored on the record.
 * @param transcriptPath - Path to the transcript JSONL file.
 * @param source - Value stored in the record's `source` field.
 * @returns The record, or `null` when `transcriptPath` does not exist.
 */
export function buildTelemetryFromTranscript(
  sessionId: string,
  transcriptPath: string,
  source = "claude_code_transcript_fallback",
): SessionTelemetryRecord | null {
  if (!existsSync(transcriptPath)) return null;

  const metrics = parseTranscript(transcriptPath);
  const firstQueryTimestamp = extractActionableUserQueries(transcriptPath)[0]?.timestamp;

  // Only consulted when the transcript itself yields no usable timestamp.
  const fallbackTimestamp = (): string => {
    try {
      return statSync(transcriptPath).mtime.toISOString();
    } catch {
      return new Date().toISOString();
    }
  };

  const timestamp = firstQueryTimestamp || fallbackTimestamp();

  return {
    timestamp,
    session_id: sessionId,
    cwd: "",
    transcript_path: transcriptPath,
    tool_calls: metrics.tool_calls,
    total_tool_calls: metrics.total_tool_calls,
    bash_commands: metrics.bash_commands,
    skills_triggered: metrics.skills_triggered,
    skills_invoked: metrics.skills_invoked,
    assistant_turns: metrics.assistant_turns,
    errors_encountered: metrics.errors_encountered,
    transcript_chars: metrics.transcript_chars,
    last_user_query: metrics.last_user_query,
    source,
    input_tokens: metrics.input_tokens,
    output_tokens: metrics.output_tokens,
  };
}
|
|
124
296
|
|
|
@@ -142,14 +314,14 @@ export function getLastUserMessage(transcriptPath: string): string | null {
|
|
|
142
314
|
|
|
143
315
|
// Format 1: top-level role field
|
|
144
316
|
if (entry.role === "user") {
|
|
145
|
-
const text =
|
|
317
|
+
const text = extractActionableUserText(entry.content);
|
|
146
318
|
if (text) return text;
|
|
147
319
|
}
|
|
148
320
|
|
|
149
321
|
// Format 2: nested message object
|
|
150
322
|
const msg = entry.message as Record<string, unknown> | undefined;
|
|
151
323
|
if (msg && typeof msg === "object" && msg.role === "user") {
|
|
152
|
-
const text =
|
|
324
|
+
const text = extractActionableUserText(msg.content);
|
|
153
325
|
if (text) return text;
|
|
154
326
|
}
|
|
155
327
|
}
|
|
@@ -160,6 +332,40 @@ export function getLastUserMessage(transcriptPath: string): string | null {
|
|
|
160
332
|
return null;
|
|
161
333
|
}
|
|
162
334
|
|
|
335
|
+
/**
 * Join the text of every `type: "text"` part in a structured content array.
 *
 * @param content - Message content; anything that is not an array yields "".
 * @returns The non-empty text parts joined with single spaces and trimmed.
 *   Parts whose `text` is not a string are ignored rather than stringified,
 *   so malformed parts cannot leak values such as "[object Object]".
 */
function extractTextParts(content: unknown): string {
  if (!Array.isArray(content)) return "";

  const texts: string[] = [];
  for (const part of content) {
    if (typeof part !== "object" || part === null) continue;

    const { type, text } = part as Record<string, unknown>;
    // Check the runtime type: a cast alone would let numbers or objects through.
    if (type === "text" && typeof text === "string" && text) {
      texts.push(text);
    }
  }

  return texts.join(" ").trim();
}
|
|
350
|
+
|
|
351
|
+
/**
 * Produce a short, readable summary of a function-call `arguments` string.
 *
 * When the string is JSON with a non-blank string under `cmd`, `command`,
 * `file_path`, `path` or `query` (checked in that order), that value is the
 * summary; otherwise the raw argument text is used.
 *
 * @param argumentsText - Raw arguments value; non-strings and blank strings yield "".
 * @returns The trimmed summary, truncated to 200 characters.
 */
function summarizeCodexFunctionArguments(argumentsText: unknown): string {
  if (typeof argumentsText !== "string") return "";

  const rawText = argumentsText.trim();
  if (!rawText) return "";

  let summary = rawText;
  try {
    const parsed = JSON.parse(argumentsText) as Record<string, unknown>;
    for (const key of ["cmd", "command", "file_path", "path", "query"]) {
      const value = parsed[key];
      if (typeof value === "string" && value.trim()) {
        summary = value.trim();
        break;
      }
    }
  } catch {
    // Not JSON, or JSON `null`: keep the raw text as the summary.
  }

  return summary.slice(0, 200);
}
|
|
368
|
+
|
|
163
369
|
/**
|
|
164
370
|
* Parse a transcript into a human-readable excerpt for the grader.
|
|
165
371
|
*/
|
|
@@ -184,19 +390,13 @@ export function readExcerpt(transcriptPath: string, maxChars = 8000): string {
|
|
|
184
390
|
const msg = (entry.message as Record<string, unknown>) ?? entry;
|
|
185
391
|
const role = (msg.role as string) ?? (entry.role as string) ?? "";
|
|
186
392
|
const entryContent = msg.content ?? entry.content ?? "";
|
|
393
|
+
const eventType = (entry.type as string) ?? "";
|
|
187
394
|
|
|
188
395
|
if (role === "user") {
|
|
189
396
|
if (typeof entryContent === "string") {
|
|
190
397
|
readable.push(`[USER] ${entryContent.slice(0, 200)}`);
|
|
191
398
|
} else if (Array.isArray(entryContent)) {
|
|
192
|
-
const
|
|
193
|
-
.filter(
|
|
194
|
-
(p): p is Record<string, unknown> =>
|
|
195
|
-
typeof p === "object" && p !== null && (p as Record<string, unknown>).type === "text",
|
|
196
|
-
)
|
|
197
|
-
.map((p) => (p.text as string) ?? "")
|
|
198
|
-
.filter(Boolean);
|
|
199
|
-
const text = texts.join(" ").trim().slice(0, 200);
|
|
399
|
+
const text = extractTextParts(entryContent).slice(0, 200);
|
|
200
400
|
if (text) readable.push(`[USER] ${text}`);
|
|
201
401
|
}
|
|
202
402
|
} else if (role === "assistant") {
|
|
@@ -218,6 +418,45 @@ export function readExcerpt(transcriptPath: string, maxChars = 8000): string {
|
|
|
218
418
|
}
|
|
219
419
|
}
|
|
220
420
|
}
|
|
421
|
+
} else if (eventType === "event_msg") {
|
|
422
|
+
const payload = (entry.payload as Record<string, unknown>) ?? {};
|
|
423
|
+
if (payload.type === "user_message") {
|
|
424
|
+
const text = extractActionableUserText(payload.message)?.slice(0, 200) ?? "";
|
|
425
|
+
if (text) readable.push(`[USER] ${text}`);
|
|
426
|
+
}
|
|
427
|
+
} else if (eventType === "turn.completed") {
|
|
428
|
+
const text = extractActionableUserText(entry.user_message)?.slice(0, 200) ?? "";
|
|
429
|
+
if (text) readable.push(`[USER] ${text}`);
|
|
430
|
+
} else if (eventType === "response_item") {
|
|
431
|
+
const payload = (entry.payload as Record<string, unknown>) ?? {};
|
|
432
|
+
const itemType = (payload.type as string) ?? "";
|
|
433
|
+
|
|
434
|
+
if (itemType === "function_call") {
|
|
435
|
+
const name = (payload.name as string) ?? "function_call";
|
|
436
|
+
const detail = summarizeCodexFunctionArguments(payload.arguments);
|
|
437
|
+
if (detail) readable.push(`[TOOL:${name}] ${detail}`);
|
|
438
|
+
} else if (itemType === "agent_reasoning") {
|
|
439
|
+
const text = ((payload.text as string) ?? "").trim().slice(0, 200);
|
|
440
|
+
if (text) readable.push(`[ASSISTANT] ${text}`);
|
|
441
|
+
} else if (itemType === "message" && (payload.role as string) === "assistant") {
|
|
442
|
+
const text = extractTextParts(payload.content).slice(0, 200);
|
|
443
|
+
if (text) readable.push(`[ASSISTANT] ${text}`);
|
|
444
|
+
}
|
|
445
|
+
} else if (
|
|
446
|
+
eventType === "item.completed" ||
|
|
447
|
+
eventType === "item.started" ||
|
|
448
|
+
eventType === "item.updated"
|
|
449
|
+
) {
|
|
450
|
+
const item = (entry.item as Record<string, unknown>) ?? {};
|
|
451
|
+
const itemType = (item.item_type as string) ?? (item.type as string) ?? "";
|
|
452
|
+
|
|
453
|
+
if (itemType === "command_execution") {
|
|
454
|
+
const command = ((item.command as string) ?? "").trim().slice(0, 200);
|
|
455
|
+
if (command) readable.push(`[TOOL:command_execution] ${command}`);
|
|
456
|
+
} else {
|
|
457
|
+
const text = ((item.text as string) ?? "").trim().slice(0, 200);
|
|
458
|
+
if (text) readable.push(`[ASSISTANT] ${text}`);
|
|
459
|
+
}
|
|
221
460
|
}
|
|
222
461
|
}
|
|
223
462
|
|
|
@@ -269,6 +508,7 @@ function emptyMetrics(): TranscriptMetrics {
|
|
|
269
508
|
total_tool_calls: 0,
|
|
270
509
|
bash_commands: [],
|
|
271
510
|
skills_triggered: [],
|
|
511
|
+
skills_invoked: [],
|
|
272
512
|
assistant_turns: 0,
|
|
273
513
|
errors_encountered: 0,
|
|
274
514
|
transcript_chars: 0,
|
|
@@ -293,3 +533,9 @@ function extractUserText(content: unknown): string | null {
|
|
|
293
533
|
}
|
|
294
534
|
return null;
|
|
295
535
|
}
|
|
536
|
+
|
|
537
|
+
/**
 * Extract user text from message content, keeping it only when it is actionable.
 *
 * @param content - Raw message content, passed to `extractUserText`.
 * @returns The extracted text when `isActionableQueryText` accepts it, otherwise `null`.
 */
function extractActionableUserText(content: unknown): string | null {
  const candidate = extractUserText(content);
  if (candidate && isActionableQueryText(candidate)) return candidate;
  return null;
}
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* discover.ts
|
|
3
|
+
*
|
|
4
|
+
* Pure analysis functions for discovering multi-skill workflows from
|
|
5
|
+
* telemetry and usage data. No I/O -- CLI wrapper handles reading JSONL.
|
|
6
|
+
*
|
|
7
|
+
* Adapts patterns from composability-v2.ts but removes single-skill scoping
|
|
8
|
+
* to discover ALL multi-skill workflows across the codebase.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type {
|
|
12
|
+
DiscoveredWorkflow,
|
|
13
|
+
SessionTelemetryRecord,
|
|
14
|
+
SkillUsageRecord,
|
|
15
|
+
WorkflowDiscoveryReport,
|
|
16
|
+
} from "../types.js";
|
|
17
|
+
import { clamp } from "../utils/math.js";
|
|
18
|
+
|
|
19
|
+
/**
 * Discover recurring multi-skill workflows from telemetry and usage data.
 *
 * Steps:
 *   1. Keep telemetry records that carry a `skills_triggered` array; when a
 *      positive `window` is given, keep only the N most recent by timestamp.
 *   2. Keep usage records whose session is still in scope and group them by
 *      session.
 *   3. Per session, order usage by timestamp and collapse consecutive repeats
 *      of the same skill into one step.
 *   4. Tally each distinct ordered sequence of two or more skills, and
 *      separately tally the order-insensitive (sorted) form of each sequence.
 *   5. For every sequence seen at least `minOccurrences` times (default 3),
 *      compute error, synergy, consistency and completion metrics.
 *   6. Optionally keep only workflows containing `options.skill`.
 *   7. Sort by occurrence count, highest first.
 *
 * @param telemetry - Session telemetry records.
 * @param usage - Skill usage records.
 * @param options - Optional `minOccurrences`, `window` and `skill` filters.
 * @returns The discovered workflows, the number of sessions analysed and a
 *   generation timestamp.
 */
export function discoverWorkflows(
  telemetry: SessionTelemetryRecord[],
  usage: SkillUsageRecord[],
  options?: { minOccurrences?: number; window?: number; skill?: string },
): WorkflowDiscoveryReport {
  type SequenceTally = { count: number; query: string; skills: string[]; sessionIds: string[] };

  const threshold = options?.minOccurrences ?? 3;
  const windowSize = options?.window;

  // Window: newest sessions first, then keep the first N of them.
  let sessions = telemetry.filter((record) => record && Array.isArray(record.skills_triggered));
  if (windowSize && windowSize > 0) {
    sessions.sort((left, right) => (right.timestamp ?? "").localeCompare(left.timestamp ?? ""));
    sessions = sessions.slice(0, windowSize);
  }

  // Group the in-scope usage records by session.
  const inScopeIds = new Set(sessions.map((session) => session.session_id));
  const recordsBySession = new Map<string, SkillUsageRecord[]>();
  for (const record of usage) {
    if (!inScopeIds.has(record.session_id)) continue;
    const bucket = recordsBySession.get(record.session_id);
    if (bucket) {
      bucket.push(record);
    } else {
      recordsBySession.set(record.session_id, [record]);
    }
  }

  // Build each session's skill chain and tally it, both in exact order and
  // in sorted (order-insensitive) form for the consistency metric.
  const orderedTallies = new Map<string, SequenceTally>();
  const unorderedTallies = new Map<string, number>();
  for (const [sessionId, records] of recordsBySession) {
    const chronological = [...records].sort((left, right) =>
      (left.timestamp ?? "").localeCompare(right.timestamp ?? ""),
    );

    const chain: string[] = [];
    for (const record of chronological) {
      const isRepeat = chain.length > 0 && chain[chain.length - 1] === record.skill_name;
      if (!isRepeat) chain.push(record.skill_name);
    }

    // A workflow needs at least two steps.
    if (chain.length < 2) continue;

    const orderedKey = JSON.stringify(chain);
    const tally = orderedTallies.get(orderedKey);
    if (tally) {
      tally.count++;
      tally.sessionIds.push(sessionId);
    } else {
      orderedTallies.set(orderedKey, {
        count: 1,
        query: chronological[0]?.query ?? "",
        skills: chain,
        sessionIds: [sessionId],
      });
    }

    const unorderedKey = JSON.stringify([...chain].sort());
    unorderedTallies.set(unorderedKey, (unorderedTallies.get(unorderedKey) ?? 0) + 1);
  }

  // Index telemetry by session and collect error totals for sessions that
  // triggered exactly one skill (the "solo" baseline for that skill).
  const telemetryById = new Map<string, SessionTelemetryRecord>();
  const soloStats = new Map<string, { totalErrors: number; count: number }>();
  for (const session of sessions) {
    telemetryById.set(session.session_id, session);
    if (session.skills_triggered.length !== 1) continue;

    const soloSkill = session.skills_triggered[0];
    const stats = soloStats.get(soloSkill);
    if (stats) {
      stats.totalErrors += session.errors_encountered ?? 0;
      stats.count++;
    } else {
      soloStats.set(soloSkill, { totalErrors: session.errors_encountered ?? 0, count: 1 });
    }
  }

  const soloErrorRate = (skillName: string): number | undefined => {
    const stats = soloStats.get(skillName);
    return stats && stats.count !== 0 ? stats.totalErrors / stats.count : undefined;
  };

  const workflows: DiscoveredWorkflow[] = [];
  for (const tally of orderedTallies.values()) {
    if (tally.count < threshold) continue;

    const matched = tally.sessionIds
      .map((id) => telemetryById.get(id))
      .filter((session): session is SessionTelemetryRecord => session !== undefined);

    // Mean errors across the sessions that ran this exact sequence.
    let errorTotal = 0;
    for (const session of matched) errorTotal += session.errors_encountered ?? 0;
    const avgErrors = matched.length > 0 ? errorTotal / matched.length : 0;

    // Baseline is the worst (highest) solo error rate among the member skills.
    const soloRates: number[] = [];
    for (const skillName of tally.skills) {
      const rate = soloErrorRate(skillName);
      if (rate !== undefined) soloRates.push(rate);
    }
    const hasBaseline = soloRates.length > 0;
    const avgErrorsIndividual = hasBaseline ? Math.max(...soloRates) : 0;

    // Without any solo baseline the score stays neutral (0) rather than
    // treating missing data as a zero error rate.
    const synergyScore = hasBaseline
      ? clamp((avgErrorsIndividual - avgErrors) / (avgErrorsIndividual + 1), -1, 1)
      : 0;

    // Share of this ordering among all sequences with the same sorted form.
    const unorderedKey = JSON.stringify([...tally.skills].sort());
    const totalOrderings = unorderedTallies.get(unorderedKey) ?? tally.count;
    const sequenceConsistency = totalOrderings > 0 ? tally.count / totalOrderings : 1;

    // Of the sessions that triggered any member skill, how many triggered all.
    const members = new Set(tally.skills);
    let touched = 0;
    let completed = 0;
    for (const session of sessions) {
      if (!session.skills_triggered.some((name) => members.has(name))) continue;
      touched++;
      if (tally.skills.every((name) => session.skills_triggered.includes(name))) completed++;
    }
    const completionRate = touched > 0 ? completed / touched : 0;

    const seenAt = matched
      .map((session) => session.timestamp)
      .filter((stamp) => stamp)
      .sort();

    workflows.push({
      // Skills joined with a right-arrow character (U+2192).
      workflow_id: tally.skills.join("\u2192"),
      skills: tally.skills,
      occurrence_count: tally.count,
      avg_errors: avgErrors,
      avg_errors_individual: avgErrorsIndividual,
      synergy_score: synergyScore,
      // First query of the first session in which the sequence was seen.
      representative_query: tally.query,
      sequence_consistency: sequenceConsistency,
      completion_rate: completionRate,
      first_seen: seenAt[0] ?? "",
      last_seen: seenAt[seenAt.length - 1] ?? "",
      session_ids: tally.sessionIds,
    });
  }

  const skillFilter = options?.skill;
  const selected = skillFilter
    ? workflows.filter((workflow) => workflow.skills.includes(skillFilter))
    : workflows;
  selected.sort((left, right) => right.occurrence_count - left.occurrence_count);

  return {
    workflows: selected,
    total_sessions_analyzed: sessions.length,
    generated_at: new Date().toISOString(),
  };
}
|