selftune 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/README.md +259 -0
- package/bin/selftune.cjs +29 -0
- package/cli/selftune/constants.ts +71 -0
- package/cli/selftune/eval/hooks-to-evals.ts +422 -0
- package/cli/selftune/evolution/audit.ts +44 -0
- package/cli/selftune/evolution/deploy-proposal.ts +244 -0
- package/cli/selftune/evolution/evolve.ts +406 -0
- package/cli/selftune/evolution/extract-patterns.ts +145 -0
- package/cli/selftune/evolution/propose-description.ts +146 -0
- package/cli/selftune/evolution/rollback.ts +242 -0
- package/cli/selftune/evolution/stopping-criteria.ts +69 -0
- package/cli/selftune/evolution/validate-proposal.ts +137 -0
- package/cli/selftune/grading/grade-session.ts +459 -0
- package/cli/selftune/hooks/prompt-log.ts +52 -0
- package/cli/selftune/hooks/session-stop.ts +54 -0
- package/cli/selftune/hooks/skill-eval.ts +73 -0
- package/cli/selftune/index.ts +104 -0
- package/cli/selftune/ingestors/codex-rollout.ts +416 -0
- package/cli/selftune/ingestors/codex-wrapper.ts +332 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +565 -0
- package/cli/selftune/init.ts +297 -0
- package/cli/selftune/monitoring/watch.ts +328 -0
- package/cli/selftune/observability.ts +255 -0
- package/cli/selftune/types.ts +255 -0
- package/cli/selftune/utils/jsonl.ts +75 -0
- package/cli/selftune/utils/llm-call.ts +192 -0
- package/cli/selftune/utils/logging.ts +40 -0
- package/cli/selftune/utils/schema-validator.ts +47 -0
- package/cli/selftune/utils/seeded-random.ts +31 -0
- package/cli/selftune/utils/transcript.ts +260 -0
- package/package.json +29 -0
- package/skill/SKILL.md +120 -0
- package/skill/Workflows/Doctor.md +145 -0
- package/skill/Workflows/Evals.md +193 -0
- package/skill/Workflows/Evolve.md +159 -0
- package/skill/Workflows/Grade.md +157 -0
- package/skill/Workflows/Ingest.md +159 -0
- package/skill/Workflows/Initialize.md +125 -0
- package/skill/Workflows/Rollback.md +131 -0
- package/skill/Workflows/Watch.md +128 -0
- package/skill/references/grading-methodology.md +176 -0
- package/skill/references/invocation-taxonomy.md +144 -0
- package/skill/references/logs.md +168 -0
- package/skill/settings_snippet.json +41 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared LLM call utility.
|
|
3
|
+
*
|
|
4
|
+
* Provides a unified interface for calling LLMs via agent subprocess
|
|
5
|
+
* (claude/codex/opencode) or via the direct Anthropic API. Extracted from
|
|
6
|
+
* grade-session.ts so other modules can reuse the same calling logic.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { readFileSync, writeFileSync } from "node:fs";
|
|
10
|
+
import { tmpdir } from "node:os";
|
|
11
|
+
import { join } from "node:path";
|
|
12
|
+
|
|
13
|
+
import { AGENT_CANDIDATES, API_URL, MODEL } from "../constants.js";
|
|
14
|
+
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Agent detection
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
/**
 * Find the first agent CLI that is available on PATH.
 *
 * Candidates are tried in the order they appear in AGENT_CANDIDATES.
 *
 * @returns The name of the first candidate that `Bun.which` resolves, or
 *   `null` when none of them resolves.
 */
export function detectAgent(): string | null {
  for (const candidate of AGENT_CANDIDATES) {
    const resolvedPath = Bun.which(candidate);
    if (!resolvedPath) continue;
    return candidate;
  }

  return null;
}
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Markdown fence stripping
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
/**
 * Strip markdown code fences (and any preamble before the first `{`) from
 * LLM response text.
 *
 * Fence layers are peeled repeatedly so nested fences (e.g. a four-backtick
 * fence wrapping a three-backtick fence) are handled. A closing fence is
 * looked up in this order:
 *   1. on the opening line itself (a whole block written on one line),
 *   2. on its own line with the same number of backticks,
 *   3. anywhere in the remaining text (closing fence glued to the content).
 * If no closing fence exists, everything after the opening line is kept.
 *
 * When the text following a fence is empty (for example a stray trailing
 * fence), the text that preceded the fence is kept rather than discarding
 * the whole response.
 *
 * @param raw - Raw LLM output.
 * @returns The unfenced text, starting at the first `{` when one is present.
 */
export function stripMarkdownFences(raw: string): string {
  let text = raw.trim();

  for (;;) {
    const fenceMatch = /`{3,}/.exec(text);
    if (fenceMatch === null) break;

    const fence = fenceMatch[0]; // e.g. "```" or "````"
    const fenceStart = fenceMatch.index;
    const afterFence = text.slice(fenceStart + fence.length);

    // The rest of the opening line normally holds just a language tag.
    const newlineIdx = afterFence.indexOf("\n");
    const openingLine = newlineIdx >= 0 ? afterFence.slice(0, newlineIdx) : afterFence;

    let inner: string;
    const inlineClose = openingLine.indexOf(fence);
    if (inlineClose >= 0) {
      // Whole block sits on one line: ```{"a":1}```
      inner = openingLine.slice(0, inlineClose);
    } else if (newlineIdx < 0) {
      // Opening fence with nothing but same-line text after it.
      inner = afterFence;
    } else {
      // Drop the opening fence line (```json or ```).
      inner = afterFence.slice(newlineIdx + 1);

      // Prefer a closing fence of the same length on its own line...
      const ownLineClose = new RegExp(`^${fence}\\s*$`, "m").exec(inner);
      if (ownLineClose !== null) {
        inner = inner.slice(0, ownLineClose.index);
      } else {
        // ...but accept one glued to the end of the content, which would
        // otherwise be mistaken for a new opening fence on the next pass.
        const gluedClose = inner.indexOf(fence);
        if (gluedClose >= 0) inner = inner.slice(0, gluedClose);
      }
    }

    let result = inner.trim();
    if (result === "") {
      // Nothing useful after the fence: keep what came before it.
      result = text.slice(0, fenceStart).trim();
    }

    // `result` never contains the fence that was just matched, so the text
    // shrinks on every pass and the loop terminates.
    text = result;
  }

  // Find first { in case there's preamble text
  const braceIdx = text.indexOf("{");
  if (braceIdx > 0) {
    text = text.slice(braceIdx);
  }

  return text;
}
|
|
70
|
+
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
// Call LLM via agent subprocess
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
/**
 * Call an LLM through an agent CLI subprocess (claude/codex/opencode).
 *
 * The system and user prompts are joined with a blank line, round-tripped
 * through a temp file, and handed to the CLI as a single argv element.
 * The child is killed after 120 seconds.
 *
 * stdout and stderr are drained while waiting for the process to exit; if
 * the pipes were only read after exit, a child producing a lot of output
 * could block on a full pipe and never exit.
 *
 * @param systemPrompt - System instructions, placed first in the prompt.
 * @param userPrompt - User content, appended after a blank line.
 * @param agent - One of "claude", "codex" or "opencode".
 * @returns The raw stdout text of the agent.
 * @throws Error when `agent` is not supported, when the agent exits with a
 *   non-zero code, or when it is killed by the 120s timeout.
 */
export async function callViaAgent(
  systemPrompt: string,
  userPrompt: string,
  agent: string,
): Promise<string> {
  // Reject unsupported agents before touching the filesystem.
  if (agent !== "claude" && agent !== "codex" && agent !== "opencode") {
    throw new Error(`Unknown agent: ${agent}`);
  }

  // pid + timestamp + random suffix keeps concurrent calls (in this or
  // another process) from sharing a prompt file.
  const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
  const promptFile = join(tmpdir(), `selftune-llm-${uniqueSuffix}.txt`);
  writeFileSync(promptFile, `${systemPrompt}\n\n${userPrompt}`, "utf-8");

  try {
    const promptContent = readFileSync(promptFile, "utf-8");
    let cmd: string[];

    if (agent === "claude") {
      cmd = ["claude", "-p", promptContent];
    } else if (agent === "codex") {
      cmd = ["codex", "exec", "--skip-git-repo-check", promptContent];
    } else {
      cmd = ["opencode", "-p", promptContent, "-f", "text", "-q"];
    }

    const proc = Bun.spawn(cmd, {
      stdout: "pipe",
      stderr: "pipe",
      env: { ...process.env, CLAUDECODE: "" },
    });

    // 120s timeout; remember that we fired so the error can say so.
    let timedOut = false;
    const timeout = setTimeout(() => {
      timedOut = true;
      proc.kill();
    }, 120_000);

    try {
      // Drain both pipes concurrently with waiting for exit.
      const [stdout, stderr, exitCode] = await Promise.all([
        new Response(proc.stdout).text(),
        new Response(proc.stderr).text(),
        proc.exited,
      ]);

      if (exitCode !== 0) {
        const reason = timedOut
          ? `Agent '${agent}' timed out after 120s and was killed (exit code ${exitCode}).`
          : `Agent '${agent}' exited with code ${exitCode}.`;
        throw new Error(`${reason}\nstderr: ${stderr.slice(0, 500)}`);
      }

      return stdout;
    } finally {
      // Always release the timer, including when reading or waiting fails.
      clearTimeout(timeout);
    }
  } finally {
    try {
      const { unlinkSync } = await import("node:fs");
      unlinkSync(promptFile);
    } catch {
      // ignore cleanup errors
    }
  }
}
|
|
128
|
+
|
|
129
|
+
// ---------------------------------------------------------------------------
|
|
130
|
+
// Call LLM via direct Anthropic API
|
|
131
|
+
// ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
/**
 * Call the LLM through the HTTP API at API_URL using ANTHROPIC_API_KEY.
 *
 * @param systemPrompt - Sent as the request's `system` field.
 * @param userPrompt - Sent as the single user message.
 * @returns The concatenated text of every `text` block in the response.
 * @throws Error when ANTHROPIC_API_KEY is unset or empty, or when the
 *   response status is not OK (status and body are included in the message).
 */
export async function callViaApi(systemPrompt: string, userPrompt: string): Promise<string> {
  const key = process.env.ANTHROPIC_API_KEY;
  if (key === undefined || key === "") {
    throw new Error(
      "ANTHROPIC_API_KEY not set. Use --use-agent to grade via your " +
        "installed Claude Code / Codex / OpenCode subscription instead.",
    );
  }

  const requestBody = JSON.stringify({
    model: MODEL,
    max_tokens: 2000,
    system: systemPrompt,
    messages: [{ role: "user", content: userPrompt }],
  });

  const response = await fetch(API_URL, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-api-key": key,
      "anthropic-version": "2023-06-01",
    },
    body: requestBody,
  });

  if (!response.ok) {
    const errorBody = await response.text();
    throw new Error(`API error ${response.status}: ${errorBody}`);
  }

  const parsed = await response.json();
  const blocks = parsed.content ?? [];

  // Keep only text blocks; other block types contribute nothing.
  let combined = "";
  for (const item of blocks) {
    if (item.type !== "text") continue;
    combined += item.text ?? "";
  }

  return combined;
}
|
|
173
|
+
|
|
174
|
+
// ---------------------------------------------------------------------------
|
|
175
|
+
// Unified dispatcher
|
|
176
|
+
// ---------------------------------------------------------------------------
|
|
177
|
+
|
|
178
|
+
/**
 * Route an LLM call to the agent subprocess or to the direct API.
 *
 * @param systemPrompt - System instructions forwarded unchanged.
 * @param userPrompt - User content forwarded unchanged.
 * @param mode - "agent" delegates to callViaAgent; anything else to callViaApi.
 * @param agent - Agent name; required (non-empty) when `mode` is "agent".
 * @returns The raw text returned by the selected backend.
 * @throws Error when `mode` is "agent" and no agent is given.
 */
export async function callLlm(
  systemPrompt: string,
  userPrompt: string,
  mode: "agent" | "api",
  agent?: string,
): Promise<string> {
  if (mode !== "agent") {
    return callViaApi(systemPrompt, userPrompt);
  }

  if (!agent) {
    throw new Error("Agent must be specified when mode is 'agent'");
  }

  return callViaAgent(systemPrompt, userPrompt, agent);
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured JSON logging to stderr.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/** Minimal leveled logger returned by createLogger. */
export interface Logger {
  /** Emit an INFO entry. */
  info(message: string): void;
  /** Emit a WARN entry. */
  warn(message: string): void;
  /** Emit an ERROR entry; `err`, when given, is recorded as `exception`. */
  error(message: string, err?: unknown): void;
}

/**
 * Create a structured JSON logger for a selftune module.
 * Output goes to stderr as one JSON line per log call, with the fields
 * `timestamp` (ISO 8601), `level`, `module`, `message` and, for errors that
 * carry a value, `exception`.
 *
 * @param module - Name recorded in every entry's `module` field.
 * @returns A Logger bound to that module name.
 */
export function createLogger(module: string): Logger {
  function emit(level: string, message: string, exception?: string): void {
    const entry: Record<string, string> = {
      timestamp: new Date().toISOString(),
      level,
      module,
      message,
    };
    if (exception) entry.exception = exception;
    process.stderr.write(`${JSON.stringify(entry)}\n`);
  }

  /**
   * Render whatever was thrown as text. Anything can be thrown in
   * JavaScript, so non-Error values are stringified instead of dropped.
   */
  function describeError(err: unknown): string | undefined {
    if (err === undefined || err === null) return undefined;

    if (err instanceof Error) {
      const summary = `${err.name}: ${err.message}`;
      // `stack` is optional; avoid emitting a literal "undefined" line.
      return err.stack ? `${summary}\n${err.stack}` : summary;
    }

    if (typeof err === "string") return err;

    try {
      // JSON.stringify returns undefined for functions and symbols.
      const json: unknown = JSON.stringify(err);
      if (typeof json === "string") return json;
    } catch {
      // Circular structures and bigint values fall through to String().
    }
    return String(err);
  }

  return {
    info(message: string) {
      emit("INFO", message);
    },
    warn(message: string) {
      emit("WARN", message);
    },
    error(message: string, err?: unknown) {
      emit("ERROR", message, describeError(err));
    },
  };
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSONL schema validator for selftune log records.
|
|
3
|
+
* Validates records against REQUIRED_FIELDS from constants.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { REQUIRED_FIELDS } from "../constants.js";
|
|
7
|
+
|
|
8
|
+
/** Name of a selftune log schema; validateRecord uses it to index REQUIRED_FIELDS. */
export type LogType = "session_telemetry" | "skill_usage" | "all_queries";

/** Outcome of validating one record. */
export interface ValidationResult {
  /** True when `errors` is empty. */
  valid: boolean;
  /** One human-readable message per problem found. */
  errors: string[];
}

/** Fields that must be strings when present. */
const STRING_FIELDS = new Set(["timestamp", "session_id", "query", "skill_name", "source"]);
|
|
17
|
+
|
|
18
|
+
/**
 * Check a parsed log record against the schema registered for `logType`.
 *
 * Every field listed in REQUIRED_FIELDS for the log type must be present;
 * those that also appear in STRING_FIELDS must hold a string.
 *
 * @param record - Candidate record; anything but a non-null, non-array
 *   object is rejected immediately.
 * @param logType - Schema to validate against.
 * @returns `valid: true` with no errors, or `valid: false` with one message
 *   per problem.
 */
export function validateRecord(record: unknown, logType: LogType): ValidationResult {
  const isObjectRecord = typeof record === "object" && record !== null && !Array.isArray(record);
  if (!isObjectRecord) {
    return { valid: false, errors: ["Record must be a non-null object"] };
  }

  const fields = record as Record<string, unknown>;
  const required = REQUIRED_FIELDS[logType];
  if (!required) {
    return { valid: false, errors: [`Unknown log type: ${logType}`] };
  }

  const problems: string[] = [];
  for (const name of required) {
    if (!(name in fields)) {
      problems.push(`Missing required field: ${name}`);
      continue;
    }
    if (STRING_FIELDS.has(name) && typeof fields[name] !== "string") {
      problems.push(`Field "${name}" must be a string, got ${typeof fields[name]}`);
    }
  }

  return { valid: problems.length === 0, errors: problems };
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic PRNG using mulberry32 algorithm.
|
|
3
|
+
* Used for reproducible eval set shuffling.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Build a mulberry32 pseudo-random generator.
 *
 * @param seed - Initial state; coerced to a 32-bit integer.
 * @returns A function yielding the next number in [0, 1) on each call. The
 *   sequence is fully determined by `seed`.
 */
export function mulberry32(seed: number): () => number {
  let state = seed | 0;

  return function next(): number {
    // Advance the 32-bit state by the mulberry32 increment.
    state = (state + 0x6d2b79f5) | 0;

    const firstMix = Math.imul(state ^ (state >>> 15), state | 1);
    const secondMix = (firstMix + Math.imul(firstMix ^ (firstMix >>> 7), firstMix | 61)) ^ firstMix;

    // Reinterpret as unsigned and scale by 2^32 to land in [0, 1).
    return ((secondMix ^ (secondMix >>> 14)) >>> 0) / 0x100000000;
  };
}
|
|
19
|
+
|
|
20
|
+
/**
 * Shuffle a copy of `array` with Fisher-Yates, drawing randomness from
 * mulberry32 so the same seed always produces the same order.
 *
 * @param array - Items to shuffle; left untouched.
 * @param seed - Seed passed to mulberry32.
 * @returns A new array holding the same items in shuffled order.
 */
export function seededShuffle<T>(array: T[], seed: number): T[] {
  const shuffled = Array.from(array);
  const nextRandom = mulberry32(seed);

  // Walk from the last slot down to index 1, swapping each slot with a
  // randomly chosen slot at or below it.
  let slot = shuffled.length;
  while (slot > 1) {
    slot -= 1;
    const pick = Math.floor(nextRandom() * (slot + 1));
    const held = shuffled[slot];
    shuffled[slot] = shuffled[pick];
    shuffled[pick] = held;
  }

  return shuffled;
}
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transcript parsing utilities shared by hooks and grading.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
6
|
+
import { basename, dirname } from "node:path";
|
|
7
|
+
import type { TranscriptMetrics } from "../types.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Parse a Claude Code transcript JSONL and extract process metrics.
 *
 * Handles two observed transcript variants:
 *   Variant A (newer): {"type": "user", "message": {"role": "user", "content": [...]}}
 *   Variant B (older): {"role": "user", "content": "..."}
 *
 * Lines that are blank, are not valid JSON, or hold JSON that is not an
 * object (e.g. `null`, a number, an array) are skipped. Tool inputs whose
 * `file_path` or `command` is not a string are treated as empty, so a
 * malformed entry can never abort the whole parse.
 *
 * @param transcriptPath - Path of the JSONL transcript.
 * @returns The collected metrics, or emptyMetrics() when the file does not
 *   exist. `transcript_chars` counts characters excluding line breaks.
 */
export function parseTranscript(transcriptPath: string): TranscriptMetrics {
  if (!existsSync(transcriptPath)) return emptyMetrics();

  const fileText = readFileSync(transcriptPath, "utf-8");
  const lines = fileText.split("\n");
  const totalChars = lines.reduce((sum, l) => sum + l.length, 0);

  // Narrow an arbitrary JSON value to a plain (non-array) object.
  const asRecord = (value: unknown): Record<string, unknown> | null =>
    typeof value === "object" && value !== null && !Array.isArray(value)
      ? (value as Record<string, unknown>)
      : null;
  const asString = (value: unknown): string => (typeof value === "string" ? value : "");

  const toolCalls: Record<string, number> = {};
  const bashCommands: string[] = [];
  const skillsTriggered: string[] = [];
  let errors = 0;
  let assistantTurns = 0;
  let lastUserQuery = "";

  for (const raw of lines) {
    const line = raw.trim();
    if (!line) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue;
    }

    // JSON.parse also accepts null, numbers, strings and arrays.
    const entry = asRecord(parsed);
    if (entry === null) continue;

    // Normalise: unwrap nested message if present
    const msg = asRecord(entry.message) ?? entry;
    const role = msg.role ?? entry.role ?? "";
    const content = msg.content ?? entry.content ?? "";
    const blocks: unknown[] = Array.isArray(content) ? content : [];

    // Track last user query
    if (role === "user") {
      if (typeof content === "string") {
        const trimmed = content.trim();
        if (trimmed) lastUserQuery = trimmed;
      } else {
        const pieces: unknown[] = [];
        for (const part of blocks) {
          const textBlock = asRecord(part);
          if (textBlock === null || textBlock.type !== "text") continue;
          const piece = textBlock.text ?? "";
          if (piece) pieces.push(piece);
        }
        const text = pieces.join(" ").trim();
        if (text) lastUserQuery = text;
      }
    }

    // Count assistant turns and parse tool use
    if (role === "assistant") {
      assistantTurns++;
      for (const part of blocks) {
        const block = asRecord(part);
        if (block === null || block.type !== "tool_use") continue;

        const toolName = typeof block.name === "string" ? block.name : "Unknown";
        toolCalls[toolName] = (toolCalls[toolName] ?? 0) + 1;
        const input = asRecord(block.input) ?? {};

        // Track SKILL.md reads; the skill name is the containing directory.
        const filePath = asString(input.file_path);
        if (basename(filePath).toUpperCase() === "SKILL.MD") {
          const skillName = basename(dirname(filePath));
          if (!skillsTriggered.includes(skillName)) {
            skillsTriggered.push(skillName);
          }
        }

        // Track bash commands
        if (toolName === "Bash") {
          const cmd = asString(input.command).trim();
          if (cmd) bashCommands.push(cmd);
        }
      }
    }

    // Count tool errors from result entries
    if (entry.type === "tool_result" && entry.is_error) {
      errors++;
    }
    // Also check inside user content (tool_result blocks)
    if (role === "user") {
      for (const part of blocks) {
        const block = asRecord(part);
        if (block !== null && block.type === "tool_result" && block.is_error) {
          errors++;
        }
      }
    }
  }

  return {
    tool_calls: toolCalls,
    total_tool_calls: Object.values(toolCalls).reduce((a, b) => a + b, 0),
    bash_commands: bashCommands,
    skills_triggered: skillsTriggered,
    assistant_turns: assistantTurns,
    errors_encountered: errors,
    transcript_chars: totalChars,
    last_user_query: lastUserQuery,
  };
}
|
|
124
|
+
|
|
125
|
+
/**
 * Walk the transcript JSONL backwards to find the most recent user message.
 *
 * Both transcript shapes are recognised: a top-level `role: "user"` entry
 * and a nested `message.role: "user"` entry. Lines that are not valid JSON,
 * or whose JSON value is not an object (e.g. `null`), are skipped so that
 * one malformed line cannot hide earlier user messages.
 *
 * @param transcriptPath - Path of the JSONL transcript.
 * @returns The text of the latest user message that has any, or `null` when
 *   the file is missing, unreadable, or holds no such message. Never throws.
 */
export function getLastUserMessage(transcriptPath: string): string | null {
  if (!existsSync(transcriptPath)) return null;

  try {
    const lines = readFileSync(transcriptPath, "utf-8").trim().split("\n");

    for (let i = lines.length - 1; i >= 0; i--) {
      let parsed: unknown;
      try {
        parsed = JSON.parse(lines[i]);
      } catch {
        continue;
      }

      // JSON.parse also accepts null and primitives; those carry no message.
      if (typeof parsed !== "object" || parsed === null) continue;
      const entry = parsed as Record<string, unknown>;

      // Format 1: top-level role field
      if (entry.role === "user") {
        const text = extractUserText(entry.content);
        if (text) return text;
      }

      // Format 2: nested message object
      const nested = entry.message;
      if (typeof nested === "object" && nested !== null) {
        const msg = nested as Record<string, unknown>;
        if (msg.role === "user") {
          const text = extractUserText(msg.content);
          if (text) return text;
        }
      }
    }
  } catch {
    // Deliberately silent: any read failure is reported as "no message".
  }

  return null;
}
|
|
162
|
+
|
|
163
|
+
/**
 * Parse a transcript into a human-readable excerpt for the grader.
 *
 * Each user message becomes a `[USER] …` line and each assistant text block
 * an `[ASSISTANT] …` line (both capped at 200 characters); each tool call
 * becomes `[TOOL:<name>] <detail>`, where the detail is the input's
 * `file_path`, `command` or `query`, falling back to the first 100
 * characters of the JSON-encoded input.
 *
 * Lines that are blank, not valid JSON, or not a JSON object are skipped.
 * When the excerpt exceeds `maxChars`, the first 60% and the last 40% of the
 * budget are kept around a `... [truncated] ...` marker.
 *
 * @param transcriptPath - Path of the JSONL transcript.
 * @param maxChars - Character budget for the excerpt, excluding the
 *   truncation marker; negative values are treated as 0.
 * @returns The excerpt, or "(transcript not found)" when the file is missing.
 */
export function readExcerpt(transcriptPath: string, maxChars = 8000): string {
  if (!existsSync(transcriptPath)) return "(transcript not found)";

  const lines = readFileSync(transcriptPath, "utf-8").trim().split("\n");
  const readable: string[] = [];

  for (const raw of lines) {
    const line = raw.trim();
    if (!line) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue;
    }

    // JSON.parse also accepts null and primitives; those carry no message.
    if (typeof parsed !== "object" || parsed === null) continue;
    const entry = parsed as Record<string, unknown>;

    // Unwrap the nested message when there is one.
    const nested = entry.message;
    const msg =
      typeof nested === "object" && nested !== null ? (nested as Record<string, unknown>) : entry;
    const role = msg.role ?? entry.role ?? "";
    const entryContent = msg.content ?? entry.content ?? "";

    if (role === "user") {
      if (typeof entryContent === "string") {
        readable.push(`[USER] ${entryContent.slice(0, 200)}`);
      } else if (Array.isArray(entryContent)) {
        const texts = entryContent
          .filter(
            (p): p is Record<string, unknown> =>
              typeof p === "object" && p !== null && (p as Record<string, unknown>).type === "text",
          )
          .map((p) => (p.text as string) ?? "")
          .filter(Boolean);
        const text = texts.join(" ").trim().slice(0, 200);
        if (text) readable.push(`[USER] ${text}`);
      }
    } else if (role === "assistant" && Array.isArray(entryContent)) {
      for (const block of entryContent) {
        if (typeof block !== "object" || block === null) continue;
        const b = block as Record<string, unknown>;
        if (b.type === "text") {
          // A non-string `text` is rendered as empty instead of throwing.
          const text = typeof b.text === "string" ? b.text : "";
          readable.push(`[ASSISTANT] ${text.slice(0, 200)}`);
        } else if (b.type === "tool_use") {
          const name = (b.name as string) ?? "?";
          const inp = (b.input as Record<string, unknown>) ?? {};
          const detail =
            (inp.file_path as string) ??
            (inp.command as string) ??
            (inp.query as string) ??
            JSON.stringify(inp).slice(0, 100);
          readable.push(`[TOOL:${name}] ${detail}`);
        }
      }
    }
  }

  const full = readable.join("\n");
  if (full.length <= maxChars) return full;

  const budget = Math.max(0, maxChars);
  const head = Math.floor(budget * 0.6);
  const tail = budget - head;
  // slice(-0) is slice(0) and would return the whole text, so a zero tail
  // budget must be handled explicitly.
  const tailText = tail > 0 ? full.slice(-tail) : "";
  return `${full.slice(0, head)}\n\n... [truncated] ...\n\n${tailText}`;
}
|
|
230
|
+
|
|
231
|
+
/**
 * Build a TranscriptMetrics value with every counter at zero and every
 * collection empty. A new object (with new arrays) is created per call.
 */
function emptyMetrics(): TranscriptMetrics {
  const blank: TranscriptMetrics = {
    tool_calls: {},
    total_tool_calls: 0,
    bash_commands: [],
    skills_triggered: [],
    assistant_turns: 0,
    errors_encountered: 0,
    transcript_chars: 0,
    last_user_query: "",
  };
  return blank;
}
|
|
243
|
+
|
|
244
|
+
/**
 * Pull the user-visible text out of a message `content` value.
 *
 * @param content - Either a plain string, or an array of content blocks of
 *   which only those with `type: "text"` are used.
 * @returns The trimmed string, or the text blocks joined with single spaces
 *   and trimmed; `null` when nothing non-empty remains or the value has any
 *   other shape.
 */
function extractUserText(content: unknown): string | null {
  if (typeof content === "string") {
    const trimmed = content.trim();
    return trimmed ? trimmed : null;
  }

  if (!Array.isArray(content)) return null;

  const pieces: unknown[] = [];
  for (const part of content) {
    if (typeof part !== "object" || part === null) continue;
    const block = part as Record<string, unknown>;
    if (block.type !== "text") continue;

    const piece = block.text ?? "";
    if (piece) pieces.push(piece);
  }

  const joined = pieces.join(" ").trim();
  return joined ? joined : null;
}
|
package/package.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "selftune",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Skill observability and continuous improvement CLI for agent platforms",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"author": "Daniel Petro",
|
|
7
|
+
"license": "MIT",
|
|
8
|
+
"bin": {
|
|
9
|
+
"selftune": "bin/selftune.cjs"
|
|
10
|
+
},
|
|
11
|
+
"files": [
|
|
12
|
+
"bin/",
|
|
13
|
+
"cli/selftune/",
|
|
14
|
+
"skill/",
|
|
15
|
+
"README.md",
|
|
16
|
+
"CHANGELOG.md"
|
|
17
|
+
],
|
|
18
|
+
"scripts": {
|
|
19
|
+
"lint": "bunx biome check .",
|
|
20
|
+
"lint:fix": "bunx biome check --write .",
|
|
21
|
+
"lint:arch": "bun run lint-architecture.ts",
|
|
22
|
+
"test": "bun test",
|
|
23
|
+
"check": "bun run lint && bun run lint:arch && bun test"
|
|
24
|
+
},
|
|
25
|
+
"devDependencies": {
|
|
26
|
+
"@biomejs/biome": "^1.9.4",
|
|
27
|
+
"@types/bun": "^1.1.0"
|
|
28
|
+
}
|
|
29
|
+
}
|