selftune 0.2.16 → 0.2.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -19
- package/cli/selftune/alpha-upload/build-payloads.ts +14 -1
- package/cli/selftune/alpha-upload/client.ts +51 -1
- package/cli/selftune/alpha-upload/flush.ts +46 -5
- package/cli/selftune/alpha-upload/stage-canonical.ts +25 -4
- package/cli/selftune/alpha-upload-contract.ts +9 -0
- package/cli/selftune/constants.ts +82 -5
- package/cli/selftune/contribute/sanitize.ts +52 -5
- package/cli/selftune/dashboard-contract.ts +100 -0
- package/cli/selftune/dashboard-server.ts +2 -2
- package/cli/selftune/evolution/description-quality.ts +12 -11
- package/cli/selftune/evolution/evolve.ts +214 -51
- package/cli/selftune/evolution/validate-proposal.ts +9 -6
- package/cli/selftune/grading/grade-session.ts +20 -0
- package/cli/selftune/hooks/commit-track.ts +188 -0
- package/cli/selftune/hooks/prompt-log.ts +10 -1
- package/cli/selftune/hooks/session-stop.ts +2 -2
- package/cli/selftune/hooks/skill-eval.ts +15 -1
- package/cli/selftune/hooks/stdin-preview.ts +32 -0
- package/cli/selftune/localdb/direct-write.ts +69 -6
- package/cli/selftune/localdb/queries.ts +552 -7
- package/cli/selftune/localdb/schema.ts +46 -0
- package/cli/selftune/orchestrate.ts +32 -4
- package/cli/selftune/routes/overview.ts +41 -3
- package/cli/selftune/routes/skill-report.ts +88 -17
- package/cli/selftune/types.ts +31 -0
- package/cli/selftune/utils/transcript.ts +210 -1
- package/node_modules/@selftune/telemetry-contract/src/types.ts +11 -0
- package/package.json +1 -1
- package/packages/telemetry-contract/src/types.ts +11 -0
- package/skill/SKILL.md +29 -1
- package/skill/Workflows/Evolve.md +31 -13
- package/skill/Workflows/ExportCanonical.md +121 -0
- package/skill/Workflows/Hook.md +131 -0
- package/skill/Workflows/Initialize.md +9 -8
- package/skill/Workflows/Orchestrate.md +27 -5
- package/skill/Workflows/Quickstart.md +94 -0
- package/skill/Workflows/RepairSkillUsage.md +87 -0
- package/skill/Workflows/Uninstall.md +82 -0
- package/skill/settings_snippet.json +11 -0
|
@@ -16,9 +16,9 @@ import { parseArgs } from "node:util";
|
|
|
16
16
|
|
|
17
17
|
import { readAlphaIdentity } from "./alpha-identity.js";
|
|
18
18
|
import type { UploadCycleSummary } from "./alpha-upload/index.js";
|
|
19
|
-
import {
|
|
19
|
+
import { getOrchestrateLockPath, SELFTUNE_CONFIG_PATH } from "./constants.js";
|
|
20
20
|
import type { OrchestrateRunReport, OrchestrateRunSkillAction } from "./dashboard-contract.js";
|
|
21
|
-
import type { EvolveResult } from "./evolution/evolve.js";
|
|
21
|
+
import type { EvolveOptions, EvolveResult } from "./evolution/evolve.js";
|
|
22
22
|
import {
|
|
23
23
|
buildDefaultGradingOutputPath,
|
|
24
24
|
deriveExpectationsFromSkill,
|
|
@@ -74,7 +74,7 @@ interface LockInfo {
|
|
|
74
74
|
|
|
75
75
|
const LOCK_STALE_MS = 30 * 60 * 1000; // 30 minutes
|
|
76
76
|
|
|
77
|
-
export function acquireLock(lockPath: string =
|
|
77
|
+
export function acquireLock(lockPath: string = getOrchestrateLockPath()): boolean {
|
|
78
78
|
try {
|
|
79
79
|
if (existsSync(lockPath)) {
|
|
80
80
|
try {
|
|
@@ -98,7 +98,7 @@ export function acquireLock(lockPath: string = ORCHESTRATE_LOCK): boolean {
|
|
|
98
98
|
}
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
-
export function releaseLock(lockPath: string =
|
|
101
|
+
export function releaseLock(lockPath: string = getOrchestrateLockPath()): void {
|
|
102
102
|
try {
|
|
103
103
|
unlinkSync(lockPath);
|
|
104
104
|
} catch {
|
|
@@ -377,6 +377,33 @@ export const MIN_CANDIDATE_EVIDENCE = 3;
|
|
|
377
377
|
/** Default cooldown hours after a deploy before re-evolving the same skill. */
|
|
378
378
|
export const DEFAULT_COOLDOWN_HOURS = 24;
|
|
379
379
|
|
|
380
|
+
type AutonomousEvolveDefaults = Pick<
|
|
381
|
+
EvolveOptions,
|
|
382
|
+
| "paretoEnabled"
|
|
383
|
+
| "candidateCount"
|
|
384
|
+
| "tokenEfficiencyEnabled"
|
|
385
|
+
| "withBaseline"
|
|
386
|
+
| "validationModel"
|
|
387
|
+
| "cheapLoop"
|
|
388
|
+
| "gateModel"
|
|
389
|
+
| "adaptiveGate"
|
|
390
|
+
| "proposalModel"
|
|
391
|
+
>;
|
|
392
|
+
|
|
393
|
+
// Keep the autonomous loop aligned with the evolve CLI defaults so scheduled
|
|
394
|
+
// runs stay cheap by default and still get a stronger gate before deploy.
|
|
395
|
+
const AUTONOMOUS_EVOLVE_DEFAULTS: AutonomousEvolveDefaults = {
|
|
396
|
+
paretoEnabled: true,
|
|
397
|
+
candidateCount: 3,
|
|
398
|
+
tokenEfficiencyEnabled: false,
|
|
399
|
+
withBaseline: false,
|
|
400
|
+
validationModel: "haiku",
|
|
401
|
+
cheapLoop: true,
|
|
402
|
+
gateModel: "sonnet",
|
|
403
|
+
adaptiveGate: true,
|
|
404
|
+
proposalModel: "haiku",
|
|
405
|
+
};
|
|
406
|
+
|
|
380
407
|
function candidatePriority(skill: SkillStatus, signalCount = 0): number {
|
|
381
408
|
const statusWeight = skill.status === "CRITICAL" ? 300 : skill.status === "WARNING" ? 200 : 100;
|
|
382
409
|
const missedWeight = Math.min(skill.missedQueries, 50);
|
|
@@ -1012,6 +1039,7 @@ export async function orchestrate(
|
|
|
1012
1039
|
maxIterations: 3,
|
|
1013
1040
|
gradingResults: _readGradingResults(candidate.skill),
|
|
1014
1041
|
syncFirst: false, // We already synced
|
|
1042
|
+
...AUTONOMOUS_EVOLVE_DEFAULTS,
|
|
1015
1043
|
});
|
|
1016
1044
|
|
|
1017
1045
|
candidate.evolveResult = evolveResult;
|
|
@@ -2,14 +2,52 @@
|
|
|
2
2
|
* Route handler: GET /api/v2/overview
|
|
3
3
|
*
|
|
4
4
|
* Returns SQLite-backed overview payload with skill listing and version info.
|
|
5
|
+
* Supports optional cursor-based pagination via query params:
|
|
6
|
+
* ?telemetry_cursor=<json>&telemetry_limit=N&skills_cursor=<json>&skills_limit=N
|
|
5
7
|
*/
|
|
6
8
|
|
|
7
9
|
import type { Database } from "bun:sqlite";
|
|
8
10
|
|
|
9
|
-
import {
|
|
11
|
+
import { parseCursorParam, parseIntParam } from "../dashboard-contract.js";
|
|
12
|
+
import {
|
|
13
|
+
getOverviewPayload,
|
|
14
|
+
getOverviewPayloadPaginated,
|
|
15
|
+
getSkillsList,
|
|
16
|
+
} from "../localdb/queries.js";
|
|
10
17
|
|
|
11
|
-
export function handleOverview(
|
|
12
|
-
|
|
18
|
+
/**
 * Route handler for GET /api/v2/overview.
 *
 * Always returns `{ overview, skills, version }`. When none of the pagination
 * query params (`telemetry_cursor`, `telemetry_limit`, `skills_cursor`,
 * `skills_limit`) is present the legacy unpaginated overview is returned so
 * existing clients keep working; otherwise the paginated query is used.
 *
 * @param db - Open SQLite database handle.
 * @param version - Version string echoed back in the response.
 * @param searchParams - Optional parsed query string of the request.
 * @returns JSON response carrying the overview payload, skill list and version.
 */
export function handleOverview(
  db: Database,
  version: string,
  searchParams?: URLSearchParams,
): Response {
  const skills = getSkillsList(db);

  // Presence of any one of these keys opts the caller into pagination.
  const paginationKeys = ["telemetry_cursor", "telemetry_limit", "skills_cursor", "skills_limit"];

  if (!searchParams || !paginationKeys.some((key) => searchParams.has(key))) {
    // Backward-compatible: return the unpaginated overview
    return Response.json({ overview: getOverviewPayload(db), skills, version });
  }

  // NOTE(review): the numeric second argument to parseIntParam is presumably the
  // fallback used when the param is absent or invalid — confirm in dashboard-contract.
  const overview = getOverviewPayloadPaginated(db, {
    telemetry_cursor: parseCursorParam(searchParams.get("telemetry_cursor")),
    telemetry_limit: parseIntParam(searchParams.get("telemetry_limit"), 1000),
    skills_cursor: parseCursorParam(searchParams.get("skills_cursor")),
    skills_limit: parseIntParam(searchParams.get("skills_limit"), 2000),
  });

  return Response.json({ overview, skills, version });
}
|
|
@@ -8,10 +8,21 @@
|
|
|
8
8
|
|
|
9
9
|
import type { Database } from "bun:sqlite";
|
|
10
10
|
|
|
11
|
+
import { parseCursorParam } from "../dashboard-contract.js";
|
|
11
12
|
import { scoreDescription } from "../evolution/description-quality.js";
|
|
12
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
getExecutionMetrics,
|
|
15
|
+
getPendingProposals,
|
|
16
|
+
getSkillCommitSummary,
|
|
17
|
+
getSkillReportPayload,
|
|
18
|
+
safeParseJson,
|
|
19
|
+
} from "../localdb/queries.js";
|
|
13
20
|
|
|
14
|
-
export function handleSkillReport(
|
|
21
|
+
export function handleSkillReport(
|
|
22
|
+
db: Database,
|
|
23
|
+
skillName: string,
|
|
24
|
+
searchParams?: URLSearchParams,
|
|
25
|
+
): Response {
|
|
15
26
|
const report = getSkillReportPayload(db, skillName);
|
|
16
27
|
|
|
17
28
|
// 1. Evolution audit with eval_snapshot
|
|
@@ -87,21 +98,17 @@ export function handleSkillReport(db: Database, skillName: string): Response {
|
|
|
87
98
|
run_count: selftuneRunCount,
|
|
88
99
|
};
|
|
89
100
|
|
|
90
|
-
// 4. Skill invocations — single source of truth
|
|
101
|
+
// 4. Skill invocations — single source of truth (with optional cursor pagination)
|
|
91
102
|
// JOIN prompts to recover query text when si.query is null (canonical records
|
|
92
103
|
// don't carry query; it's only populated via the direct-write hook path).
|
|
93
|
-
const
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
ORDER BY si.occurred_at DESC
|
|
102
|
-
LIMIT 100`,
|
|
103
|
-
)
|
|
104
|
-
.all(skillName) as Array<{
|
|
104
|
+
const invCursor = parseCursorParam(searchParams?.get("invocations_cursor") ?? null);
|
|
105
|
+
const invLimitParam = searchParams?.get("invocations_limit");
|
|
106
|
+
const invLimit = invLimitParam
|
|
107
|
+
? Math.max(1, Math.min(Number.parseInt(invLimitParam, 10) || 100, 10000))
|
|
108
|
+
: 100;
|
|
109
|
+
const invFetchLimit = invLimit + 1;
|
|
110
|
+
|
|
111
|
+
let invocationsWithConfidence: Array<{
|
|
105
112
|
timestamp: string;
|
|
106
113
|
session_id: string;
|
|
107
114
|
skill_name: string;
|
|
@@ -112,8 +119,56 @@ export function handleSkillReport(db: Database, skillName: string): Response {
|
|
|
112
119
|
agent_type: string | null;
|
|
113
120
|
query: string | null;
|
|
114
121
|
source: string | null;
|
|
122
|
+
skill_invocation_id: string;
|
|
115
123
|
}>;
|
|
116
124
|
|
|
125
|
+
if (invCursor) {
|
|
126
|
+
invocationsWithConfidence = db
|
|
127
|
+
.query(
|
|
128
|
+
`SELECT si.occurred_at as timestamp, si.session_id, si.skill_name,
|
|
129
|
+
si.invocation_mode, si.triggered, si.confidence, si.tool_name,
|
|
130
|
+
si.agent_type, COALESCE(si.query, p.prompt_text) as query, si.source,
|
|
131
|
+
si.skill_invocation_id
|
|
132
|
+
FROM skill_invocations si
|
|
133
|
+
LEFT JOIN prompts p ON si.matched_prompt_id = p.prompt_id
|
|
134
|
+
WHERE si.skill_name = ?
|
|
135
|
+
AND (si.occurred_at < ? OR (si.occurred_at = ? AND si.skill_invocation_id < ?))
|
|
136
|
+
ORDER BY si.occurred_at DESC, si.skill_invocation_id DESC
|
|
137
|
+
LIMIT ?`,
|
|
138
|
+
)
|
|
139
|
+
.all(
|
|
140
|
+
skillName,
|
|
141
|
+
invCursor.timestamp,
|
|
142
|
+
invCursor.timestamp,
|
|
143
|
+
String(invCursor.id),
|
|
144
|
+
invFetchLimit,
|
|
145
|
+
) as typeof invocationsWithConfidence;
|
|
146
|
+
} else {
|
|
147
|
+
invocationsWithConfidence = db
|
|
148
|
+
.query(
|
|
149
|
+
`SELECT si.occurred_at as timestamp, si.session_id, si.skill_name,
|
|
150
|
+
si.invocation_mode, si.triggered, si.confidence, si.tool_name,
|
|
151
|
+
si.agent_type, COALESCE(si.query, p.prompt_text) as query, si.source,
|
|
152
|
+
si.skill_invocation_id
|
|
153
|
+
FROM skill_invocations si
|
|
154
|
+
LEFT JOIN prompts p ON si.matched_prompt_id = p.prompt_id
|
|
155
|
+
WHERE si.skill_name = ?
|
|
156
|
+
ORDER BY si.occurred_at DESC, si.skill_invocation_id DESC
|
|
157
|
+
LIMIT ?`,
|
|
158
|
+
)
|
|
159
|
+
.all(skillName, invFetchLimit) as typeof invocationsWithConfidence;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
const invHasMore = invocationsWithConfidence.length > invLimit;
|
|
163
|
+
const invPageRows = invHasMore
|
|
164
|
+
? invocationsWithConfidence.slice(0, invLimit)
|
|
165
|
+
: invocationsWithConfidence;
|
|
166
|
+
const invLastRow = invPageRows[invPageRows.length - 1];
|
|
167
|
+
const invNextCursor =
|
|
168
|
+
invHasMore && invLastRow
|
|
169
|
+
? { timestamp: invLastRow.timestamp, id: invLastRow.skill_invocation_id }
|
|
170
|
+
: null;
|
|
171
|
+
|
|
117
172
|
// Not-found check — after all enrichment queries so evidence-only skills aren't 404'd
|
|
118
173
|
const hasData =
|
|
119
174
|
report.usage.total_checks > 0 ||
|
|
@@ -121,7 +176,7 @@ export function handleSkillReport(db: Database, skillName: string): Response {
|
|
|
121
176
|
report.evidence.length > 0 ||
|
|
122
177
|
evolution.length > 0 ||
|
|
123
178
|
pending_proposals.length > 0 ||
|
|
124
|
-
|
|
179
|
+
invPageRows.length > 0;
|
|
125
180
|
if (!hasData) {
|
|
126
181
|
return Response.json({ error: "Skill not found" }, { status: 404 });
|
|
127
182
|
}
|
|
@@ -156,6 +211,18 @@ export function handleSkillReport(db: Database, skillName: string): Response {
|
|
|
156
211
|
)
|
|
157
212
|
.get(skillName) as { missed_triggers: number } | null;
|
|
158
213
|
|
|
214
|
+
// 5b. Execution metrics (enrichment columns from execution_facts)
|
|
215
|
+
const skillSessionIds = db
|
|
216
|
+
.query(`SELECT DISTINCT session_id FROM skill_invocations WHERE skill_name = ?`)
|
|
217
|
+
.all(skillName) as Array<{ session_id: string }>;
|
|
218
|
+
const executionMetrics = getExecutionMetrics(
|
|
219
|
+
db,
|
|
220
|
+
skillSessionIds.map((r) => r.session_id),
|
|
221
|
+
);
|
|
222
|
+
|
|
223
|
+
// 5c. Commit summary (from commit_tracking via session join)
|
|
224
|
+
const commitSummary = getSkillCommitSummary(db, skillName);
|
|
225
|
+
|
|
159
226
|
// 6. Prompt texts — prefer matched prompts (the prompt that invoked the skill),
|
|
160
227
|
// fall back to all prompts from sessions that used the skill.
|
|
161
228
|
const promptSamples = db
|
|
@@ -227,16 +294,20 @@ export function handleSkillReport(db: Database, skillName: string): Response {
|
|
|
227
294
|
total_input_tokens: executionRow?.total_input_tokens ?? 0,
|
|
228
295
|
total_output_tokens: executionRow?.total_output_tokens ?? 0,
|
|
229
296
|
},
|
|
230
|
-
canonical_invocations:
|
|
297
|
+
canonical_invocations: invPageRows.map((i) => ({
|
|
231
298
|
...i,
|
|
232
299
|
triggered: i.triggered === 1,
|
|
233
300
|
})),
|
|
301
|
+
invocations_pagination:
|
|
302
|
+
invNextCursor || invCursor ? { next_cursor: invNextCursor, has_more: invHasMore } : undefined,
|
|
234
303
|
duration_stats: {
|
|
235
304
|
avg_duration_ms: executionRow?.avg_duration_ms ?? 0,
|
|
236
305
|
total_duration_ms: executionRow?.total_duration_ms ?? 0,
|
|
237
306
|
execution_count: executionRow?.execution_count ?? 0,
|
|
238
307
|
missed_triggers: missedRow?.missed_triggers ?? 0,
|
|
239
308
|
},
|
|
309
|
+
execution_metrics: executionMetrics,
|
|
310
|
+
commit_summary: commitSummary.total_commits > 0 ? commitSummary : null,
|
|
240
311
|
selftune_stats: selftuneStats,
|
|
241
312
|
prompt_samples: promptSamples.map((p) => ({
|
|
242
313
|
...p,
|
package/cli/selftune/types.ts
CHANGED
|
@@ -93,6 +93,17 @@ export interface SessionTelemetryRecord {
|
|
|
93
93
|
source?: string;
|
|
94
94
|
input_tokens?: number;
|
|
95
95
|
output_tokens?: number;
|
|
96
|
+
cached_input_tokens?: number;
|
|
97
|
+
reasoning_output_tokens?: number;
|
|
98
|
+
cost_usd?: number;
|
|
99
|
+
files_changed?: number;
|
|
100
|
+
lines_added?: number;
|
|
101
|
+
lines_removed?: number;
|
|
102
|
+
lines_modified?: number;
|
|
103
|
+
/** Count of output-producing tool calls (Write, Edit, WebFetch, WebSearch, Skill, Agent). */
|
|
104
|
+
artifact_count?: number;
|
|
105
|
+
/** Inferred session type based on tool distribution. */
|
|
106
|
+
session_type?: SessionType;
|
|
96
107
|
agent_summary?: string;
|
|
97
108
|
rollout_path?: string;
|
|
98
109
|
}
|
|
@@ -140,6 +151,13 @@ export {
|
|
|
140
151
|
CANONICAL_SOURCE_SESSION_KINDS,
|
|
141
152
|
} from "@selftune/telemetry-contract/types";
|
|
142
153
|
|
|
154
|
+
// ---------------------------------------------------------------------------
|
|
155
|
+
// Session classification
|
|
156
|
+
// ---------------------------------------------------------------------------
|
|
157
|
+
|
|
158
|
+
/** Inferred session type based on tool distribution. */
|
|
159
|
+
export type SessionType = "dev" | "research" | "content" | "mixed";
|
|
160
|
+
|
|
143
161
|
// ---------------------------------------------------------------------------
|
|
144
162
|
// Transcript parsing
|
|
145
163
|
// ---------------------------------------------------------------------------
|
|
@@ -156,6 +174,17 @@ export interface TranscriptMetrics {
|
|
|
156
174
|
last_user_query: string;
|
|
157
175
|
input_tokens?: number;
|
|
158
176
|
output_tokens?: number;
|
|
177
|
+
cached_input_tokens?: number;
|
|
178
|
+
reasoning_output_tokens?: number;
|
|
179
|
+
cost_usd?: number;
|
|
180
|
+
files_changed?: number;
|
|
181
|
+
lines_added?: number;
|
|
182
|
+
lines_removed?: number;
|
|
183
|
+
lines_modified?: number;
|
|
184
|
+
/** Count of output-producing tool calls (Write, Edit, WebFetch, WebSearch, Skill, Agent). */
|
|
185
|
+
artifact_count?: number;
|
|
186
|
+
/** Inferred session type based on tool distribution. */
|
|
187
|
+
session_type?: SessionType;
|
|
159
188
|
duration_ms?: number;
|
|
160
189
|
model?: string;
|
|
161
190
|
started_at?: string;
|
|
@@ -290,6 +319,8 @@ export interface ExecutionMetrics {
|
|
|
290
319
|
errors_encountered: number;
|
|
291
320
|
skills_triggered: string[];
|
|
292
321
|
transcript_chars: number;
|
|
322
|
+
artifact_count?: number;
|
|
323
|
+
session_type?: SessionType;
|
|
293
324
|
}
|
|
294
325
|
|
|
295
326
|
// ---------------------------------------------------------------------------
|
|
@@ -6,9 +6,15 @@ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
|
|
|
6
6
|
import { basename, dirname } from "node:path";
|
|
7
7
|
|
|
8
8
|
import { CLAUDE_CODE_PROJECTS_DIR } from "../constants.js";
|
|
9
|
-
import type { SessionTelemetryRecord, TranscriptMetrics } from "../types.js";
|
|
9
|
+
import type { SessionTelemetryRecord, SessionType, TranscriptMetrics } from "../types.js";
|
|
10
10
|
import { isActionableQueryText } from "./query-filter.js";
|
|
11
11
|
|
|
12
|
+
/** Tools that produce durable output artifacts (not reads or exploration). */
|
|
13
|
+
const ARTIFACT_TOOLS = new Set(["Write", "Edit", "WebFetch", "WebSearch", "Skill", "Agent"]);
|
|
14
|
+
|
|
15
|
+
/** Matches any bash command containing a git invocation. */
|
|
16
|
+
const GIT_CMD_RE = /\bgit\b/;
|
|
17
|
+
|
|
12
18
|
/**
|
|
13
19
|
* Parse a Claude Code transcript JSONL and extract process metrics.
|
|
14
20
|
*
|
|
@@ -32,10 +38,18 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
32
38
|
let lastUserQuery = "";
|
|
33
39
|
let inputTokens = 0;
|
|
34
40
|
let outputTokens = 0;
|
|
41
|
+
let cachedInputTokens = 0;
|
|
42
|
+
let reasoningOutputTokens = 0;
|
|
35
43
|
let firstTimestamp: string | null = null;
|
|
36
44
|
let lastTimestamp: string | null = null;
|
|
37
45
|
let model: string | undefined;
|
|
38
46
|
|
|
47
|
+
// File change tracking (Win 2)
|
|
48
|
+
const changedFiles = new Set<string>();
|
|
49
|
+
let linesAdded = 0;
|
|
50
|
+
let linesRemoved = 0;
|
|
51
|
+
let linesModified = 0;
|
|
52
|
+
|
|
39
53
|
for (const raw of lines) {
|
|
40
54
|
const line = raw.trim();
|
|
41
55
|
if (!line) continue;
|
|
@@ -61,6 +75,14 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
61
75
|
if (usage && typeof usage === "object") {
|
|
62
76
|
if (typeof usage.input_tokens === "number") inputTokens += usage.input_tokens;
|
|
63
77
|
if (typeof usage.output_tokens === "number") outputTokens += usage.output_tokens;
|
|
78
|
+
// Win 3: Token granularity — cached input tokens
|
|
79
|
+
if (typeof usage.cache_read_input_tokens === "number")
|
|
80
|
+
cachedInputTokens += usage.cache_read_input_tokens;
|
|
81
|
+
if (typeof usage.cache_creation_input_tokens === "number")
|
|
82
|
+
cachedInputTokens += usage.cache_creation_input_tokens;
|
|
83
|
+
// Win 3: Reasoning output tokens
|
|
84
|
+
if (typeof usage.reasoning_output_tokens === "number")
|
|
85
|
+
reasoningOutputTokens += usage.reasoning_output_tokens;
|
|
64
86
|
}
|
|
65
87
|
|
|
66
88
|
// Normalise: unwrap nested message if present
|
|
@@ -119,6 +141,26 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
119
141
|
const cmd = ((inp.command as string) ?? "").trim();
|
|
120
142
|
if (cmd) bashCommands.push(cmd);
|
|
121
143
|
}
|
|
144
|
+
|
|
145
|
+
// Win 2: Track file changes from Write and Edit tools
|
|
146
|
+
if (toolName === "Write" || toolName === "Edit") {
|
|
147
|
+
const fp = (inp.file_path as string) ?? "";
|
|
148
|
+
if (fp) changedFiles.add(fp);
|
|
149
|
+
}
|
|
150
|
+
if (toolName === "Write" && typeof inp.content === "string") {
|
|
151
|
+
linesAdded += inp.content.split("\n").length;
|
|
152
|
+
}
|
|
153
|
+
if (toolName === "Edit") {
|
|
154
|
+
const oldStr = inp.old_string;
|
|
155
|
+
const newStr = inp.new_string;
|
|
156
|
+
if (typeof oldStr === "string" && typeof newStr === "string") {
|
|
157
|
+
const oldLines = oldStr.split("\n").length;
|
|
158
|
+
const newLines = newStr.split("\n").length;
|
|
159
|
+
linesModified += Math.min(oldLines, newLines);
|
|
160
|
+
linesAdded += Math.max(0, newLines - oldLines);
|
|
161
|
+
linesRemoved += Math.max(0, oldLines - newLines);
|
|
162
|
+
}
|
|
163
|
+
}
|
|
122
164
|
}
|
|
123
165
|
}
|
|
124
166
|
}
|
|
@@ -143,6 +185,12 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
143
185
|
}
|
|
144
186
|
}
|
|
145
187
|
|
|
188
|
+
// Compute artifact count: output-producing tool calls
|
|
189
|
+
let artifactCount = 0;
|
|
190
|
+
for (const [tool, count] of Object.entries(toolCalls)) {
|
|
191
|
+
if (ARTIFACT_TOOLS.has(tool)) artifactCount += count;
|
|
192
|
+
}
|
|
193
|
+
|
|
146
194
|
// Compute duration from first to last timestamp
|
|
147
195
|
let durationMs: number | undefined;
|
|
148
196
|
if (firstTimestamp && lastTimestamp && firstTimestamp !== lastTimestamp) {
|
|
@@ -153,6 +201,12 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
153
201
|
}
|
|
154
202
|
}
|
|
155
203
|
|
|
204
|
+
// Win 3: Calculate cost from model and token counts
|
|
205
|
+
const costUsd = calculateCost(model, inputTokens, outputTokens);
|
|
206
|
+
|
|
207
|
+
// Infer session type from tool distribution
|
|
208
|
+
const sessionType = inferSessionType(toolCalls, bashCommands);
|
|
209
|
+
|
|
156
210
|
return {
|
|
157
211
|
tool_calls: toolCalls,
|
|
158
212
|
total_tool_calls: Object.values(toolCalls).reduce((a, b) => a + b, 0),
|
|
@@ -163,8 +217,18 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
|
|
|
163
217
|
errors_encountered: errors,
|
|
164
218
|
transcript_chars: totalChars,
|
|
165
219
|
last_user_query: lastUserQuery,
|
|
220
|
+
// Win 2: File change metrics
|
|
221
|
+
files_changed: changedFiles.size,
|
|
222
|
+
lines_added: linesAdded,
|
|
223
|
+
lines_removed: linesRemoved,
|
|
224
|
+
lines_modified: linesModified,
|
|
225
|
+
artifact_count: artifactCount,
|
|
226
|
+
session_type: sessionType,
|
|
166
227
|
...(inputTokens > 0 ? { input_tokens: inputTokens } : {}),
|
|
167
228
|
...(outputTokens > 0 ? { output_tokens: outputTokens } : {}),
|
|
229
|
+
...(cachedInputTokens > 0 ? { cached_input_tokens: cachedInputTokens } : {}),
|
|
230
|
+
...(reasoningOutputTokens > 0 ? { reasoning_output_tokens: reasoningOutputTokens } : {}),
|
|
231
|
+
...(costUsd !== undefined ? { cost_usd: costUsd } : {}),
|
|
168
232
|
...(durationMs !== undefined ? { duration_ms: durationMs } : {}),
|
|
169
233
|
...(model ? { model } : {}),
|
|
170
234
|
...(firstTimestamp ? { started_at: firstTimestamp } : {}),
|
|
@@ -307,6 +371,16 @@ export function buildTelemetryFromTranscript(
|
|
|
307
371
|
source,
|
|
308
372
|
input_tokens: metrics.input_tokens,
|
|
309
373
|
output_tokens: metrics.output_tokens,
|
|
374
|
+
cached_input_tokens: metrics.cached_input_tokens,
|
|
375
|
+
reasoning_output_tokens: metrics.reasoning_output_tokens,
|
|
376
|
+
cost_usd: metrics.cost_usd,
|
|
377
|
+
files_changed: metrics.files_changed,
|
|
378
|
+
lines_added: metrics.lines_added,
|
|
379
|
+
lines_removed: metrics.lines_removed,
|
|
380
|
+
lines_modified: metrics.lines_modified,
|
|
381
|
+
artifact_count: metrics.artifact_count,
|
|
382
|
+
session_type: metrics.session_type,
|
|
383
|
+
agent_summary: generateSessionSummary(metrics),
|
|
310
384
|
};
|
|
311
385
|
}
|
|
312
386
|
|
|
@@ -518,6 +592,141 @@ export function extractTokenUsage(transcriptPath: string): { input: number; outp
|
|
|
518
592
|
return { input, output };
|
|
519
593
|
}
|
|
520
594
|
|
|
595
|
+
// ---------------------------------------------------------------------------
|
|
596
|
+
// Win 3: Model cost lookup (USD per million tokens)
|
|
597
|
+
// ---------------------------------------------------------------------------
|
|
598
|
+
|
|
599
|
+
/** Per-model pricing in USD per million tokens. */
const MODEL_COSTS: Record<string, { input: number; output: number }> = {
  "claude-sonnet-4-20250514": { input: 3.0, output: 15.0 },
  "claude-opus-4-20250514": { input: 15.0, output: 75.0 },
  "claude-haiku-3-5-20241022": { input: 0.8, output: 4.0 },
  "claude-3-5-sonnet-20241022": { input: 3.0, output: 15.0 },
  "claude-3-5-haiku-20241022": { input: 0.8, output: 4.0 },
  "claude-3-opus-20240229": { input: 15.0, output: 75.0 },
  "claude-3-sonnet-20240229": { input: 3.0, output: 15.0 },
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
};

/**
 * Family prefixes derived once from MODEL_COSTS: each key with its trailing
 * dash-separated segment (the date stamp) removed, paired with that key's rates.
 * Order follows MODEL_COSTS so the first matching family wins.
 */
const MODEL_COST_FAMILIES: ReadonlyArray<
  readonly [prefix: string, rates: { input: number; output: number }]
> = Object.entries(MODEL_COSTS).map(
  ([id, rates]) => [id.split("-").slice(0, -1).join("-"), rates] as const,
);

/**
 * Calculate estimated cost in USD from model name and token counts.
 *
 * The model is matched exactly against MODEL_COSTS first; failing that, the
 * first table entry whose family prefix (key minus its last dash-separated
 * segment) is a prefix of `model` is used.
 *
 * @param model - Model identifier as reported by the transcript.
 * @param inputTokens - Number of input tokens.
 * @param outputTokens - Number of output tokens.
 * @returns Estimated cost in USD, or undefined if the model is unknown or not provided.
 */
export function calculateCost(
  model: string | undefined,
  inputTokens: number,
  outputTokens: number,
): number | undefined {
  if (!model) return undefined;

  // Own-property check: `model` comes from transcript data, and a plain index
  // lookup would resolve names like "toString" or "__proto__" through
  // Object.prototype, yielding a non-pricing value and a NaN cost.
  const exact = Object.prototype.hasOwnProperty.call(MODEL_COSTS, model)
    ? MODEL_COSTS[model]
    : undefined;

  const costs = exact ?? MODEL_COST_FAMILIES.find(([prefix]) => model.startsWith(prefix))?.[1];
  if (!costs) return undefined;

  return (inputTokens * costs.input + outputTokens * costs.output) / 1_000_000;
}
|
|
628
|
+
|
|
629
|
+
/**
 * Classify a session from its tool-call distribution.
 *
 * Rules are evaluated in order; the first one that matches wins:
 * - "dev": a git command was run and Write/Edit/Bash calls exceed 30% of all calls
 * - "research": WebFetch/WebSearch calls exceed 20% with Write/Edit under 15%,
 *   or Read calls exceed 50% with Write/Edit under 10%
 * - "content": Write/Edit calls exceed 20% and no git command was run
 * - "mixed": nothing above matched, or there were no tool calls at all
 *
 * @param toolCalls - Call count per tool name.
 * @param bashCommands - Bash command strings seen in the session.
 */
export function inferSessionType(
  toolCalls: Record<string, number>,
  bashCommands: string[],
): "dev" | "research" | "content" | "mixed" {
  let total = 0;
  for (const count of Object.values(toolCalls)) total += count;
  if (total === 0) return "mixed";

  const countOf = (tool: string): number => toolCalls[tool] ?? 0;

  const mutations = countOf("Write") + countOf("Edit");
  const webCalls = countOf("WebFetch") + countOf("WebSearch");
  const mutationRatio = mutations / total;
  const usedGit = bashCommands.some((command) => GIT_CMD_RE.test(command));

  // Dev: file mutations + git commands OR bash-heavy with git
  if (usedGit && (mutations + countOf("Bash")) / total > 0.3) {
    return "dev";
  }

  // Research: web tools + read-heavy, low file mutations
  const webHeavy = webCalls > 0 && webCalls / total > 0.2 && mutationRatio < 0.15;
  if (webHeavy) return "research";
  const readHeavy = countOf("Read") / total > 0.5 && mutationRatio < 0.1;
  if (readHeavy) return "research";

  // Content: file mutations but no git
  if (mutationRatio > 0.2 && !usedGit) {
    return "content";
  }

  return "mixed";
}
|
|
662
|
+
|
|
663
|
+
/**
 * Generate a short heuristic session summary from transcript metrics.
 * No LLM call — pure template-based approach. The result is at most 120
 * UTF-16 code units; longer summaries are cut and suffixed with "...".
 *
 * The template depends on `metrics.session_type` (treated as "mixed" when absent):
 * - "dev": files changed, the two most-used tools, and the last user query
 * - "research": web search/fetch count, read count, and the last user query
 * - "content": files created/edited and the last user query
 * - otherwise: total tool calls, distinct tool count, and the last user query
 *
 * @param metrics - Parsed transcript metrics for one session.
 * @returns A one-line human-readable summary.
 */
export function generateSessionSummary(metrics: TranscriptMetrics): string {
  const MAX_LEN = 120;
  const ELLIPSIS = "...";
  const sessionType: SessionType = metrics.session_type ?? "mixed";
  const lastQuery = truncateQuery(metrics.last_user_query, 60);

  if (metrics.total_tool_calls === 0 && !lastQuery) {
    return "Empty session — no tool calls or queries";
  }

  // Every template ends with the same optional query suffix.
  const queryStr = lastQuery ? ` — ${lastQuery}` : "";

  let summary: string;
  switch (sessionType) {
    case "dev": {
      // Only the dev template names tools, so rank them here rather than up front.
      const topTools = getTopTools(metrics.tool_calls, 2);
      const filesChanged = metrics.files_changed ?? 0;
      const toolStr = topTools.length > 0 ? ` via ${topTools.join(", ")}` : "";
      summary = `${filesChanged} files changed${toolStr}${queryStr}`;
      break;
    }
    case "research": {
      const searches = (metrics.tool_calls.WebSearch ?? 0) + (metrics.tool_calls.WebFetch ?? 0);
      const reads = metrics.tool_calls.Read ?? 0;
      summary = `${searches} searches + ${reads} reads${queryStr}`;
      break;
    }
    case "content": {
      const filesChanged = metrics.files_changed ?? 0;
      summary = `${filesChanged} files created/edited${queryStr}`;
      break;
    }
    default: {
      const toolCount = Object.keys(metrics.tool_calls).length;
      summary = `${metrics.total_tool_calls} tool calls across ${toolCount} tools${queryStr}`;
      break;
    }
  }

  if (summary.length <= MAX_LEN) return summary;

  // Cut to make room for the ellipsis, backing off one unit if the cut would
  // land between the halves of a surrogate pair (which would leave a lone,
  // ill-formed high surrogate at the end of the summary).
  let cut = MAX_LEN - ELLIPSIS.length;
  const lastUnit = summary.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
  return `${summary.slice(0, cut)}${ELLIPSIS}`;
}
|
|
713
|
+
|
|
714
|
+
/**
 * Return the names of the `n` most frequently called tools, most-used first.
 * Tools with equal counts keep the order in which they appear in `toolCalls`.
 */
function getTopTools(toolCalls: Record<string, number>, n: number): string[] {
  const ranked = Object.entries(toolCalls);
  ranked.sort(([, countA], [, countB]) => countB - countA);

  const names: string[] = [];
  for (const [toolName] of ranked.slice(0, n)) {
    names.push(toolName);
  }
  return names;
}
|
|
721
|
+
|
|
722
|
+
/**
 * Trim a query and truncate it to at most `maxLen` UTF-16 code units.
 *
 * Strings that fit are returned trimmed and unchanged. Longer strings are cut
 * and suffixed with "..." (the suffix counts toward `maxLen`). When `maxLen`
 * is too small to hold the ellipsis (< 3) the string is hard-cut without one,
 * so the result never exceeds `maxLen`.
 *
 * @param query - Raw query text.
 * @param maxLen - Maximum length of the returned string.
 * @returns The trimmed, possibly truncated query; "" for blank input.
 */
function truncateQuery(query: string, maxLen: number): string {
  const trimmed = query.trim();
  if (!trimmed) return "";
  if (trimmed.length <= maxLen) return trimmed;

  const ELLIPSIS = "...";
  const withEllipsis = maxLen >= ELLIPSIS.length;
  let cut = withEllipsis ? maxLen - ELLIPSIS.length : Math.max(0, maxLen);

  // Never end on a high surrogate: cutting between the halves of a pair would
  // leave an ill-formed lone surrogate in the output.
  if (cut > 0) {
    const lastUnit = trimmed.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
  }

  const head = trimmed.slice(0, cut);
  return withEllipsis ? `${head}${ELLIPSIS}` : head;
}
|
|
729
|
+
|
|
521
730
|
function emptyMetrics(): TranscriptMetrics {
|
|
522
731
|
return {
|
|
523
732
|
tool_calls: {},
|
|
@@ -143,7 +143,18 @@ export interface CanonicalExecutionFactRecord extends CanonicalSessionRecordBase
|
|
|
143
143
|
errors_encountered: number;
|
|
144
144
|
input_tokens?: number;
|
|
145
145
|
output_tokens?: number;
|
|
146
|
+
cached_input_tokens?: number;
|
|
147
|
+
reasoning_output_tokens?: number;
|
|
148
|
+
cost_usd?: number;
|
|
146
149
|
duration_ms?: number;
|
|
150
|
+
files_changed?: number;
|
|
151
|
+
lines_added?: number;
|
|
152
|
+
lines_removed?: number;
|
|
153
|
+
lines_modified?: number;
|
|
154
|
+
/** Count of output-producing tool calls (Write, Edit, WebFetch, WebSearch, Skill, Agent). */
|
|
155
|
+
artifact_count?: number;
|
|
156
|
+
/** Inferred session type based on tool distribution. */
|
|
157
|
+
session_type?: "dev" | "research" | "content" | "mixed";
|
|
147
158
|
completion_status?: CanonicalCompletionStatus;
|
|
148
159
|
end_reason?: string;
|
|
149
160
|
}
|
package/package.json
CHANGED
|
@@ -143,7 +143,18 @@ export interface CanonicalExecutionFactRecord extends CanonicalSessionRecordBase
|
|
|
143
143
|
errors_encountered: number;
|
|
144
144
|
input_tokens?: number;
|
|
145
145
|
output_tokens?: number;
|
|
146
|
+
cached_input_tokens?: number;
|
|
147
|
+
reasoning_output_tokens?: number;
|
|
148
|
+
cost_usd?: number;
|
|
146
149
|
duration_ms?: number;
|
|
150
|
+
files_changed?: number;
|
|
151
|
+
lines_added?: number;
|
|
152
|
+
lines_removed?: number;
|
|
153
|
+
lines_modified?: number;
|
|
154
|
+
/** Count of output-producing tool calls (Write, Edit, WebFetch, WebSearch, Skill, Agent). */
|
|
155
|
+
artifact_count?: number;
|
|
156
|
+
/** Inferred session type based on tool distribution. */
|
|
157
|
+
session_type?: "dev" | "research" | "content" | "mixed";
|
|
147
158
|
completion_status?: CanonicalCompletionStatus;
|
|
148
159
|
end_reason?: string;
|
|
149
160
|
}
|