selftune 0.2.16 → 0.2.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +24 -19
  2. package/cli/selftune/alpha-upload/build-payloads.ts +14 -1
  3. package/cli/selftune/alpha-upload/client.ts +51 -1
  4. package/cli/selftune/alpha-upload/flush.ts +46 -5
  5. package/cli/selftune/alpha-upload/stage-canonical.ts +25 -4
  6. package/cli/selftune/alpha-upload-contract.ts +9 -0
  7. package/cli/selftune/constants.ts +82 -5
  8. package/cli/selftune/contribute/sanitize.ts +52 -5
  9. package/cli/selftune/dashboard-contract.ts +100 -0
  10. package/cli/selftune/dashboard-server.ts +2 -2
  11. package/cli/selftune/evolution/description-quality.ts +12 -11
  12. package/cli/selftune/evolution/evolve.ts +214 -51
  13. package/cli/selftune/evolution/validate-proposal.ts +9 -6
  14. package/cli/selftune/grading/grade-session.ts +20 -0
  15. package/cli/selftune/hooks/commit-track.ts +188 -0
  16. package/cli/selftune/hooks/prompt-log.ts +10 -1
  17. package/cli/selftune/hooks/session-stop.ts +2 -2
  18. package/cli/selftune/hooks/skill-eval.ts +15 -1
  19. package/cli/selftune/hooks/stdin-preview.ts +32 -0
  20. package/cli/selftune/localdb/direct-write.ts +69 -6
  21. package/cli/selftune/localdb/queries.ts +552 -7
  22. package/cli/selftune/localdb/schema.ts +46 -0
  23. package/cli/selftune/orchestrate.ts +32 -4
  24. package/cli/selftune/routes/overview.ts +41 -3
  25. package/cli/selftune/routes/skill-report.ts +88 -17
  26. package/cli/selftune/types.ts +31 -0
  27. package/cli/selftune/utils/transcript.ts +210 -1
  28. package/node_modules/@selftune/telemetry-contract/src/types.ts +11 -0
  29. package/package.json +1 -1
  30. package/packages/telemetry-contract/src/types.ts +11 -0
  31. package/skill/SKILL.md +29 -1
  32. package/skill/Workflows/Evolve.md +31 -13
  33. package/skill/Workflows/ExportCanonical.md +121 -0
  34. package/skill/Workflows/Hook.md +131 -0
  35. package/skill/Workflows/Initialize.md +9 -8
  36. package/skill/Workflows/Orchestrate.md +27 -5
  37. package/skill/Workflows/Quickstart.md +94 -0
  38. package/skill/Workflows/RepairSkillUsage.md +87 -0
  39. package/skill/Workflows/Uninstall.md +82 -0
  40. package/skill/settings_snippet.json +11 -0
@@ -16,9 +16,9 @@ import { parseArgs } from "node:util";
16
16
 
17
17
  import { readAlphaIdentity } from "./alpha-identity.js";
18
18
  import type { UploadCycleSummary } from "./alpha-upload/index.js";
19
- import { ORCHESTRATE_LOCK, SELFTUNE_CONFIG_PATH } from "./constants.js";
19
+ import { getOrchestrateLockPath, SELFTUNE_CONFIG_PATH } from "./constants.js";
20
20
  import type { OrchestrateRunReport, OrchestrateRunSkillAction } from "./dashboard-contract.js";
21
- import type { EvolveResult } from "./evolution/evolve.js";
21
+ import type { EvolveOptions, EvolveResult } from "./evolution/evolve.js";
22
22
  import {
23
23
  buildDefaultGradingOutputPath,
24
24
  deriveExpectationsFromSkill,
@@ -74,7 +74,7 @@ interface LockInfo {
74
74
 
75
75
  const LOCK_STALE_MS = 30 * 60 * 1000; // 30 minutes
76
76
 
77
- export function acquireLock(lockPath: string = ORCHESTRATE_LOCK): boolean {
77
+ export function acquireLock(lockPath: string = getOrchestrateLockPath()): boolean {
78
78
  try {
79
79
  if (existsSync(lockPath)) {
80
80
  try {
@@ -98,7 +98,7 @@ export function acquireLock(lockPath: string = ORCHESTRATE_LOCK): boolean {
98
98
  }
99
99
  }
100
100
 
101
- export function releaseLock(lockPath: string = ORCHESTRATE_LOCK): void {
101
+ export function releaseLock(lockPath: string = getOrchestrateLockPath()): void {
102
102
  try {
103
103
  unlinkSync(lockPath);
104
104
  } catch {
@@ -377,6 +377,33 @@ export const MIN_CANDIDATE_EVIDENCE = 3;
377
377
  /** Default cooldown hours after a deploy before re-evolving the same skill. */
378
378
  export const DEFAULT_COOLDOWN_HOURS = 24;
379
379
 
380
/**
 * The slice of `EvolveOptions` that the autonomous loop sets explicitly on
 * every evolve call.
 */
type AutonomousEvolveDefaults = Pick<
  EvolveOptions,
  | "paretoEnabled"
  | "candidateCount"
  | "tokenEfficiencyEnabled"
  | "withBaseline"
  | "validationModel"
  | "cheapLoop"
  | "gateModel"
  | "adaptiveGate"
  | "proposalModel"
>;

/**
 * Option values used for scheduled (autonomous) evolve runs.
 *
 * These are meant to mirror the evolve CLI defaults: runs stay cheap by
 * default while a stronger gate is still applied before deploy.
 */
const AUTONOMOUS_EVOLVE_DEFAULTS: AutonomousEvolveDefaults = {
  paretoEnabled: true,
  candidateCount: 3,
  tokenEfficiencyEnabled: false,
  withBaseline: false,
  validationModel: "haiku",
  cheapLoop: true,
  gateModel: "sonnet",
  adaptiveGate: true,
  proposalModel: "haiku",
};
406
+
380
407
  function candidatePriority(skill: SkillStatus, signalCount = 0): number {
381
408
  const statusWeight = skill.status === "CRITICAL" ? 300 : skill.status === "WARNING" ? 200 : 100;
382
409
  const missedWeight = Math.min(skill.missedQueries, 50);
@@ -1012,6 +1039,7 @@ export async function orchestrate(
1012
1039
  maxIterations: 3,
1013
1040
  gradingResults: _readGradingResults(candidate.skill),
1014
1041
  syncFirst: false, // We already synced
1042
+ ...AUTONOMOUS_EVOLVE_DEFAULTS,
1015
1043
  });
1016
1044
 
1017
1045
  candidate.evolveResult = evolveResult;
@@ -2,14 +2,52 @@
2
2
  * Route handler: GET /api/v2/overview
3
3
  *
4
4
  * Returns SQLite-backed overview payload with skill listing and version info.
5
+ * Supports optional cursor-based pagination via query params:
6
+ * ?telemetry_cursor=<json>&telemetry_limit=N&skills_cursor=<json>&skills_limit=N
5
7
  */
6
8
 
7
9
  import type { Database } from "bun:sqlite";
8
10
 
9
- import { getOverviewPayload, getSkillsList } from "../localdb/queries.js";
11
+ import { parseCursorParam, parseIntParam } from "../dashboard-contract.js";
12
+ import {
13
+ getOverviewPayload,
14
+ getOverviewPayloadPaginated,
15
+ getSkillsList,
16
+ } from "../localdb/queries.js";
10
17
 
11
- export function handleOverview(db: Database, version: string): Response {
12
- const overview = getOverviewPayload(db);
18
/**
 * Build the JSON response for the overview route.
 *
 * If the request carries none of the pagination query parameters
 * (`telemetry_cursor`, `telemetry_limit`, `skills_cursor`, `skills_limit`),
 * the unpaginated overview payload is returned so existing callers keep
 * working. Otherwise the cursor/limit parameters are parsed and the paginated
 * overview query is used.
 *
 * @param db - Open SQLite database handle.
 * @param version - Version string echoed back in the response body.
 * @param searchParams - Optional query parameters of the incoming request.
 * @returns A JSON response of the shape `{ overview, skills, version }`.
 */
export function handleOverview(
  db: Database,
  version: string,
  searchParams?: URLSearchParams,
): Response {
  const skills = getSkillsList(db);

  // Pagination is opt-in: it is enabled as soon as any one of these is present.
  const paginationKeys = ["telemetry_cursor", "telemetry_limit", "skills_cursor", "skills_limit"];
  let paginated = false;
  if (searchParams) {
    for (const key of paginationKeys) {
      if (searchParams.has(key)) {
        paginated = true;
        break;
      }
    }
  }

  if (!searchParams || !paginated) {
    // Backward-compatible path: no pagination requested.
    return Response.json({ overview: getOverviewPayload(db), skills, version });
  }

  const overview = getOverviewPayloadPaginated(db, {
    telemetry_cursor: parseCursorParam(searchParams.get("telemetry_cursor")),
    telemetry_limit: parseIntParam(searchParams.get("telemetry_limit"), 1000),
    skills_cursor: parseCursorParam(searchParams.get("skills_cursor")),
    skills_limit: parseIntParam(searchParams.get("skills_limit"), 2000),
  });

  return Response.json({ overview, skills, version });
}
@@ -8,10 +8,21 @@
8
8
 
9
9
  import type { Database } from "bun:sqlite";
10
10
 
11
+ import { parseCursorParam } from "../dashboard-contract.js";
11
12
  import { scoreDescription } from "../evolution/description-quality.js";
12
- import { getPendingProposals, getSkillReportPayload, safeParseJson } from "../localdb/queries.js";
13
+ import {
14
+ getExecutionMetrics,
15
+ getPendingProposals,
16
+ getSkillCommitSummary,
17
+ getSkillReportPayload,
18
+ safeParseJson,
19
+ } from "../localdb/queries.js";
13
20
 
14
- export function handleSkillReport(db: Database, skillName: string): Response {
21
+ export function handleSkillReport(
22
+ db: Database,
23
+ skillName: string,
24
+ searchParams?: URLSearchParams,
25
+ ): Response {
15
26
  const report = getSkillReportPayload(db, skillName);
16
27
 
17
28
  // 1. Evolution audit with eval_snapshot
@@ -87,21 +98,17 @@ export function handleSkillReport(db: Database, skillName: string): Response {
87
98
  run_count: selftuneRunCount,
88
99
  };
89
100
 
90
- // 4. Skill invocations — single source of truth
101
+ // 4. Skill invocations — single source of truth (with optional cursor pagination)
91
102
  // JOIN prompts to recover query text when si.query is null (canonical records
92
103
  // don't carry query; it's only populated via the direct-write hook path).
93
- const invocationsWithConfidence = db
94
- .query(
95
- `SELECT si.occurred_at as timestamp, si.session_id, si.skill_name,
96
- si.invocation_mode, si.triggered, si.confidence, si.tool_name,
97
- si.agent_type, COALESCE(si.query, p.prompt_text) as query, si.source
98
- FROM skill_invocations si
99
- LEFT JOIN prompts p ON si.matched_prompt_id = p.prompt_id
100
- WHERE si.skill_name = ?
101
- ORDER BY si.occurred_at DESC
102
- LIMIT 100`,
103
- )
104
- .all(skillName) as Array<{
104
+ const invCursor = parseCursorParam(searchParams?.get("invocations_cursor") ?? null);
105
+ const invLimitParam = searchParams?.get("invocations_limit");
106
+ const invLimit = invLimitParam
107
+ ? Math.max(1, Math.min(Number.parseInt(invLimitParam, 10) || 100, 10000))
108
+ : 100;
109
+ const invFetchLimit = invLimit + 1;
110
+
111
+ let invocationsWithConfidence: Array<{
105
112
  timestamp: string;
106
113
  session_id: string;
107
114
  skill_name: string;
@@ -112,8 +119,56 @@ export function handleSkillReport(db: Database, skillName: string): Response {
112
119
  agent_type: string | null;
113
120
  query: string | null;
114
121
  source: string | null;
122
+ skill_invocation_id: string;
115
123
  }>;
116
124
 
125
+ if (invCursor) {
126
+ invocationsWithConfidence = db
127
+ .query(
128
+ `SELECT si.occurred_at as timestamp, si.session_id, si.skill_name,
129
+ si.invocation_mode, si.triggered, si.confidence, si.tool_name,
130
+ si.agent_type, COALESCE(si.query, p.prompt_text) as query, si.source,
131
+ si.skill_invocation_id
132
+ FROM skill_invocations si
133
+ LEFT JOIN prompts p ON si.matched_prompt_id = p.prompt_id
134
+ WHERE si.skill_name = ?
135
+ AND (si.occurred_at < ? OR (si.occurred_at = ? AND si.skill_invocation_id < ?))
136
+ ORDER BY si.occurred_at DESC, si.skill_invocation_id DESC
137
+ LIMIT ?`,
138
+ )
139
+ .all(
140
+ skillName,
141
+ invCursor.timestamp,
142
+ invCursor.timestamp,
143
+ String(invCursor.id),
144
+ invFetchLimit,
145
+ ) as typeof invocationsWithConfidence;
146
+ } else {
147
+ invocationsWithConfidence = db
148
+ .query(
149
+ `SELECT si.occurred_at as timestamp, si.session_id, si.skill_name,
150
+ si.invocation_mode, si.triggered, si.confidence, si.tool_name,
151
+ si.agent_type, COALESCE(si.query, p.prompt_text) as query, si.source,
152
+ si.skill_invocation_id
153
+ FROM skill_invocations si
154
+ LEFT JOIN prompts p ON si.matched_prompt_id = p.prompt_id
155
+ WHERE si.skill_name = ?
156
+ ORDER BY si.occurred_at DESC, si.skill_invocation_id DESC
157
+ LIMIT ?`,
158
+ )
159
+ .all(skillName, invFetchLimit) as typeof invocationsWithConfidence;
160
+ }
161
+
162
+ const invHasMore = invocationsWithConfidence.length > invLimit;
163
+ const invPageRows = invHasMore
164
+ ? invocationsWithConfidence.slice(0, invLimit)
165
+ : invocationsWithConfidence;
166
+ const invLastRow = invPageRows[invPageRows.length - 1];
167
+ const invNextCursor =
168
+ invHasMore && invLastRow
169
+ ? { timestamp: invLastRow.timestamp, id: invLastRow.skill_invocation_id }
170
+ : null;
171
+
117
172
  // Not-found check — after all enrichment queries so evidence-only skills aren't 404'd
118
173
  const hasData =
119
174
  report.usage.total_checks > 0 ||
@@ -121,7 +176,7 @@ export function handleSkillReport(db: Database, skillName: string): Response {
121
176
  report.evidence.length > 0 ||
122
177
  evolution.length > 0 ||
123
178
  pending_proposals.length > 0 ||
124
- invocationsWithConfidence.length > 0;
179
+ invPageRows.length > 0;
125
180
  if (!hasData) {
126
181
  return Response.json({ error: "Skill not found" }, { status: 404 });
127
182
  }
@@ -156,6 +211,18 @@ export function handleSkillReport(db: Database, skillName: string): Response {
156
211
  )
157
212
  .get(skillName) as { missed_triggers: number } | null;
158
213
 
214
+ // 5b. Execution metrics (enrichment columns from execution_facts)
215
+ const skillSessionIds = db
216
+ .query(`SELECT DISTINCT session_id FROM skill_invocations WHERE skill_name = ?`)
217
+ .all(skillName) as Array<{ session_id: string }>;
218
+ const executionMetrics = getExecutionMetrics(
219
+ db,
220
+ skillSessionIds.map((r) => r.session_id),
221
+ );
222
+
223
+ // 5c. Commit summary (from commit_tracking via session join)
224
+ const commitSummary = getSkillCommitSummary(db, skillName);
225
+
159
226
  // 6. Prompt texts — prefer matched prompts (the prompt that invoked the skill),
160
227
  // fall back to all prompts from sessions that used the skill.
161
228
  const promptSamples = db
@@ -227,16 +294,20 @@ export function handleSkillReport(db: Database, skillName: string): Response {
227
294
  total_input_tokens: executionRow?.total_input_tokens ?? 0,
228
295
  total_output_tokens: executionRow?.total_output_tokens ?? 0,
229
296
  },
230
- canonical_invocations: invocationsWithConfidence.map((i) => ({
297
+ canonical_invocations: invPageRows.map((i) => ({
231
298
  ...i,
232
299
  triggered: i.triggered === 1,
233
300
  })),
301
+ invocations_pagination:
302
+ invNextCursor || invCursor ? { next_cursor: invNextCursor, has_more: invHasMore } : undefined,
234
303
  duration_stats: {
235
304
  avg_duration_ms: executionRow?.avg_duration_ms ?? 0,
236
305
  total_duration_ms: executionRow?.total_duration_ms ?? 0,
237
306
  execution_count: executionRow?.execution_count ?? 0,
238
307
  missed_triggers: missedRow?.missed_triggers ?? 0,
239
308
  },
309
+ execution_metrics: executionMetrics,
310
+ commit_summary: commitSummary.total_commits > 0 ? commitSummary : null,
240
311
  selftune_stats: selftuneStats,
241
312
  prompt_samples: promptSamples.map((p) => ({
242
313
  ...p,
@@ -93,6 +93,17 @@ export interface SessionTelemetryRecord {
93
93
  source?: string;
94
94
  input_tokens?: number;
95
95
  output_tokens?: number;
96
+ cached_input_tokens?: number;
97
+ reasoning_output_tokens?: number;
98
+ cost_usd?: number;
99
+ files_changed?: number;
100
+ lines_added?: number;
101
+ lines_removed?: number;
102
+ lines_modified?: number;
103
+ /** Count of output-producing tool calls (Write, Edit, WebFetch, WebSearch, Skill, Agent). */
104
+ artifact_count?: number;
105
+ /** Inferred session type based on tool distribution. */
106
+ session_type?: SessionType;
96
107
  agent_summary?: string;
97
108
  rollout_path?: string;
98
109
  }
@@ -140,6 +151,13 @@ export {
140
151
  CANONICAL_SOURCE_SESSION_KINDS,
141
152
  } from "@selftune/telemetry-contract/types";
142
153
 
154
+ // ---------------------------------------------------------------------------
155
+ // Session classification
156
+ // ---------------------------------------------------------------------------
157
+
158
+ /** Inferred session type based on tool distribution. */
159
+ export type SessionType = "dev" | "research" | "content" | "mixed";
160
+
143
161
  // ---------------------------------------------------------------------------
144
162
  // Transcript parsing
145
163
  // ---------------------------------------------------------------------------
@@ -156,6 +174,17 @@ export interface TranscriptMetrics {
156
174
  last_user_query: string;
157
175
  input_tokens?: number;
158
176
  output_tokens?: number;
177
+ cached_input_tokens?: number;
178
+ reasoning_output_tokens?: number;
179
+ cost_usd?: number;
180
+ files_changed?: number;
181
+ lines_added?: number;
182
+ lines_removed?: number;
183
+ lines_modified?: number;
184
+ /** Count of output-producing tool calls (Write, Edit, WebFetch, WebSearch, Skill, Agent). */
185
+ artifact_count?: number;
186
+ /** Inferred session type based on tool distribution. */
187
+ session_type?: SessionType;
159
188
  duration_ms?: number;
160
189
  model?: string;
161
190
  started_at?: string;
@@ -290,6 +319,8 @@ export interface ExecutionMetrics {
290
319
  errors_encountered: number;
291
320
  skills_triggered: string[];
292
321
  transcript_chars: number;
322
+ artifact_count?: number;
323
+ session_type?: SessionType;
293
324
  }
294
325
 
295
326
  // ---------------------------------------------------------------------------
@@ -6,9 +6,15 @@ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
6
6
  import { basename, dirname } from "node:path";
7
7
 
8
8
  import { CLAUDE_CODE_PROJECTS_DIR } from "../constants.js";
9
- import type { SessionTelemetryRecord, TranscriptMetrics } from "../types.js";
9
+ import type { SessionTelemetryRecord, SessionType, TranscriptMetrics } from "../types.js";
10
10
  import { isActionableQueryText } from "./query-filter.js";
11
11
 
12
+ /** Tools that produce durable output artifacts (not reads or exploration). */
13
+ const ARTIFACT_TOOLS = new Set(["Write", "Edit", "WebFetch", "WebSearch", "Skill", "Agent"]);
14
+
15
+ /** Matches any bash command containing a git invocation. */
16
+ const GIT_CMD_RE = /\bgit\b/;
17
+
12
18
  /**
13
19
  * Parse a Claude Code transcript JSONL and extract process metrics.
14
20
  *
@@ -32,10 +38,18 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
32
38
  let lastUserQuery = "";
33
39
  let inputTokens = 0;
34
40
  let outputTokens = 0;
41
+ let cachedInputTokens = 0;
42
+ let reasoningOutputTokens = 0;
35
43
  let firstTimestamp: string | null = null;
36
44
  let lastTimestamp: string | null = null;
37
45
  let model: string | undefined;
38
46
 
47
+ // File change tracking (Win 2)
48
+ const changedFiles = new Set<string>();
49
+ let linesAdded = 0;
50
+ let linesRemoved = 0;
51
+ let linesModified = 0;
52
+
39
53
  for (const raw of lines) {
40
54
  const line = raw.trim();
41
55
  if (!line) continue;
@@ -61,6 +75,14 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
61
75
  if (usage && typeof usage === "object") {
62
76
  if (typeof usage.input_tokens === "number") inputTokens += usage.input_tokens;
63
77
  if (typeof usage.output_tokens === "number") outputTokens += usage.output_tokens;
78
+ // Win 3: Token granularity — cached input tokens
79
+ if (typeof usage.cache_read_input_tokens === "number")
80
+ cachedInputTokens += usage.cache_read_input_tokens;
81
+ if (typeof usage.cache_creation_input_tokens === "number")
82
+ cachedInputTokens += usage.cache_creation_input_tokens;
83
+ // Win 3: Reasoning output tokens
84
+ if (typeof usage.reasoning_output_tokens === "number")
85
+ reasoningOutputTokens += usage.reasoning_output_tokens;
64
86
  }
65
87
 
66
88
  // Normalise: unwrap nested message if present
@@ -119,6 +141,26 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
119
141
  const cmd = ((inp.command as string) ?? "").trim();
120
142
  if (cmd) bashCommands.push(cmd);
121
143
  }
144
+
145
+ // Win 2: Track file changes from Write and Edit tools
146
+ if (toolName === "Write" || toolName === "Edit") {
147
+ const fp = (inp.file_path as string) ?? "";
148
+ if (fp) changedFiles.add(fp);
149
+ }
150
+ if (toolName === "Write" && typeof inp.content === "string") {
151
+ linesAdded += inp.content.split("\n").length;
152
+ }
153
+ if (toolName === "Edit") {
154
+ const oldStr = inp.old_string;
155
+ const newStr = inp.new_string;
156
+ if (typeof oldStr === "string" && typeof newStr === "string") {
157
+ const oldLines = oldStr.split("\n").length;
158
+ const newLines = newStr.split("\n").length;
159
+ linesModified += Math.min(oldLines, newLines);
160
+ linesAdded += Math.max(0, newLines - oldLines);
161
+ linesRemoved += Math.max(0, oldLines - newLines);
162
+ }
163
+ }
122
164
  }
123
165
  }
124
166
  }
@@ -143,6 +185,12 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
143
185
  }
144
186
  }
145
187
 
188
+ // Compute artifact count: output-producing tool calls
189
+ let artifactCount = 0;
190
+ for (const [tool, count] of Object.entries(toolCalls)) {
191
+ if (ARTIFACT_TOOLS.has(tool)) artifactCount += count;
192
+ }
193
+
146
194
  // Compute duration from first to last timestamp
147
195
  let durationMs: number | undefined;
148
196
  if (firstTimestamp && lastTimestamp && firstTimestamp !== lastTimestamp) {
@@ -153,6 +201,12 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
153
201
  }
154
202
  }
155
203
 
204
+ // Win 3: Calculate cost from model and token counts
205
+ const costUsd = calculateCost(model, inputTokens, outputTokens);
206
+
207
+ // Infer session type from tool distribution
208
+ const sessionType = inferSessionType(toolCalls, bashCommands);
209
+
156
210
  return {
157
211
  tool_calls: toolCalls,
158
212
  total_tool_calls: Object.values(toolCalls).reduce((a, b) => a + b, 0),
@@ -163,8 +217,18 @@ export function parseTranscript(transcriptPath: string): TranscriptMetrics {
163
217
  errors_encountered: errors,
164
218
  transcript_chars: totalChars,
165
219
  last_user_query: lastUserQuery,
220
+ // Win 2: File change metrics
221
+ files_changed: changedFiles.size,
222
+ lines_added: linesAdded,
223
+ lines_removed: linesRemoved,
224
+ lines_modified: linesModified,
225
+ artifact_count: artifactCount,
226
+ session_type: sessionType,
166
227
  ...(inputTokens > 0 ? { input_tokens: inputTokens } : {}),
167
228
  ...(outputTokens > 0 ? { output_tokens: outputTokens } : {}),
229
+ ...(cachedInputTokens > 0 ? { cached_input_tokens: cachedInputTokens } : {}),
230
+ ...(reasoningOutputTokens > 0 ? { reasoning_output_tokens: reasoningOutputTokens } : {}),
231
+ ...(costUsd !== undefined ? { cost_usd: costUsd } : {}),
168
232
  ...(durationMs !== undefined ? { duration_ms: durationMs } : {}),
169
233
  ...(model ? { model } : {}),
170
234
  ...(firstTimestamp ? { started_at: firstTimestamp } : {}),
@@ -307,6 +371,16 @@ export function buildTelemetryFromTranscript(
307
371
  source,
308
372
  input_tokens: metrics.input_tokens,
309
373
  output_tokens: metrics.output_tokens,
374
+ cached_input_tokens: metrics.cached_input_tokens,
375
+ reasoning_output_tokens: metrics.reasoning_output_tokens,
376
+ cost_usd: metrics.cost_usd,
377
+ files_changed: metrics.files_changed,
378
+ lines_added: metrics.lines_added,
379
+ lines_removed: metrics.lines_removed,
380
+ lines_modified: metrics.lines_modified,
381
+ artifact_count: metrics.artifact_count,
382
+ session_type: metrics.session_type,
383
+ agent_summary: generateSessionSummary(metrics),
310
384
  };
311
385
  }
312
386
 
@@ -518,6 +592,141 @@ export function extractTokenUsage(transcriptPath: string): { input: number; outp
518
592
  return { input, output };
519
593
  }
520
594
 
595
// ---------------------------------------------------------------------------
// Win 3: Model cost lookup (USD per million tokens)
// ---------------------------------------------------------------------------

/** Price table keyed by full model id; values are USD per million tokens. */
const MODEL_COSTS: Record<string, { input: number; output: number }> = {
  "claude-sonnet-4-20250514": { input: 3.0, output: 15.0 },
  "claude-opus-4-20250514": { input: 15.0, output: 75.0 },
  "claude-haiku-3-5-20241022": { input: 0.8, output: 4.0 },
  "claude-3-5-sonnet-20241022": { input: 3.0, output: 15.0 },
  "claude-3-5-haiku-20241022": { input: 0.8, output: 4.0 },
  "claude-3-opus-20240229": { input: 15.0, output: 75.0 },
  "claude-3-sonnet-20240229": { input: 3.0, output: 15.0 },
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
};

/**
 * Calculate estimated cost in USD from model name and token counts.
 *
 * Lookup order:
 *  1. exact match of `model` against the price table;
 *  2. otherwise the first table entry whose id, with its last dash-separated
 *     segment removed, is a prefix of `model`.
 *
 * Only the two token counts passed in are priced.
 *
 * @param model - Model id as recorded in the transcript, if any.
 * @param inputTokens - Number of input tokens to price.
 * @param outputTokens - Number of output tokens to price.
 * @returns Estimated cost in USD, or `undefined` if the model is unknown or
 *   not provided.
 */
export function calculateCost(
  model: string | undefined,
  inputTokens: number,
  outputTokens: number,
): number | undefined {
  if (!model) return undefined;

  // Own-property check: a bare `MODEL_COSTS[model]` would also resolve members
  // inherited from Object.prototype (e.g. "constructor", "toString"), which
  // are not price entries and would turn the result into NaN.
  let costs: { input: number; output: number } | undefined =
    Object.prototype.hasOwnProperty.call(MODEL_COSTS, model) ? MODEL_COSTS[model] : undefined;

  if (!costs) {
    for (const [knownId, knownCosts] of Object.entries(MODEL_COSTS)) {
      // Drop the trailing segment of the known id and compare by prefix.
      const family = knownId.split("-").slice(0, -1).join("-");
      if (model.startsWith(family)) {
        costs = knownCosts;
        break;
      }
    }
  }

  if (!costs) return undefined;
  return (inputTokens * costs.input + outputTokens * costs.output) / 1_000_000;
}
628
+
629
/**
 * Classify a session from its tool-call counts and bash commands.
 *
 * Rules are evaluated in order; the first match wins:
 * - "dev": some bash command matches the git pattern AND Write+Edit+Bash make
 *   up more than 30% of all tool calls.
 * - "research": WebFetch+WebSearch exceed 20% of calls with Write+Edit below
 *   15%, OR Read exceeds 50% of calls with Write+Edit below 10%.
 * - "content": Write+Edit exceed 20% of calls and no git command was seen.
 * - "mixed": anything else, including a session with zero tool calls.
 *
 * @param toolCalls - Map of tool name to number of calls.
 * @param bashCommands - Bash command strings seen in the session.
 * @returns The inferred session type.
 */
export function inferSessionType(
  toolCalls: Record<string, number>,
  bashCommands: string[],
): "dev" | "research" | "content" | "mixed" {
  let total = 0;
  for (const calls of Object.values(toolCalls)) total += calls;
  if (total === 0) return "mixed";

  const callsTo = (tool: string): number => toolCalls[tool] ?? 0;
  const fileMutations = callsTo("Write") + callsTo("Edit");
  const webCalls = callsTo("WebFetch") + callsTo("WebSearch");
  const shellCalls = callsTo("Bash");
  const readCalls = callsTo("Read");

  let usesGit = false;
  for (const command of bashCommands) {
    if (GIT_CMD_RE.test(command)) {
      usesGit = true;
      break;
    }
  }

  // Dev: git activity together with a meaningful share of mutation/shell calls.
  if (usesGit && (fileMutations + shellCalls) / total > 0.3) return "dev";

  // Research: web-heavy or read-heavy, with few file mutations.
  const webHeavy = webCalls > 0 && webCalls / total > 0.2 && fileMutations / total < 0.15;
  const readHeavy = readCalls / total > 0.5 && fileMutations / total < 0.1;
  if (webHeavy || readHeavy) return "research";

  // Content: file mutations without any git command.
  return fileMutations / total > 0.2 && !usesGit ? "content" : "mixed";
}
662
+
663
/**
 * Produce a one-line, template-based summary of a session (no LLM call).
 *
 * The wording depends on `metrics.session_type` (treated as "mixed" when
 * absent); the last user query, shortened to 60 characters, is appended when
 * present. The result is capped at 120 characters, ending in "..." if it had
 * to be cut.
 *
 * @param metrics - Parsed transcript metrics for the session.
 * @returns The summary string.
 */
export function generateSessionSummary(metrics: TranscriptMetrics): string {
  const limit = 120;
  const kind: SessionType = metrics.session_type ?? "mixed";
  const query = truncateQuery(metrics.last_user_query, 60);

  if (!query && metrics.total_tool_calls === 0) {
    return "Empty session — no tool calls or queries";
  }

  const leadingTools = getTopTools(metrics.tool_calls, 2);
  // Every variant ends with the same optional query suffix.
  const querySuffix = query ? ` — ${query}` : "";

  let headline: string;
  if (kind === "dev") {
    const via = leadingTools.length > 0 ? ` via ${leadingTools.join(", ")}` : "";
    headline = `${metrics.files_changed ?? 0} files changed${via}`;
  } else if (kind === "research") {
    const webCalls = (metrics.tool_calls.WebSearch ?? 0) + (metrics.tool_calls.WebFetch ?? 0);
    const readCalls = metrics.tool_calls.Read ?? 0;
    headline = `${webCalls} searches + ${readCalls} reads`;
  } else if (kind === "content") {
    headline = `${metrics.files_changed ?? 0} files created/edited`;
  } else {
    const distinctTools = Object.keys(metrics.tool_calls).length;
    headline = `${metrics.total_tool_calls} tool calls across ${distinctTools} tools`;
  }

  const summary = `${headline}${querySuffix}`;
  return summary.length > limit ? `${summary.slice(0, limit - 3)}...` : summary;
}
713
+
714
/**
 * Return the names of the `n` most-called tools, highest count first.
 * Tools with equal counts keep the order in which they appear in `toolCalls`.
 */
function getTopTools(toolCalls: Record<string, number>, n: number): string[] {
  const ranked = Object.entries(toolCalls);
  ranked.sort(([, callsA], [, callsB]) => callsB - callsA);

  const names: string[] = [];
  for (const [toolName] of ranked.slice(0, n)) {
    names.push(toolName);
  }
  return names;
}
721
+
722
/**
 * Trim `query` and shorten it to at most `maxLen` UTF-16 code units.
 *
 * - Returns "" when the trimmed query is empty.
 * - Returns the trimmed query unchanged when it already fits.
 * - Otherwise cuts the text and appends "..." so the result fits in `maxLen`.
 *   When `maxLen` is smaller than the ellipsis itself, the text is hard-cut
 *   to `maxLen` with no ellipsis.
 *
 * The cut never lands in the middle of a surrogate pair.
 *
 * @param query - Raw query text.
 * @param maxLen - Maximum length of the returned string.
 * @returns The trimmed, possibly shortened query.
 */
function truncateQuery(query: string, maxLen: number): string {
  const ellipsis = "...";
  const trimmed = query.trim();
  if (!trimmed) return "";
  if (trimmed.length <= maxLen) return trimmed;

  // With maxLen < 3, `maxLen - 3` would be a negative slice end, which counts
  // from the end of the string and yields a result longer than maxLen.
  const roomForEllipsis = maxLen >= ellipsis.length;
  let cut = roomForEllipsis ? maxLen - ellipsis.length : Math.max(0, maxLen);

  // Step back one unit if the cut would leave a lone high surrogate behind.
  if (cut > 0) {
    const lastUnit = trimmed.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
  }

  const head = trimmed.slice(0, cut);
  return roomForEllipsis ? `${head}${ellipsis}` : head;
}
729
+
521
730
  function emptyMetrics(): TranscriptMetrics {
522
731
  return {
523
732
  tool_calls: {},
@@ -143,7 +143,18 @@ export interface CanonicalExecutionFactRecord extends CanonicalSessionRecordBase
143
143
  errors_encountered: number;
144
144
  input_tokens?: number;
145
145
  output_tokens?: number;
146
+ cached_input_tokens?: number;
147
+ reasoning_output_tokens?: number;
148
+ cost_usd?: number;
146
149
  duration_ms?: number;
150
+ files_changed?: number;
151
+ lines_added?: number;
152
+ lines_removed?: number;
153
+ lines_modified?: number;
154
+ /** Count of output-producing tool calls (Write, Edit, WebFetch, WebSearch, Skill, Agent). */
155
+ artifact_count?: number;
156
+ /** Inferred session type based on tool distribution. */
157
+ session_type?: "dev" | "research" | "content" | "mixed";
147
158
  completion_status?: CanonicalCompletionStatus;
148
159
  end_reason?: string;
149
160
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "selftune",
3
- "version": "0.2.16",
3
+ "version": "0.2.18",
4
4
  "description": "Self-improving skills CLI for AI agents",
5
5
  "keywords": [
6
6
  "agent",
@@ -143,7 +143,18 @@ export interface CanonicalExecutionFactRecord extends CanonicalSessionRecordBase
143
143
  errors_encountered: number;
144
144
  input_tokens?: number;
145
145
  output_tokens?: number;
146
+ cached_input_tokens?: number;
147
+ reasoning_output_tokens?: number;
148
+ cost_usd?: number;
146
149
  duration_ms?: number;
150
+ files_changed?: number;
151
+ lines_added?: number;
152
+ lines_removed?: number;
153
+ lines_modified?: number;
154
+ /** Count of output-producing tool calls (Write, Edit, WebFetch, WebSearch, Skill, Agent). */
155
+ artifact_count?: number;
156
+ /** Inferred session type based on tool distribution. */
157
+ session_type?: "dev" | "research" | "content" | "mixed";
147
158
  completion_status?: CanonicalCompletionStatus;
148
159
  end_reason?: string;
149
160
  }