selftune 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/.claude/agents/diagnosis-analyst.md +156 -0
  2. package/.claude/agents/evolution-reviewer.md +180 -0
  3. package/.claude/agents/integration-guide.md +212 -0
  4. package/.claude/agents/pattern-analyst.md +160 -0
  5. package/CHANGELOG.md +46 -1
  6. package/README.md +105 -257
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/assets/BeforeAfter.gif +0 -0
  20. package/assets/FeedbackLoop.gif +0 -0
  21. package/assets/logo.svg +9 -0
  22. package/assets/skill-health-badge.svg +20 -0
  23. package/cli/selftune/activation-rules.ts +171 -0
  24. package/cli/selftune/badge/badge-data.ts +108 -0
  25. package/cli/selftune/badge/badge-svg.ts +212 -0
  26. package/cli/selftune/badge/badge.ts +99 -0
  27. package/cli/selftune/canonical-export.ts +183 -0
  28. package/cli/selftune/constants.ts +103 -1
  29. package/cli/selftune/contribute/bundle.ts +314 -0
  30. package/cli/selftune/contribute/contribute.ts +214 -0
  31. package/cli/selftune/contribute/sanitize.ts +162 -0
  32. package/cli/selftune/cron/setup.ts +266 -0
  33. package/cli/selftune/dashboard-contract.ts +202 -0
  34. package/cli/selftune/dashboard-server.ts +1049 -0
  35. package/cli/selftune/dashboard.ts +43 -156
  36. package/cli/selftune/eval/baseline.ts +248 -0
  37. package/cli/selftune/eval/composability-v2.ts +273 -0
  38. package/cli/selftune/eval/composability.ts +117 -0
  39. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  40. package/cli/selftune/eval/hooks-to-evals.ts +101 -16
  41. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  42. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  43. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  44. package/cli/selftune/eval/unit-test.ts +196 -0
  45. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  46. package/cli/selftune/evolution/evidence.ts +26 -0
  47. package/cli/selftune/evolution/evolve-body.ts +586 -0
  48. package/cli/selftune/evolution/evolve.ts +825 -116
  49. package/cli/selftune/evolution/extract-patterns.ts +105 -16
  50. package/cli/selftune/evolution/pareto.ts +314 -0
  51. package/cli/selftune/evolution/propose-body.ts +171 -0
  52. package/cli/selftune/evolution/propose-description.ts +100 -2
  53. package/cli/selftune/evolution/propose-routing.ts +166 -0
  54. package/cli/selftune/evolution/refine-body.ts +141 -0
  55. package/cli/selftune/evolution/rollback.ts +21 -4
  56. package/cli/selftune/evolution/validate-body.ts +254 -0
  57. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  58. package/cli/selftune/evolution/validate-routing.ts +177 -0
  59. package/cli/selftune/grading/auto-grade.ts +200 -0
  60. package/cli/selftune/grading/grade-session.ts +513 -42
  61. package/cli/selftune/grading/pre-gates.ts +104 -0
  62. package/cli/selftune/grading/results.ts +42 -0
  63. package/cli/selftune/hooks/auto-activate.ts +185 -0
  64. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  65. package/cli/selftune/hooks/prompt-log.ts +172 -2
  66. package/cli/selftune/hooks/session-stop.ts +123 -3
  67. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  68. package/cli/selftune/hooks/skill-eval.ts +119 -3
  69. package/cli/selftune/index.ts +415 -48
  70. package/cli/selftune/ingestors/claude-replay.ts +377 -0
  71. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  72. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  73. package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
  74. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  75. package/cli/selftune/init.ts +376 -16
  76. package/cli/selftune/last.ts +14 -5
  77. package/cli/selftune/localdb/db.ts +63 -0
  78. package/cli/selftune/localdb/materialize.ts +428 -0
  79. package/cli/selftune/localdb/queries.ts +376 -0
  80. package/cli/selftune/localdb/schema.ts +204 -0
  81. package/cli/selftune/memory/writer.ts +447 -0
  82. package/cli/selftune/monitoring/watch.ts +90 -16
  83. package/cli/selftune/normalization.ts +682 -0
  84. package/cli/selftune/observability.ts +19 -44
  85. package/cli/selftune/orchestrate.ts +1073 -0
  86. package/cli/selftune/quickstart.ts +203 -0
  87. package/cli/selftune/repair/skill-usage.ts +576 -0
  88. package/cli/selftune/schedule.ts +561 -0
  89. package/cli/selftune/status.ts +59 -33
  90. package/cli/selftune/sync.ts +627 -0
  91. package/cli/selftune/types.ts +525 -5
  92. package/cli/selftune/utils/canonical-log.ts +45 -0
  93. package/cli/selftune/utils/frontmatter.ts +217 -0
  94. package/cli/selftune/utils/hooks.ts +41 -0
  95. package/cli/selftune/utils/html.ts +27 -0
  96. package/cli/selftune/utils/llm-call.ts +103 -19
  97. package/cli/selftune/utils/math.ts +10 -0
  98. package/cli/selftune/utils/query-filter.ts +139 -0
  99. package/cli/selftune/utils/skill-discovery.ts +340 -0
  100. package/cli/selftune/utils/skill-log.ts +68 -0
  101. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  102. package/cli/selftune/utils/transcript.ts +307 -26
  103. package/cli/selftune/utils/trigger-check.ts +89 -0
  104. package/cli/selftune/utils/tui.ts +156 -0
  105. package/cli/selftune/workflows/discover.ts +254 -0
  106. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  107. package/cli/selftune/workflows/workflows.ts +188 -0
  108. package/package.json +28 -11
  109. package/packages/telemetry-contract/README.md +11 -0
  110. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  111. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  112. package/packages/telemetry-contract/index.ts +1 -0
  113. package/packages/telemetry-contract/package.json +19 -0
  114. package/packages/telemetry-contract/src/index.ts +2 -0
  115. package/packages/telemetry-contract/src/types.ts +163 -0
  116. package/packages/telemetry-contract/src/validators.ts +109 -0
  117. package/skill/SKILL.md +180 -33
  118. package/skill/Workflows/AutoActivation.md +145 -0
  119. package/skill/Workflows/Badge.md +124 -0
  120. package/skill/Workflows/Baseline.md +144 -0
  121. package/skill/Workflows/Composability.md +107 -0
  122. package/skill/Workflows/Contribute.md +94 -0
  123. package/skill/Workflows/Cron.md +132 -0
  124. package/skill/Workflows/Dashboard.md +214 -0
  125. package/skill/Workflows/Doctor.md +63 -14
  126. package/skill/Workflows/Evals.md +110 -18
  127. package/skill/Workflows/EvolutionMemory.md +154 -0
  128. package/skill/Workflows/Evolve.md +181 -21
  129. package/skill/Workflows/EvolveBody.md +159 -0
  130. package/skill/Workflows/Grade.md +36 -31
  131. package/skill/Workflows/ImportSkillsBench.md +117 -0
  132. package/skill/Workflows/Ingest.md +142 -21
  133. package/skill/Workflows/Initialize.md +91 -23
  134. package/skill/Workflows/Orchestrate.md +139 -0
  135. package/skill/Workflows/Replay.md +91 -0
  136. package/skill/Workflows/Rollback.md +23 -4
  137. package/skill/Workflows/Schedule.md +61 -0
  138. package/skill/Workflows/Sync.md +88 -0
  139. package/skill/Workflows/UnitTest.md +150 -0
  140. package/skill/Workflows/Watch.md +33 -1
  141. package/skill/Workflows/Workflows.md +129 -0
  142. package/skill/assets/activation-rules-default.json +26 -0
  143. package/skill/assets/multi-skill-settings.json +63 -0
  144. package/skill/assets/single-skill-settings.json +57 -0
  145. package/skill/references/invocation-taxonomy.md +2 -2
  146. package/skill/references/logs.md +164 -2
  147. package/skill/references/setup-patterns.md +65 -0
  148. package/skill/references/version-history.md +40 -0
  149. package/skill/settings_snippet.json +23 -0
  150. package/templates/activation-rules-default.json +27 -0
  151. package/templates/multi-skill-settings.json +64 -0
  152. package/templates/single-skill-settings.json +58 -0
  153. package/dashboard/index.html +0 -1119
@@ -0,0 +1,217 @@
1
+ /**
2
+ * frontmatter.ts
3
+ *
4
+ * Line-based YAML frontmatter parser for SKILL.md files.
5
+ * Extracts name, description, and version without a YAML library.
6
+ */
7
+
8
+ // ---------------------------------------------------------------------------
9
+ // Types
10
+ // ---------------------------------------------------------------------------
11
+
12
/** Fields extracted from a SKILL.md file by {@link parseFrontmatter}. */
export interface SkillFrontmatter {
  /** Value of the top-level `name:` key, or "" when absent. */
  name: string;
  /** Value of the top-level `description:` key (block scalars are joined with spaces). */
  description: string;
  /** Top-level `version:` or `metadata.version`, or "" when absent. */
  version: string;
  /** Everything after the closing `---`, with leading newlines removed. */
  body: string;
}

// ---------------------------------------------------------------------------
// Parser
// ---------------------------------------------------------------------------

/**
 * Matches a YAML block scalar header: `>` or `|`, optionally followed by a
 * chomping indicator (`+`/`-`) and/or an indentation indicator (`1`-`9`) in
 * either order, e.g. `>`, `|`, `>-`, `|+`, `>2`, `|2-`.
 */
const BLOCK_SCALAR_HEADER = /^[>|](?:[+-][1-9]?|[1-9][+-]?)?$/;

/**
 * Parse YAML frontmatter from a SKILL.md file.
 *
 * Handles two description formats:
 *   - Single-line: `description: When the user wants to...`
 *   - Block scalar: `description: >\n  Multi-line text...` (also `|`, `>-`, `|+`, ...)
 *
 * Handles two version locations:
 *   - Top-level: `version: 1.0.0`
 *   - Nested: `metadata:\n  version: 1.0.0`
 *
 * Block scalar lines are joined with single spaces for both `>` and `|`.
 * Quoted values are returned verbatim (quotes are not stripped).
 *
 * @param content Full text of the SKILL.md file.
 * @returns The parsed fields. If no opening or closing `---` delimiter is
 *   found, `name` and `version` are empty and both `description` and `body`
 *   are the full, unmodified content.
 */
export function parseFrontmatter(content: string): SkillFrontmatter {
  const lines = content.split("\n");

  // The very first line must be the opening delimiter.
  if (lines[0]?.trim() !== "---") {
    return { name: "", description: content, version: "", body: content };
  }

  // First `---` after the opening one closes the frontmatter.
  const endIdx = lines.findIndex((line, idx) => idx > 0 && line.trim() === "---");
  if (endIdx < 0) {
    return { name: "", description: content, version: "", body: content };
  }

  const yamlLines = lines.slice(1, endIdx);
  const body = lines
    .slice(endIdx + 1)
    .join("\n")
    .replace(/^\n+/, "");

  let name = "";
  let description = "";
  let version = "";
  let inMetadata = false;

  for (let i = 0; i < yamlLines.length; i++) {
    // Only trailing whitespace is removed, so leading indentation still
    // distinguishes top-level keys from nested ones.
    const trimmed = yamlLines[i].trimEnd();

    // Top-level `name:`
    if (trimmed.startsWith("name:")) {
      name = trimmed.slice("name:".length).trim();
      inMetadata = false;
      continue;
    }

    // Top-level `version:`
    if (trimmed.startsWith("version:")) {
      version = trimmed.slice("version:".length).trim();
      inMetadata = false;
      continue;
    }

    // `metadata:` block start
    if (trimmed.startsWith("metadata:")) {
      inMetadata = true;
      continue;
    }

    // Nested `version:` inside metadata
    if (inMetadata && /^\s+version:/.test(trimmed)) {
      version = trimmed.replace(/^\s+version:\s*/, "");
      continue;
    }

    // Top-level `description:` — single-line or block scalar
    if (trimmed.startsWith("description:")) {
      inMetadata = false;
      const afterKey = trimmed.slice("description:".length).trim();

      if (BLOCK_SCALAR_HEADER.test(afterKey)) {
        // Collect indented continuation lines. Blank lines may appear inside
        // the scalar, so they are skipped tentatively and only count as
        // consumed when another indented line follows them.
        const descParts: string[] = [];
        let consumedEnd = i + 1;
        for (let j = i + 1; j < yamlLines.length; j++) {
          const next = yamlLines[j];
          if (next.trim() === "") continue;
          if (!/^\s/.test(next)) break;
          descParts.push(next.trim());
          consumedEnd = j + 1;
        }
        description = descParts.join(" ").trim();
        i = consumedEnd - 1; // advance past consumed lines
      } else {
        // Single-line value
        description = afterKey;
      }
      continue;
    }

    // Any other top-level key ends the metadata block.
    if (/^\S/.test(trimmed) && trimmed.includes(":")) {
      inMetadata = false;
    }
  }

  return { name, description, version, body };
}
134
+
135
+ // ---------------------------------------------------------------------------
136
+ // Frontmatter description replacement
137
+ // ---------------------------------------------------------------------------
138
+
139
/**
 * Matches a YAML block scalar header (`>`, `|`, `>-`, `|+`, `>2`, `|2-`, ...):
 * the style character plus optional chomping and indentation indicators.
 */
const BLOCK_SCALAR_INDICATOR_RE = /^[>|](?:[+-][1-9]?|[1-9][+-]?)?$/;

/**
 * Return the index of the first line after a block scalar body that starts at
 * `start`. Indented lines belong to the body; blank lines are only consumed
 * when another indented line follows them.
 */
function skipBlockScalarBody(yamlLines: readonly string[], start: number): number {
  let end = start;
  for (let cursor = start; cursor < yamlLines.length; cursor++) {
    const current = yamlLines[cursor];
    if (current.trim() === "") continue;
    if (!/^\s/.test(current)) break;
    end = cursor + 1;
  }
  return end;
}

/**
 * Render the `description:` entry as one or more frontmatter lines. A folded
 * scalar (`description: >`) is used when the text is long, spans several
 * lines, contains characters that are special in YAML, or starts with a YAML
 * indicator character; otherwise a single plain line is produced.
 */
function renderDescriptionLines(newDescription: string): string[] {
  const needsFolded =
    newDescription.length > 120 ||
    /[:#"'[\]{}|>\r\n]/.test(newDescription) ||
    /^[-?*&!%@`,]/.test(newDescription.trimStart());

  if (!needsFolded) return [`description: ${newDescription}`];

  // Wrap at ~78 chars with 2-space indent
  const rendered = ["description: >"];
  let line = "";
  for (const word of newDescription.split(/\s+/)) {
    if (word.length === 0) continue; // leading/trailing whitespace yields empty tokens
    if (line.length === 0) {
      line = `  ${word}`;
    } else if (line.length + word.length + 1 > 80) {
      rendered.push(line);
      line = `  ${word}`;
    } else {
      line = `${line} ${word}`;
    }
  }
  if (line.length > 0) rendered.push(line);
  return rendered;
}

/**
 * Replace the `description:` field in YAML frontmatter, preserving all other
 * content. If the new description is long, multi-line, or contains special
 * YAML characters, it is written as a folded scalar (`description: >`).
 *
 * An existing block scalar description (any header such as `>`, `|`, `>-`) is
 * removed together with its continuation lines. If the frontmatter has no
 * `description:` key, one is appended at the end of the frontmatter using the
 * same formatting rules.
 *
 * @param content Full text of the file, expected to start with `---`.
 * @param newDescription Replacement description text.
 * @returns The updated content, or the original content unchanged if no
 *   opening or closing frontmatter delimiter is found.
 */
export function replaceFrontmatterDescription(content: string, newDescription: string): string {
  const lines = content.split("\n");

  if (lines[0]?.trim() !== "---") return content;

  const endIdx = lines.findIndex((line, idx) => idx > 0 && line.trim() === "---");
  if (endIdx < 0) return content;

  const yamlLines = lines.slice(1, endIdx);
  const descriptionLines = renderDescriptionLines(newDescription);
  const newYamlLines: string[] = [];
  let replaced = false;
  let i = 0;

  while (i < yamlLines.length) {
    const trimmed = yamlLines[i].trimEnd();

    if (!trimmed.startsWith("description:")) {
      newYamlLines.push(yamlLines[i]);
      i++;
      continue;
    }

    replaced = true;
    const afterKey = trimmed.slice("description:".length).trim();

    // Drop the old value: the key line plus, for block scalars, its body.
    i = BLOCK_SCALAR_INDICATOR_RE.test(afterKey) ? skipBlockScalarBody(yamlLines, i + 1) : i + 1;
    newYamlLines.push(...descriptionLines);
  }

  // If description wasn't found in frontmatter, add it
  if (!replaced) {
    newYamlLines.push(...descriptionLines);
  }

  const before = lines[0]; // "---"
  const after = lines.slice(endIdx); // "---" + body
  return [before, ...newYamlLines, ...after].join("\n");
}
@@ -0,0 +1,41 @@
1
+ import { CLAUDE_CODE_HOOK_KEYS } from "../constants.js";
2
+
3
/** A single command descriptor nested under a hook entry's `hooks` array. */
export interface ClaudeCodeHookCommand {
  command?: string;
}

/** A hook entry: it may carry a direct `command`, a nested `hooks` list, or both. */
export interface ClaudeCodeHookEntry {
  command?: string;
  hooks?: ClaudeCodeHookCommand[];
}

/**
 * Loose structural guard: any non-null object is accepted as a hook entry.
 * Individual fields are still type-checked at the point of use.
 */
function isHookEntry(value: unknown): value is ClaudeCodeHookEntry {
  return typeof value === "object" && value !== null;
}

/**
 * Report whether a hook entry mentions "selftune" in its own `command` or in
 * the `command` of any nested hook.
 *
 * Nested elements that are not objects (for example `null` or a bare string)
 * are ignored instead of raising a TypeError.
 *
 * @param entry Hook entry to inspect.
 * @returns `true` if any command string contains the substring "selftune".
 */
export function entryReferencesSelftune(entry: ClaudeCodeHookEntry): boolean {
  const mentionsSelftune = (command: unknown): boolean =>
    typeof command === "string" && command.includes("selftune");

  if (mentionsSelftune(entry.command)) {
    return true;
  }

  if (!Array.isArray(entry.hooks)) {
    return false;
  }

  // Treat elements as unknown: the declared type does not guarantee runtime shape.
  const nestedHooks: readonly unknown[] = entry.hooks;
  return nestedHooks.some((hook) => isHookEntry(hook) && mentionsSelftune(hook.command));
}
29
+
30
/**
 * Check whether the hook list stored under `key` contains at least one entry
 * that references selftune.
 *
 * @param hooks Hooks mapping, keyed by hook name.
 * @param key Hook name to look up.
 * @returns `false` when the value under `key` is not an array, is empty, or
 *   has no object entry referencing selftune.
 */
export function hookKeyHasSelftuneEntry(hooks: Record<string, unknown>, key: string): boolean {
  const candidates = hooks[key];

  if (Array.isArray(candidates)) {
    for (const candidate of candidates) {
      if (isHookEntry(candidate) && entryReferencesSelftune(candidate)) {
        return true;
      }
    }
  }

  return false;
}
38
+
39
/**
 * List the keys from `CLAUDE_CODE_HOOK_KEYS` for which `hooks` has no entry
 * referencing selftune, in the order they appear in that constant.
 */
export function missingClaudeCodeHookKeys(hooks: Record<string, unknown>): string[] {
  const missing: string[] = [];
  for (const hookKey of CLAUDE_CODE_HOOK_KEYS) {
    if (!hookKeyHasSelftuneEntry(hooks, hookKey)) {
      missing.push(hookKey);
    }
  }
  return missing;
}
@@ -0,0 +1,27 @@
1
/**
 * Serialize a value to JSON that can be embedded inside an HTML <script> tag.
 *
 * Escaping only </script> is not enough: arbitrary payloads can contain other
 * HTML-like substrings. Every `<`, `>`, and `&` is therefore rewritten as a
 * `\uXXXX` escape, as are the Unicode line separators U+2028 and U+2029.
 *
 * @param data Value to serialize.
 * @returns The escaped JSON text, or "null" when `JSON.stringify` produces no
 *   output (for example for `undefined`).
 */
export function escapeJsonForHtmlScript(data: unknown): string {
  const escapes: Record<string, string> = {
    "<": "\\u003c",
    ">": "\\u003e",
    "&": "\\u0026",
    "\u2028": "\\u2028",
    "\u2029": "\\u2029",
  };

  const serialized = JSON.stringify(data) ?? "null";
  return serialized.replace(/[<>&\u2028\u2029]/g, (match) => escapes[match] ?? match);
}
@@ -11,6 +11,27 @@ import { tmpdir } from "node:os";
11
11
  import { join } from "node:path";
12
12
 
13
13
  import { AGENT_CANDIDATES } from "../constants.js";
14
+ import { createLogger } from "./logging.js";
15
+
16
+ const logger = createLogger("llm-call");
17
+
18
+ // ---------------------------------------------------------------------------
19
+ // Model alias resolution
20
+ // ---------------------------------------------------------------------------
21
+
22
/**
 * The claude CLI --model flag only accepts "sonnet" and "opus" as aliases.
 * "haiku" is NOT a valid --model alias (only valid in --agents subagent config).
 * Map short names to full model IDs so callers can use friendly names.
 */
const CLAUDE_MODEL_ALIASES: Record<string, string> = {
  haiku: "claude-haiku-4-5-20251001",
};

/**
 * Resolve a model alias to its full ID for the claude CLI --model flag.
 *
 * Only the table's own keys count as aliases, so a flag that happens to match
 * an inherited object member (e.g. "toString" or "__proto__") is returned
 * unchanged rather than resolving to a non-string value.
 *
 * @param flag Model name or alias supplied by the caller.
 * @returns The mapped model ID, or `flag` itself when no alias is defined.
 */
function resolveModelFlag(flag: string): string {
  return Object.prototype.hasOwnProperty.call(CLAUDE_MODEL_ALIASES, flag)
    ? CLAUDE_MODEL_ALIASES[flag]
    : flag;
}
14
35
 
15
36
  // ---------------------------------------------------------------------------
16
37
  // Agent detection
@@ -68,6 +89,35 @@ export function stripMarkdownFences(raw: string): string {
68
89
  return text;
69
90
  }
70
91
 
92
+ // ---------------------------------------------------------------------------
93
+ // Retry configuration
94
+ // ---------------------------------------------------------------------------
95
+
96
+ const DEFAULT_MAX_RETRIES = 2;
97
+ const DEFAULT_INITIAL_BACKOFF_MS = 2_000;
98
+
99
+ /** Options to control retry behavior. All fields optional with sensible defaults. */
100
+ export interface RetryOptions {
101
+ /** Maximum number of retries (default: 2). Set to 0 to disable retries. */
102
+ maxRetries?: number;
103
+ /** Initial backoff in ms before first retry (default: 2000). Doubles each retry. */
104
+ initialBackoffMs?: number;
105
+ }
106
+
107
/**
 * Decide whether a failure is worth retrying.
 *
 * Only `Error` instances whose message contains "exited with code"
 * (case-insensitive) are treated as transient; everything else is not.
 */
function isTransientError(err: unknown): boolean {
  return err instanceof Error && /exited with code/i.test(err.message);
}
115
+
116
/** Return a promise that resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
120
+
71
121
  // ---------------------------------------------------------------------------
72
122
  // Call LLM via agent subprocess
73
123
  // ---------------------------------------------------------------------------
@@ -77,6 +127,8 @@ export async function callViaAgent(
77
127
  systemPrompt: string,
78
128
  userPrompt: string,
79
129
  agent: string,
130
+ modelFlag?: string,
131
+ retryOpts?: RetryOptions,
80
132
  ): Promise<string> {
81
133
  // Write prompt to temp file to avoid shell quoting issues
82
134
  const promptFile = join(tmpdir(), `selftune-llm-${Date.now()}.txt`);
@@ -88,6 +140,10 @@ export async function callViaAgent(
88
140
 
89
141
  if (agent === "claude") {
90
142
  cmd = ["claude", "-p", promptContent];
143
+ if (modelFlag) {
144
+ const resolved = resolveModelFlag(modelFlag);
145
+ cmd.push("--model", resolved);
146
+ }
91
147
  } else if (agent === "codex") {
92
148
  cmd = ["codex", "exec", "--skip-git-repo-check", promptContent];
93
149
  } else if (agent === "opencode") {
@@ -96,26 +152,53 @@ export async function callViaAgent(
96
152
  throw new Error(`Unknown agent: ${agent}`);
97
153
  }
98
154
 
99
- const proc = Bun.spawn(cmd, {
100
- stdout: "pipe",
101
- stderr: "pipe",
102
- env: { ...process.env, CLAUDECODE: "" },
103
- });
104
-
105
- // 120s timeout
106
- const timeout = setTimeout(() => proc.kill(), 120_000);
107
- const exitCode = await proc.exited;
108
- clearTimeout(timeout);
109
-
110
- if (exitCode !== 0) {
111
- const stderr = await new Response(proc.stderr).text();
112
- throw new Error(
113
- `Agent '${agent}' exited with code ${exitCode}.\nstderr: ${stderr.slice(0, 500)}`,
114
- );
155
+ // Retry loop with exponential backoff for transient failures
156
+ const maxRetries = retryOpts?.maxRetries ?? DEFAULT_MAX_RETRIES;
157
+ const initialBackoffMs = retryOpts?.initialBackoffMs ?? DEFAULT_INITIAL_BACKOFF_MS;
158
+ let lastError: Error | undefined;
159
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
160
+ if (attempt > 0) {
161
+ const backoffMs = initialBackoffMs * 2 ** (attempt - 1);
162
+ logger.warn(
163
+ `Retry ${attempt}/${maxRetries} for agent '${agent}' after ${backoffMs}ms backoff`,
164
+ );
165
+ await sleep(backoffMs);
166
+ }
167
+
168
+ try {
169
+ const proc = Bun.spawn(cmd, {
170
+ stdout: "pipe",
171
+ stderr: "pipe",
172
+ env: { ...process.env, CLAUDECODE: "" },
173
+ });
174
+
175
+ // Longer timeout for heavier models (sonnet/opus take longer than haiku)
176
+ const isLightModel = modelFlag === "haiku" || modelFlag?.includes("haiku");
177
+ const timeoutMs = isLightModel ? 120_000 : 300_000;
178
+ const timeout = setTimeout(() => proc.kill(), timeoutMs);
179
+ const exitCode = await proc.exited;
180
+ clearTimeout(timeout);
181
+
182
+ if (exitCode !== 0) {
183
+ const stderr = await new Response(proc.stderr).text();
184
+ throw new Error(
185
+ `Agent '${agent}' exited with code ${exitCode}.\nstderr: ${stderr.slice(0, 500)}`,
186
+ );
187
+ }
188
+
189
+ const raw = await new Response(proc.stdout).text();
190
+ return raw;
191
+ } catch (err) {
192
+ lastError = err instanceof Error ? err : new Error(String(err));
193
+ if (!isTransientError(lastError) || attempt === maxRetries) {
194
+ throw lastError;
195
+ }
196
+ logger.warn(`Transient failure on attempt ${attempt + 1}: ${lastError.message}`);
197
+ }
115
198
  }
116
199
 
117
- const raw = await new Response(proc.stdout).text();
118
- return raw;
200
+ // Unreachable, but satisfies TypeScript
201
+ throw lastError ?? new Error("callViaAgent: unexpected retry loop exit");
119
202
  } finally {
120
203
  try {
121
204
  const { unlinkSync } = await import("node:fs");
@@ -135,9 +218,10 @@ export async function callLlm(
135
218
  systemPrompt: string,
136
219
  userPrompt: string,
137
220
  agent: string,
221
+ modelFlag?: string,
138
222
  ): Promise<string> {
139
223
  if (!agent) {
140
224
  throw new Error("Agent must be specified for callLlm");
141
225
  }
142
- return callViaAgent(systemPrompt, userPrompt, agent);
226
+ return callViaAgent(systemPrompt, userPrompt, agent, modelFlag);
143
227
  }
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Shared math utilities.
3
+ */
4
+
5
/**
 * Restrict `value` to the range [`min`, `max`].
 *
 * The upper bound is applied first and the lower bound second, so `min` wins
 * if the bounds are inverted.
 */
export function clamp(value: number, min: number, max: number): number {
  const cappedAbove = Math.min(max, value);
  return Math.max(min, cappedAbove);
}
@@ -0,0 +1,139 @@
1
+ import { SKIP_PREFIXES } from "../constants.js";
2
+ import type { QueryLogRecord, SkillUsageRecord } from "../types.js";
3
+
4
+ const NON_USER_QUERY_PREFIXES = [
5
+ "<system_instruction>",
6
+ "<system-instruction>",
7
+ "<system-reminder>",
8
+ "<available-deferred-tools>",
9
+ "<fast_mode_info>",
10
+ "<local-command-caveat>",
11
+ "<local-command-stdout>",
12
+ "<local-command-stderr>",
13
+ "<command-name>",
14
+ "<task-notification>",
15
+ "<teammate-message",
16
+ "[Request interrupted by user for tool use]",
17
+ "[Request interrupted by user]",
18
+ "Base directory for this skill:",
19
+ "This session is being continued from a previous conversation that ran out of context.",
20
+ "USER'S CURRENT MESSAGE (summarize THIS):",
21
+ "CONTEXT:",
22
+ "Completing task",
23
+ "Tool loaded.",
24
+ "Continue from where you left off.",
25
+ "You are an evaluation assistant.",
26
+ "You are a skill description optimizer for an AI agent routing system.",
27
+ "The following skills are available",
28
+ ] as const;
29
+
30
+ /**
31
+ * Regex patterns for wrapper/hook pipeline artifacts that are never real user prompts.
32
+ * These fire after prefix checks and cover structured hook callback lines.
33
+ */
34
+ const NON_USER_QUERY_PATTERNS = [
35
+ // Hook callback output lines (e.g. "SessionStart:startup hook success: ...")
36
+ // "Stop" excluded from general alternation — too common as English word.
37
+ /^(SessionStart|UserPromptSubmit|PreToolUse|PostToolUse):/,
38
+ // Stop hook callbacks follow a structured shape: "Stop:" + lowercase/callback text
39
+ /^Stop:(session |cleanup |hook |Callback )/,
40
+ // Injected git context blocks
41
+ /^gitStatus:\s/,
42
+ ] as const;
43
+
44
const LEADING_WRAPPED_QUERY_TAGS = [
  "system_instruction",
  "system-instruction",
  "system-reminder",
  "available-deferred-tools",
  "fast_mode_info",
  "task-notification",
  "teammate-message",
  "local-command-caveat",
  "local-command-stdout",
  "local-command-stderr",
  "command-name",
] as const;

/**
 * One anchored, case-insensitive pattern per wrapper tag, matching a complete
 * `<tag ...>...</tag>` element at the very start of the text plus any
 * whitespace after it. Compiled once at module load instead of on every call.
 */
const LEADING_WRAPPED_QUERY_PATTERNS: readonly RegExp[] = LEADING_WRAPPED_QUERY_TAGS.map(
  (tag) => new RegExp(`^<${tag}\\b[^>]*>[\\s\\S]*?<\\/${tag}>\\s*`, "i"),
);

/**
 * Remove every complete wrapper element (see `LEADING_WRAPPED_QUERY_TAGS`)
 * from the start of a query, repeating until the text no longer begins with
 * one.
 *
 * Elements that are unclosed, or that do not sit at the start of the text,
 * are left untouched.
 *
 * @param query Raw query text.
 * @returns The trimmed remainder; may be empty if the query was only wrappers.
 */
function stripLeadingWrappedQueryText(query: string): string {
  let current = query.trim();

  for (;;) {
    // The patterns have no `g` flag, so `test` is stateless and safe to reuse.
    const matching = LEADING_WRAPPED_QUERY_PATTERNS.find((pattern) => pattern.test(current));
    if (matching === undefined) return current;
    // A match always removes at least the tag pair, so the loop terminates.
    current = current.replace(matching, "").trim();
  }
}
78
/**
 * Reduce a raw query to the text a user actually typed, or `null` when the
 * query is not a real user prompt.
 *
 * Leading wrapper elements are stripped first; if stripping leaves nothing,
 * the trimmed original is evaluated instead. The result is rejected when it
 * is empty, a placeholder ("-" or "(query not found)"), starts with one of
 * the skip/non-user prefixes, or matches a non-user pattern.
 *
 * @param query Raw query text; non-string input yields `null`.
 * @returns The cleaned query text, or `null` if it is not actionable.
 */
export function extractActionableQueryText(query: string): string | null {
  if (typeof query !== "string") return null;

  const isPlaceholder = (text: string): boolean =>
    text === "" || text === "-" || text === "(query not found)";

  const trimmed = query.trim();
  if (isPlaceholder(trimmed)) return null;

  const unwrapped = stripLeadingWrappedQueryText(trimmed);
  const candidate = unwrapped === "" ? trimmed : unwrapped;
  if (isPlaceholder(candidate)) return null;

  if (SKIP_PREFIXES.some((prefix) => candidate.startsWith(prefix))) return null;
  if (NON_USER_QUERY_PREFIXES.some((prefix) => candidate.startsWith(prefix))) return null;
  if (NON_USER_QUERY_PATTERNS.some((pattern) => pattern.test(candidate))) return null;

  return candidate;
}
94
+
95
/** Report whether `extractActionableQueryText` yields usable text for `query`. */
export function isActionableQueryText(query: string): boolean {
  const actionable = extractActionableQueryText(query);
  return actionable !== null;
}
98
+
99
/**
 * Keep only the query records whose `query` is actionable.
 *
 * Null/undefined records are dropped. A kept record is returned as-is when
 * its query is already clean; otherwise a shallow copy carrying the cleaned
 * query is returned. The input array and its records are not modified.
 */
export function filterActionableQueryRecords(queryRecords: QueryLogRecord[]): QueryLogRecord[] {
  return queryRecords.flatMap((record) => {
    if (record == null) return [];

    const cleaned = extractActionableQueryText(record.query);
    if (!cleaned) return [];

    return [cleaned === record.query ? record : { ...record, query: cleaned }];
  });
}
113
+
114
/**
 * Report whether a skill usage record has a non-blank `skill_name` and an
 * actionable `query`.
 *
 * Returns `false` for null/undefined records, for non-string or blank fields,
 * and for the "(query not found)" placeholder.
 */
export function isActionableSkillUsageRecord(record: SkillUsageRecord | null | undefined): boolean {
  if (record == null) return false;

  const { skill_name: skillName, query: rawQuery } = record;
  const hasSkillName = typeof skillName === "string" && skillName.trim() !== "";
  if (!hasSkillName || typeof rawQuery !== "string") return false;

  const query = rawQuery.trim();
  if (query === "" || query === "(query not found)") return false;

  return extractActionableQueryText(query) !== null;
}
124
+
125
/**
 * Keep only the skill usage records whose `query` is actionable.
 *
 * A kept record is returned as-is when its query is already clean; otherwise
 * a shallow copy carrying the cleaned query is returned. Records without a
 * usable query (including null/undefined records) are dropped.
 */
export function filterActionableSkillUsageRecords(
  skillRecords: SkillUsageRecord[],
): SkillUsageRecord[] {
  return skillRecords.reduce<SkillUsageRecord[]>((kept, record) => {
    const cleaned = extractActionableQueryText(record?.query);
    if (cleaned) {
      kept.push(cleaned === record.query ? record : { ...record, query: cleaned });
    }
    return kept;
  }, []);
}