little-coder 1.4.3 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { harnessIntervention } from "./intervention.ts";
3
+
4
// Pins the helper's user-visible contract: one notify call, carrying the
// "harness intervention: " prefix, at "info" severity.
describe("harnessIntervention", () => {
  it("prefixes the message and uses a single info-level notification", () => {
    // Every notify invocation is recorded as a [message, type] pair.
    const recorded: Array<[string, string | undefined]> = [];
    const notify = (text: string, severity?: any) => {
      recorded.push([text, severity]);
    };

    harnessIntervention({ ui: { notify } }, "the model did X — doing Y.");

    const expected = [["harness intervention: the model did X — doing Y.", "info"]];
    expect(recorded).toEqual(expected);
  });
});
@@ -0,0 +1,41 @@
1
+ // Shared presentation for "harness intervention" events — the moments where
2
+ // little-coder's scaffolding overrides or redirects the model rather than the
3
+ // model deciding for itself (thinking-budget cap, write-guard redirect,
4
+ // turn-cap, finalize-warn, quality-monitor corrections, output-parser nudges).
5
+ //
6
+ // Before this helper each extension emitted its own free-form `ctx.ui.notify`
7
+ // in a different voice and severity, so a single harness decision (e.g. a
8
+ // thinking-budget abort) surfaced as several stacked warnings plus pi's own
9
+ // "Operation aborted" marker. Routing every such message through one helper
10
+ // gives the user a single, consistently-worded line:
11
+ //
12
+ // harness intervention: the model has thought long enough — forcing it to
13
+ // start implementing.
14
+ //
15
+ // This dir intentionally has no `index.ts`, so the launcher's extension
16
+ // auto-discovery (bin/little-coder.mjs: requires `<subdir>/index.ts`) skips
17
+ // it — it is a library imported by the real extensions, not an extension.
18
+
19
// The ctx/ui shapes are declared structurally here instead of being imported
// from pi: the helper then carries no hard dependency on pi's type surface,
// and a unit test can pass a plain object literal as the context.

/** The one UI capability this helper needs: posting a notification line. */
export interface InterventionUI {
  /** Show `message` to the user; `type` optionally selects the notification level. */
  notify(message: string, type?: "info" | "warning" | "error"): void;
}

/** Any object that exposes an {@link InterventionUI} under `ui`. */
export interface InterventionCtx {
  ui: InterventionUI;
}

/**
 * Report one harness intervention to the user as a single, uniformly worded
 * line: the text "harness intervention: " followed by `message`, always sent
 * at the "info" level.
 *
 * @param ctx     Any object exposing `ui.notify` (the event-handler ctx).
 * @param message What the harness did and why, written so that it reads as a
 *                continuation of "harness intervention: ". Lead with the
 *                consequence, e.g. "the model has thought long enough —
 *                forcing it to start implementing."
 */
export function harnessIntervention(ctx: InterventionCtx, message: string): void {
  const { ui } = ctx;
  const line = `harness intervention: ${message}`;
  ui.notify(line, "info");
}
@@ -60,25 +60,38 @@ function loadSettings(): void {
60
60
  }
61
61
  }
62
62
 
63
- function resolveProfile(providerSlashModel: string): ModelProfile {
64
- loadSettings();
65
- if (!settings) return {};
66
- const profiles = settings.model_profiles ?? {};
67
- const bench = process.env.LITTLE_CODER_BENCHMARK;
63
/**
 * Canonicalize a model key by turning every `:` into `-`.
 *
 * Profile keys may be written with hyphens (`llamacpp/qwen3.6-35b-a3b`) while
 * the runtime model id uses a colon (`llamacpp/qwen3.6:35b-a3b`), or the other
 * way round. Comparing the normalized forms lets the two match; without it the
 * prefix match silently fails and every model falls back to
 * default_model_profile, skipping per-model thinking_budget / context_limit /
 * temperature (the quirk surfaced in issue #8's reproduction).
 *
 * Only colons are rewritten — dots (`qwen3.6`), slashes and existing hyphens
 * are left untouched.
 */
export function normKey(s: string): string {
  const segments = s.split(":");
  return segments.join("-");
}
72
+
73
+ // Pure resolver, exported for testing. Exact match → separator-insensitive
74
+ // prefix match → default_model_profile, then benchmark_overrides if `bench` set.
75
+ export function resolveProfileFrom(
76
+ s: LittleCoderSettings | null,
77
+ providerSlashModel: string,
78
+ bench?: string,
79
+ ): ModelProfile {
80
+ if (!s) return {};
81
+ const profiles = s.model_profiles ?? {};
82
+ const target = normKey(providerSlashModel);
68
83
 
69
- // Exact match first, then prefix match (mirrors get_model_profile)
70
84
  let base: ModelProfile | undefined = profiles[providerSlashModel];
71
85
  if (!base) {
72
86
  for (const [pattern, p] of Object.entries(profiles)) {
73
- if (providerSlashModel.startsWith(pattern)) {
87
+ if (target === normKey(pattern) || target.startsWith(normKey(pattern))) {
74
88
  base = p;
75
89
  break;
76
90
  }
77
91
  }
78
92
  }
79
- if (!base) base = settings.default_model_profile ?? {};
93
+ if (!base) base = s.default_model_profile ?? {};
80
94
 
81
- // Strip + apply benchmark_overrides if set
82
95
  const { benchmark_overrides, ...basePlain } = { ...base };
83
96
  if (bench && benchmark_overrides && benchmark_overrides[bench]) {
84
97
  return { ...basePlain, ...benchmark_overrides[bench] };
@@ -86,6 +99,11 @@ function resolveProfile(providerSlashModel: string): ModelProfile {
86
99
  return basePlain;
87
100
  }
88
101
 
102
/**
 * Resolve the profile for `providerSlashModel` against the module's settings.
 *
 * Calls `loadSettings()` first, then delegates to the pure
 * `resolveProfileFrom`, passing the module-level `settings` and the value of
 * the LITTLE_CODER_BENCHMARK environment variable as the benchmark name.
 */
function resolveProfile(providerSlashModel: string): ModelProfile {
  loadSettings();
  const bench = process.env.LITTLE_CODER_BENCHMARK;
  return resolveProfileFrom(settings, providerSlashModel, bench);
}
106
+
89
107
  // Per-benchmark tools that should always have skill cards present on turn 1,
90
108
  // even before the agent has used them. Without this, skill-inject relies on
91
109
  // recency / error-recovery / intent-matching, none of which fire on the
@@ -1,78 +1,87 @@
1
- import { describe, it, expect, beforeEach, afterEach } from "vitest";
1
+ import { describe, it, expect } from "vitest";
2
2
  import { readFileSync } from "node:fs";
3
3
  import { dirname, join } from "node:path";
4
4
  import { fileURLToPath } from "node:url";
5
+ import { resolveProfileFrom, normKey } from "./index.ts";
5
6
 
6
7
  const here = dirname(fileURLToPath(import.meta.url));
7
8
  const settingsPath = join(here, "..", "..", "settings.json");
8
9
 
9
- // Mirror the resolution logic so we can test it as a pure function without
10
- // instantiating the extension.
11
- interface ModelProfile {
12
- thinking_budget?: number;
13
- max_turns?: number;
14
- temperature?: number;
15
- context_limit?: number;
16
- benchmark_overrides?: Record<string, Partial<ModelProfile>>;
17
- }
18
-
19
- function resolveProfile(
20
- settings: { model_profiles?: Record<string, ModelProfile>; default_model_profile?: ModelProfile },
21
- key: string,
22
- benchmark?: string,
23
- ): ModelProfile {
24
- const profiles = settings.model_profiles ?? {};
25
- let base: ModelProfile | undefined = profiles[key];
26
- if (!base) {
27
- for (const [pattern, p] of Object.entries(profiles)) {
28
- if (key.startsWith(pattern)) { base = p; break; }
29
- }
30
- }
31
- if (!base) base = settings.default_model_profile ?? {};
32
- const { benchmark_overrides, ...basePlain } = { ...base };
33
- if (benchmark && benchmark_overrides && benchmark_overrides[benchmark]) {
34
- return { ...basePlain, ...benchmark_overrides[benchmark] };
35
- }
36
- return basePlain;
37
- }
38
-
39
10
  describe("benchmark-profiles resolution against real settings.json", () => {
40
11
  const settings = JSON.parse(readFileSync(settingsPath, "utf-8")).little_coder;
41
12
 
42
- it("resolves base profile for llamacpp/qwen3.6-35b-a3b", () => {
43
- const p = resolveProfile(settings, "llamacpp/qwen3.6-35b-a3b");
44
- expect(p.thinking_budget).toBe(2048);
13
+ it("resolves base profile for llamacpp/qwen3.6-35b-a3b (budget bumped to 4096)", () => {
14
+ const p = resolveProfileFrom(settings, "llamacpp/qwen3.6-35b-a3b");
15
+ expect(p.thinking_budget).toBe(4096);
45
16
  expect(p.context_limit).toBe(32768);
46
17
  expect(p.max_turns).toBeUndefined();
47
18
  });
48
19
 
49
20
  it("applies terminal_bench overrides", () => {
50
- const p = resolveProfile(settings, "llamacpp/qwen3.6-35b-a3b", "terminal_bench");
51
- expect(p.thinking_budget).toBe(3000);
21
+ const p = resolveProfileFrom(settings, "llamacpp/qwen3.6-35b-a3b", "terminal_bench");
22
+ expect(p.thinking_budget).toBe(3000); // benchmark override kept
52
23
  expect(p.temperature).toBe(0.2);
53
24
  expect(p.max_turns).toBe(40);
54
- // Non-overridden fields fall through from base
55
25
  expect(p.context_limit).toBe(32768);
56
26
  });
57
27
 
58
28
  it("applies gaia overrides", () => {
59
- const p = resolveProfile(settings, "llamacpp/qwen3.6-35b-a3b", "gaia");
29
+ const p = resolveProfileFrom(settings, "llamacpp/qwen3.6-35b-a3b", "gaia");
60
30
  expect(p.thinking_budget).toBe(2000);
61
31
  expect(p.temperature).toBe(0.4);
62
32
  expect(p.max_turns).toBe(40);
63
33
  expect(p.context_limit).toBe(65536);
64
34
  });
65
35
 
66
- it("unknown model falls back to default_model_profile", () => {
67
- const p = resolveProfile(settings, "fake-provider/fake-model");
68
- // Default profile defined in settings.json
69
- expect(p.thinking_budget).toBe(2048);
36
+ it("unknown model falls back to default_model_profile (also 4096)", () => {
37
+ const p = resolveProfileFrom(settings, "fake-provider/fake-model");
38
+ expect(p.thinking_budget).toBe(4096);
70
39
  expect(p.context_limit).toBe(32768);
71
40
  });
72
41
 
73
42
  it("unknown benchmark name yields base profile unchanged", () => {
74
- const p = resolveProfile(settings, "llamacpp/qwen3.6-35b-a3b", "totally_made_up");
75
- expect(p.thinking_budget).toBe(2048);
43
+ const p = resolveProfileFrom(settings, "llamacpp/qwen3.6-35b-a3b", "totally_made_up");
44
+ expect(p.thinking_budget).toBe(4096);
76
45
  expect(p.max_turns).toBeUndefined();
77
46
  });
47
+
48
+ it("every shipped per-model profile carries the 4096 budget", () => {
49
+ for (const key of Object.keys(settings.model_profiles)) {
50
+ expect(resolveProfileFrom(settings, key).thinking_budget, key).toBe(4096);
51
+ }
52
+ });
53
+ });
54
+
55
+ describe("separator-insensitive model-key matching (issue #8 quirk)", () => {
56
+ // The reproduction noted runtime ids using a colon (`qwen3.6:35b-a3b`) never
57
+ // matched the hyphenated profile key, so per-model profiles were silently
58
+ // skipped and everything fell back to default.
59
+ const settings = {
60
+ default_model_profile: { thinking_budget: 4096 },
61
+ model_profiles: {
62
+ "llamacpp/qwen3.6-35b-a3b": { thinking_budget: 1234, temperature: 0.3 },
63
+ },
64
+ };
65
+
66
+ it("normKey collapses ':' to '-'", () => {
67
+ expect(normKey("llamacpp/qwen3.6:35b-a3b")).toBe("llamacpp/qwen3.6-35b-a3b");
68
+ });
69
+
70
+ it("matches a colon runtime id to a hyphenated profile key", () => {
71
+ const p = resolveProfileFrom(settings, "llamacpp/qwen3.6:35b-a3b");
72
+ expect(p.thinking_budget).toBe(1234); // per-model profile, NOT the default
73
+ });
74
+
75
+ it("still matches the exact hyphenated id", () => {
76
+ expect(resolveProfileFrom(settings, "llamacpp/qwen3.6-35b-a3b").thinking_budget).toBe(1234);
77
+ });
78
+
79
+ it("matches via prefix when the runtime id has a tag suffix", () => {
80
+ const p = resolveProfileFrom(settings, "llamacpp/qwen3.6:35b-a3b:Q4_K_M");
81
+ expect(p.thinking_budget).toBe(1234);
82
+ });
83
+
84
+ it("an unrelated model still falls back to default", () => {
85
+ expect(resolveProfileFrom(settings, "ollama/llama3").thinking_budget).toBe(4096);
86
+ });
78
87
  });
@@ -0,0 +1,37 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import setupClear from "./index.ts";
3
+
4
describe("/clear command", () => {
  type Registration = { name: string; opts: any };

  // Runs the extension against a stub `pi` that only records registerCommand
  // calls, and hands back the most recent registration.
  const register = (): Registration => {
    const captured: Registration[] = [];
    const pi = {
      registerCommand: (name: string, opts: any) => {
        captured.push({ name, opts });
      },
    };
    setupClear(pi as any);
    const latest = captured[captured.length - 1];
    if (!latest) throw new Error("no command registered");
    return latest;
  };

  it("registers a command named 'clear' with a description", () => {
    const { name, opts } = register();
    expect(name).toBe("clear");
    expect(typeof opts.description).toBe("string");
    expect(opts.description.length).toBeGreaterThan(0);
    expect(typeof opts.handler).toBe("function");
  });

  it("starts a new session when invoked", async () => {
    const { opts } = register();
    let newSessionCalls = 0;
    // Stub ctx: counts newSession() invocations and reports "not cancelled".
    const ctx = {
      async newSession() {
        newSessionCalls += 1;
        return { cancelled: false };
      },
    };
    await opts.handler("", ctx);
    expect(newSessionCalls).toBe(1);
  });
});
@@ -0,0 +1,26 @@
1
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+
3
/**
 * Registers a `/clear` command — the name users expect from other coding
 * agents — that resets the session as if little-coder had been closed and
 * relaunched.
 *
 * The handler only calls `ctx.newSession()`, which drives pi's full
 * session-replacement lifecycle:
 *   session_before_switch → session_shutdown → session_start{reason:"new"}
 *     → resources_discover{reason:"startup"-equivalent}
 * As a result:
 *   - little-coder's branding header is re-rendered (the branding extension
 *     hooks session_start),
 *   - the harness system prompt / AGENTS.md context is rebuilt from scratch,
 *   - every session_start-scoped extension resets its module state
 *     (quality-monitor counters, evidence store, etc.).
 *
 * pi already ships `/new` for this; `/clear` is registered as an alias so the
 * muscle-memory command works and the help/branding line can advertise it.
 */
export default function (pi: ExtensionAPI) {
  const description =
    "Start a fresh session — clears history and reloads context, like relaunching";

  pi.registerCommand("clear", {
    description,
    async handler(_args, ctx) {
      // newSession() owns the confirm/cancel flow and the whole lifecycle; it
      // returns { cancelled } if the user backed out, which needs no
      // follow-up here.
      await ctx.newSession();
    },
  });
}
@@ -1,4 +1,5 @@
1
1
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { harnessIntervention } from "../_shared/intervention.ts";
2
3
 
3
4
  // Pre-cap finalize-warn: when the agent has WARN_REMAINING turns left
4
5
  // (this turn included), inject a follow-up user message reminding it to
@@ -60,9 +61,9 @@ export default function (pi: ExtensionAPI) {
60
61
  `Do not start new tool chains; if you need a fact you don't have, ` +
61
62
  `answer with your best supported guess from EvidenceList rather than ` +
62
63
  `leaving it blank.`;
63
- ctx.ui.notify(
64
- `finalize-warn: ${WARN_REMAINING} turns left at ${turnsThisRun}/${capForRun}`,
65
- "info",
64
+ harnessIntervention(
65
+ ctx,
66
+ `${WARN_REMAINING} turns left — telling the model to finalize its answer now.`,
66
67
  );
67
68
  try {
68
69
  pi.sendUserMessage(msg, { deliverAs: "followUp" });
@@ -1,5 +1,6 @@
1
1
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
2
  import { parseTextToolCalls } from "./parser.ts";
3
+ import { harnessIntervention } from "../_shared/intervention.ts";
3
4
 
4
5
  // Detects malformed/fenced tool calls in assistant text and nudges the model
5
6
  // back onto native tool-calling. Active-repair (executing extracted calls
@@ -37,9 +38,9 @@ export default function (pi: ExtensionAPI) {
37
38
  if (calls.length === 0) return;
38
39
 
39
40
  const names = calls.map((c) => c.name).join(", ");
40
- ctx.ui.notify(
41
- `Detected ${calls.length} text-embedded tool call(s) [${names}] — nudging model to native tool calling`,
42
- "warning",
41
+ harnessIntervention(
42
+ ctx,
43
+ `the model wrote ${calls.length} tool call(s) as text [${names}] — nudging it back to native tool calls.`,
43
44
  );
44
45
 
45
46
  // Queue a follow-up that will be delivered after the agent finishes.
@@ -1,5 +1,6 @@
1
1
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
- import { assessResponse, buildCorrectionMessage, type ToolCall } from "./quality.ts";
2
+ import { assessResponse, buildCorrectionMessage, phraseForUser, type ToolCall } from "./quality.ts";
3
+ import { harnessIntervention } from "../_shared/intervention.ts";
3
4
 
4
5
  // Port of local/quality.py. Hooks turn_end, inspects the assistant message
5
6
  // + previous turn's tool calls, and — if we detect a failure mode — sends
@@ -30,6 +31,15 @@ export default function (pi: ExtensionAPI) {
30
31
  const message = (event as any).message;
31
32
  if (!message) return;
32
33
 
34
+ // Skip turns that were interrupted/aborted — by the user pressing ESC OR by
35
+ // a harness abort (thinking-budget, turn-cap). pi marks these with
36
+ // stopReason "aborted"; their content is legitimately partial/empty, so
37
+ // assessing them spuriously fires `empty_response` and steers a "your
38
+ // previous response was empty" correction onto the user's NEXT prompt
39
+ // (the escape-interrupt bug, and the second false warning in the
40
+ // thinking-budget cascade). An aborted turn is not a model quality failure.
41
+ if (message.stopReason === "aborted") return;
42
+
33
43
  // Extract assistant text + tool calls from pi's content-block format
34
44
  const content = Array.isArray(message.content) ? message.content : [];
35
45
  const text = content
@@ -53,18 +63,15 @@ export default function (pi: ExtensionAPI) {
53
63
  // Cap corrections so we don't burn turns in a correction loop
54
64
  consecutiveFailures++;
55
65
  if (consecutiveFailures > MAX_CONSECUTIVE_CORRECTIONS) {
56
- ctx.ui.notify(
57
- `quality-monitor: ${verdict.reason} (suppressed after ${consecutiveFailures} in a row)`,
58
- "warning",
66
+ harnessIntervention(
67
+ ctx,
68
+ `${phraseForUser(verdict.reason)} — backing off after ${consecutiveFailures} in a row.`,
59
69
  );
60
70
  return;
61
71
  }
62
72
 
63
73
  const correction = buildCorrectionMessage(verdict.reason);
64
- ctx.ui.notify(
65
- `quality-monitor: ${verdict.reason} → injecting correction`,
66
- "warning",
67
- );
74
+ harnessIntervention(ctx, `${phraseForUser(verdict.reason)} — redirecting the model.`);
68
75
  // "steer" delivers the correction promptly to the in-flight loop. The
69
76
  // prior "followUp" mode parked the message until the *next* user input,
70
77
  // by which point it was no longer relevant (issue #16).
@@ -1,5 +1,6 @@
1
- import { describe, it, expect } from "vitest";
2
- import { assessResponse, buildCorrectionMessage } from "./quality.ts";
1
+ import { describe, it, expect, beforeEach } from "vitest";
2
+ import { assessResponse, buildCorrectionMessage, phraseForUser } from "./quality.ts";
3
+ import setupQualityMonitor from "./index.ts";
3
4
 
4
5
  const known = new Set(["Read", "Write", "Edit", "Bash", "Glob", "Grep"]);
5
6
 
@@ -73,3 +74,68 @@ describe("buildCorrectionMessage", () => {
73
74
  expect(buildCorrectionMessage("weird_thing")).toContain("weird_thing");
74
75
  });
75
76
  });
77
+
78
// phraseForUser turns a quality-monitor reason code into the short wording
// shown on the harness-intervention line.
describe("phraseForUser", () => {
  it("phrases known reasons in plain language", () => {
    const expectations: Array<[string, RegExp]> = [
      ["empty_response", /empty response/i],
      ["repeated_tool_call", /repeated/i],
    ];
    for (const [reason, wording] of expectations) {
      expect(phraseForUser(reason)).toMatch(wording);
    }
  });

  it("includes the tool name for parameterized reasons", () => {
    const expectations: Array<[string, string]> = [
      ["unknown_tool:Frobnicate", "Frobnicate"],
      ["malformed_args:Edit", "Edit"],
    ];
    for (const [reason, toolName] of expectations) {
      expect(phraseForUser(reason)).toContain(toolName);
    }
  });
});
88
+
89
+ // ── turn_end handler: must skip interrupted/aborted turns ───────────────────
90
/**
 * Build a throwaway test rig around the quality-monitor extension.
 *
 * Returns a stub `pi` that records event handlers by name (exposed as
 * `pi.handlers`) and collects every `sendUserMessage` call in `followUps`,
 * plus a stub `ctx` whose `ui.notify` appends each message to `notifies`.
 * The extension is installed on the stub before returning.
 */
function harness() {
  type Handler = (e: any, c: any) => any;

  const handlers: Record<string, Handler[]> = {};
  const followUps: { msg: string; opts: any }[] = [];
  const notifies: string[] = [];

  const pi = {
    handlers,
    on: (name: string, h: Handler) => {
      // First registration for an event creates its handler list.
      const list = handlers[name] ?? (handlers[name] = []);
      list.push(h);
    },
    sendUserMessage: (msg: string, opts: any) => {
      followUps.push({ msg, opts });
    },
  };

  const ctx = {
    ui: {
      notify: (m: string) => {
        notifies.push(m);
      },
    },
  };

  setupQualityMonitor(pi as any);
  return { pi, ctx, followUps, notifies };
}
107
/**
 * Deliver `event` to every handler registered under `name` on the rig `h`,
 * one after another, awaiting each before starting the next. Each handler is
 * called with `(event, h.ctx)`. An event name with no handlers is a no-op.
 */
async function fire(h: any, name: string, event: any) {
  const registered = h.pi.handlers[name];
  if (registered == null) return;
  for (const handler of registered) {
    await handler(event, h.ctx);
  }
}
110
+
111
describe("quality-monitor turn_end", () => {
  let h: ReturnType<typeof harness>;

  // Sends a turn_end event carrying the given assistant message to the rig.
  const endTurn = (message: { stopReason: string; content: unknown[] }) =>
    fire(h, "turn_end", { message });

  beforeEach(async () => {
    h = harness();
    // Fresh rig per test; session_start resets the session-scoped counters.
    await fire(h, "session_start", {});
  });

  it("skips an aborted/interrupted turn — no empty_response correction", async () => {
    // An ESC interrupt or a harness abort leaves a partial/empty message whose
    // stopReason is "aborted". That is the escape-interrupt bug: such a turn
    // must NOT steer a 'your previous response was empty' correction onto the
    // next prompt.
    await endTurn({ stopReason: "aborted", content: [] });

    expect(h.followUps).toHaveLength(0);
    expect(h.notifies).toHaveLength(0);
  });

  it("flags a genuinely empty COMPLETED turn and steers a correction", async () => {
    await endTurn({ stopReason: "stop", content: [] });

    expect(h.followUps).toHaveLength(1);
    const [correction] = h.followUps;
    expect(correction.opts).toEqual({ deliverAs: "steer" });
    const [firstNotice] = h.notifies;
    expect(firstNotice).toMatch(/harness intervention:/i);
  });

  it("passes a normal text turn without intervention", async () => {
    const textBlock = { type: "text", text: "done." };
    await endTurn({ stopReason: "stop", content: [textBlock] });

    expect(h.followUps).toHaveLength(0);
    expect(h.notifies).toHaveLength(0);
  });
});
@@ -82,3 +82,20 @@ export function buildCorrectionMessage(reason: string): string {
82
82
 
83
83
  return corrections[reason] ?? `Issue detected: ${reason}. Please try again.`;
84
84
  }
85
+
86
// Plain-language wording for each fixed (non-parameterized) reason code.
// Held in a Map rather than an object literal so that a reason string which
// happens to name an Object.prototype member ("toString", "constructor", …)
// is a miss, instead of resolving to an inherited function.
const USER_PHRASES: ReadonlyMap<string, string> = new Map([
  ["empty_response", "the model returned an empty response"],
  ["empty_tool_name", "the model emitted a tool call with no name"],
  ["repeated_tool_call", "the model repeated its previous tool call verbatim"],
]);

/**
 * Short, user-facing phrasing for the harness-intervention line (distinct from
 * buildCorrectionMessage, which is the verbose text sent to the model).
 *
 * @param reason A quality-monitor reason code. `unknown_tool:<name>` and
 *               `malformed_args:<name>` are parameterized: the text after the
 *               colon is echoed in parentheses. Other codes are looked up in
 *               a fixed table.
 * @returns A lowercase clause with no trailing punctuation, suitable for
 *          embedding in a longer sentence. Unrecognized codes yield
 *          `quality issue (<reason>)`.
 */
export function phraseForUser(reason: string): string {
  if (reason.startsWith("unknown_tool:")) {
    return `the model called a tool that doesn't exist (${reason.slice("unknown_tool:".length)})`;
  }
  if (reason.startsWith("malformed_args:")) {
    return `the model's tool arguments were malformed (${reason.slice("malformed_args:".length)})`;
  }
  return USER_PHRASES.get(reason) ?? `quality issue (${reason})`;
}