@elench/testkit 0.1.96 → 0.1.98

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/lib/app/browser-bridge.mjs +1 -1
  2. package/lib/cli/assistant/app.mjs +49 -12
  3. package/lib/cli/assistant/composer.mjs +19 -1
  4. package/lib/cli/assistant/context-pack.mjs +9 -8
  5. package/lib/cli/assistant/interactive.mjs +1 -1
  6. package/lib/cli/assistant/model-discovery.mjs +243 -0
  7. package/lib/cli/assistant/prompt-builder.mjs +2 -5
  8. package/lib/cli/{agents → assistant}/providers/claude.mjs +41 -3
  9. package/lib/cli/{agents → assistant}/providers/codex.mjs +33 -14
  10. package/lib/cli/{agents → assistant/providers}/index.mjs +3 -3
  11. package/lib/cli/{agents → assistant}/providers/shared.mjs +6 -2
  12. package/lib/cli/assistant/session.mjs +31 -6
  13. package/lib/cli/assistant/slash-commands.mjs +30 -3
  14. package/lib/cli/assistant/state.mjs +237 -71
  15. package/lib/cli/assistant/tool-registry.mjs +325 -39
  16. package/lib/cli/assistant/view-model.mjs +1 -1
  17. package/lib/cli/commands/assistant.mjs +4 -3
  18. package/lib/cli/commands/browser/serve.mjs +5 -23
  19. package/lib/cli/commands/cleanup.mjs +8 -2
  20. package/lib/cli/commands/db/snapshot/capture.mjs +8 -4
  21. package/lib/cli/commands/destroy.mjs +8 -2
  22. package/lib/cli/commands/discover.mjs +5 -27
  23. package/lib/cli/commands/doctor.mjs +5 -5
  24. package/lib/cli/commands/flags.mjs +61 -0
  25. package/lib/cli/commands/run.mjs +10 -2
  26. package/lib/cli/commands/status.mjs +10 -2
  27. package/lib/cli/commands/typecheck.mjs +5 -5
  28. package/lib/cli/{tui/inspect-app.mjs → components/blocks/run-tree.mjs} +29 -54
  29. package/lib/cli/{tui → components/primitives}/filter-bar.mjs +1 -1
  30. package/lib/cli/{presentation → components/primitives}/summary-box.mjs +1 -1
  31. package/lib/cli/config.mjs +63 -0
  32. package/lib/cli/operations/browser/serve/operation.mjs +23 -0
  33. package/lib/cli/operations/cleanup/operation.mjs +8 -0
  34. package/lib/cli/{db.mjs → operations/db/snapshot/capture/operation.mjs} +15 -9
  35. package/lib/cli/operations/destroy/operation.mjs +12 -0
  36. package/lib/cli/operations/discover/operation.mjs +32 -0
  37. package/lib/cli/operations/doctor/operation.mjs +5 -0
  38. package/lib/cli/operations/run/operation.mjs +129 -0
  39. package/lib/cli/operations/status/operation.mjs +7 -0
  40. package/lib/cli/operations/typecheck/operation.mjs +5 -0
  41. package/lib/cli/renderers/browser-serve/text.mjs +6 -0
  42. package/lib/cli/renderers/cleanup/text.mjs +3 -0
  43. package/lib/cli/renderers/db-snapshot-capture/text.mjs +3 -0
  44. package/lib/cli/renderers/destroy/text.mjs +3 -0
  45. package/lib/cli/{presentation/discovery-reporter.mjs → renderers/discover/report.mjs} +3 -3
  46. package/lib/cli/renderers/discover/text.mjs +7 -0
  47. package/lib/cli/renderers/doctor/text.mjs +7 -0
  48. package/lib/cli/{presentation/failure-presentation.mjs → renderers/run/failure.mjs} +6 -6
  49. package/lib/cli/renderers/run/interactive.mjs +119 -0
  50. package/lib/cli/{presentation/run-reporter.mjs → renderers/run/text-reporter.mjs} +5 -5
  51. package/lib/cli/renderers/status/text.mjs +7 -0
  52. package/lib/cli/renderers/typecheck/text.mjs +7 -0
  53. package/lib/cli/{tui/inspect-model.mjs → state/run/model.mjs} +11 -26
  54. package/lib/cli/{tui/inspect-state.mjs → state/run/state.mjs} +11 -18
  55. package/lib/cli/{tui → state/tree}/fuzzy-match.mjs +1 -1
  56. package/lib/cli/terminal/capabilities.mjs +33 -0
  57. package/lib/database/index.mjs +9 -21
  58. package/lib/database/template-steps.mjs +3 -3
  59. package/lib/{cli/viewer.mjs → results/artifacts.mjs} +1 -1
  60. package/lib/{cli/context-resources.mjs → results/context.mjs} +1 -1
  61. package/lib/runner/maintenance.mjs +25 -14
  62. package/lib/runner/readiness.mjs +5 -4
  63. package/lib/runner/runtime-preparation.mjs +36 -0
  64. package/lib/runner/state-io.mjs +10 -4
  65. package/lib/runner/template.mjs +24 -3
  66. package/node_modules/@elench/next-analysis/package.json +1 -1
  67. package/node_modules/@elench/testkit-bridge/package.json +2 -2
  68. package/node_modules/@elench/testkit-protocol/package.json +1 -1
  69. package/node_modules/@elench/ts-analysis/package.json +1 -1
  70. package/package.json +5 -5
  71. package/lib/cli/assistant/command-plan.mjs +0 -227
  72. package/lib/cli/command-helpers.mjs +0 -191
  73. package/lib/cli/presentation/tree-reporter.mjs +0 -96
  74. package/lib/cli/tui/inspect-artifact-adapter.mjs +0 -3
  75. package/lib/cli/tui/inspect-live-adapter.mjs +0 -15
  76. package/lib/cli/{presentation/events-reporter.mjs → renderers/run/events.mjs} +0 -0
  77. package/lib/cli/{presentation → terminal}/colors.mjs +0 -0
  78. package/lib/cli/{presentation/terminal-layout.mjs → terminal/layout.mjs} +0 -0
  79. package/lib/{cli/presentation → results}/code-frames.mjs +0 -0
@@ -1,6 +1,6 @@
1
1
  import { loadConfigContext, resolveProductDir } from "../config/index.mjs";
2
2
  import { discoverTests } from "../discovery/index.mjs";
3
- import { loadCurrentRunArtifact } from "../cli/viewer.mjs";
3
+ import { loadCurrentRunArtifact } from "../results/artifacts.mjs";
4
4
 
5
5
  export async function loadBrowserBridgeContext(options = {}) {
6
6
  const productDir = resolveProductDir(process.cwd(), options.dir);
@@ -1,7 +1,8 @@
1
- import React, { createElement, useEffect, useMemo, useState } from "react";
2
- import { Box, Text, useApp, useInput, useStdout } from "ink";
3
- import { bold, cyan, dim, green, red, yellow } from "../presentation/colors.mjs";
4
- import { getComposerRenderParts } from "./composer.mjs";
1
+ import React, { createElement, useEffect, useMemo, useRef, useState } from "react";
2
+ import { Box, Text, useApp, useBoxMetrics, useCursor, useInput, useStdout } from "ink";
3
+ import { bold, cyan, dim, green, red, yellow } from "../terminal/colors.mjs";
4
+ import { RunTreeView } from "../components/blocks/run-tree.mjs";
5
+ import { getComposerDisplayModel } from "./composer.mjs";
5
6
  import { buildAssistantViewModel } from "./view-model.mjs";
6
7
 
7
8
  const MAX_BLOCK_LINES = 18;
@@ -49,6 +50,7 @@ export function AssistantApp({
49
50
  }),
50
51
  [snapshot, stdout?.columns]
51
52
  );
53
+ const runSession = assistantState.getLiveRunSession?.() || assistantState.getLastRunSession?.() || null;
52
54
 
53
55
  return createElement(
54
56
  Box,
@@ -63,6 +65,28 @@ export function AssistantApp({
63
65
  view.blocks.length === 0
64
66
  ? createElement(WelcomePanel, { view })
65
67
  : createElement(Transcript, { view }),
68
+ runSession
69
+ ? createElement(
70
+ Box,
71
+ { flexDirection: "column", marginTop: 1 },
72
+ createElement(Text, null, bold("Run Session")),
73
+ createElement(
74
+ Box,
75
+ {
76
+ borderStyle: "round",
77
+ flexDirection: "column",
78
+ paddingLeft: 1,
79
+ paddingRight: 1,
80
+ },
81
+ createElement(RunTreeView, {
82
+ runState: runSession.runState,
83
+ stdout,
84
+ productDir: runSession.productDir,
85
+ interactive: false,
86
+ })
87
+ )
88
+ )
89
+ : null,
66
90
  createElement(Text, null, ""),
67
91
  createElement(ComposerBar, { view, busy: snapshot.busy }),
68
92
  createElement(Text, null, dim(view.statusLine)),
@@ -177,15 +201,30 @@ function renderBlock(block) {
177
201
  }
178
202
 
179
203
  function ComposerBar({ view, busy }) {
180
- const { before, current, after, empty } = getComposerRenderParts({
181
- text: view.composer.text,
182
- cursor: view.composer.cursor,
183
- });
204
+ const ref = useRef(null);
205
+ const metrics = useBoxMetrics(ref);
206
+ const { setCursorPosition } = useCursor();
207
+ const display = getComposerDisplayModel(
208
+ {
209
+ text: view.composer.text,
210
+ cursor: view.composer.cursor,
211
+ },
212
+ { placeholder: view.composer.placeholder }
213
+ );
214
+ setCursorPosition(
215
+ metrics.hasMeasured
216
+ ? {
217
+ x: 2 + display.cursorColumn,
218
+ y: metrics.top + 1,
219
+ }
220
+ : undefined
221
+ );
222
+
184
223
  const prompt = cyan("❯");
185
- const promptText = empty ? dim(`${view.composer.placeholder} `) : before;
186
224
  return createElement(
187
225
  Box,
188
226
  {
227
+ ref,
189
228
  borderStyle: "single",
190
229
  borderLeft: false,
191
230
  borderRight: false,
@@ -196,9 +235,7 @@ function ComposerBar({ view, busy }) {
196
235
  Text,
197
236
  null,
198
237
  `${prompt} `,
199
- promptText,
200
- createElement(Text, { inverse: true }, current),
201
- after,
238
+ display.empty ? dim(display.placeholder) : display.text,
202
239
  busy ? dim(" provider responding") : ""
203
240
  )
204
241
  );
@@ -1,3 +1,5 @@
1
+ import { measureWidth } from "../terminal/layout.mjs";
2
+
1
3
  const segmenter =
2
4
  typeof Intl !== "undefined" && typeof Intl.Segmenter === "function"
3
5
  ? new Intl.Segmenter(undefined, { granularity: "grapheme" })
@@ -81,14 +83,30 @@ export function moveComposerCursorToEnd(state) {
81
83
  export function getComposerRenderParts(state) {
82
84
  const parts = splitGraphemes(state?.text || "");
83
85
  const cursor = clampCursor(state?.cursor ?? parts.length, parts.length);
86
+ const before = parts.slice(0, cursor).join("");
84
87
  return {
85
- before: parts.slice(0, cursor).join(""),
88
+ before,
86
89
  current: parts[cursor] || " ",
87
90
  after: parts.slice(cursor + (parts[cursor] ? 1 : 0)).join(""),
88
91
  empty: parts.length === 0,
89
92
  };
90
93
  }
91
94
 
95
+ export function getComposerDisplayModel(state, { placeholder = "" } = {}) {
96
+ const parts = splitGraphemes(state?.text || "");
97
+ const cursor = clampCursor(state?.cursor ?? parts.length, parts.length);
98
+ const before = parts.slice(0, cursor).join("");
99
+ const text = parts.join("");
100
+ return {
101
+ text,
102
+ before,
103
+ after: parts.slice(cursor).join(""),
104
+ cursorColumn: measureWidth(before),
105
+ placeholder: String(placeholder || ""),
106
+ empty: parts.length === 0,
107
+ };
108
+ }
109
+
92
110
  function normalizeComposerState(state) {
93
111
  const text = String(state?.text || "");
94
112
  const parts = splitGraphemes(text);
@@ -1,11 +1,11 @@
1
1
  import fs from "fs";
2
2
  import path from "path";
3
3
  import { fileURLToPath } from "url";
4
- import { readContextContent, buildContextSelection } from "../context-resources.mjs";
4
+ import { readContextContent, buildContextSelection } from "../../results/context.mjs";
5
5
 
6
6
  export function prepareAssistantContextPack({
7
7
  productDir,
8
- inspectState,
8
+ runState,
9
9
  } = {}) {
10
10
  const contextDir = path.join(productDir, ".testkit", "assistant");
11
11
  const binDir = path.join(contextDir, "bin");
@@ -23,7 +23,7 @@ export function prepareAssistantContextPack({
23
23
  const wrapperPath = path.join(binDir, "testkit");
24
24
 
25
25
  function refresh() {
26
- const snapshot = inspectState?.getSnapshot?.() || {};
26
+ const snapshot = runState?.getSnapshot?.() || {};
27
27
  const detailContent = readContextContent({ productDir, snapshot, mode: "detail", logTail: 12 });
28
28
  const logsContent = readContextContent({ productDir, snapshot, mode: "logs", logTail: 12 });
29
29
  const artifactsContent = readContextContent({ productDir, snapshot, mode: "artifacts", logTail: 12 });
@@ -160,9 +160,8 @@ function buildContextMarkdown(productDir, snapshot, paths) {
160
160
  lines.push(
161
161
  "",
162
162
  "## Guidance",
163
- "- Use the local `testkit` command directly when you need to execute or inspect tests.",
164
- "- Preferred commands: `testkit run --dir . --type <type>`, `testkit discover --dir .`, `testkit status --dir .`, and `testkit doctor --dir .`.",
165
- "- Do not launch testkit through pnpm, npm, yarn, bun, or npx unless the user explicitly asks for that exact package-manager command.",
163
+ "- Use dedicated testkit tools for run/discover/status/doctor/typecheck actions before falling back to generic shell commands.",
164
+ "- Do not reinterpret CLI syntax after an execution failure unless `testkit run --help` confirms a syntax problem.",
166
165
  "- Use the command log and focused context files before rereading artifacts manually.",
167
166
  "- Prefer repo-local commands over guessing project-specific wrappers.",
168
167
  ""
@@ -175,13 +174,15 @@ function buildCommandsMarkdown() {
175
174
  return [
176
175
  "# Testkit Commands",
177
176
  "",
178
- "- `testkit run --dir . --type int`",
179
- "- `testkit run --dir . --type e2e`",
177
+ "- `testkit run int --dir .`",
178
+ "- `testkit run e2e --dir .`",
180
179
  "- `testkit run --dir . --file path/to/file.testkit.ts`",
181
180
  "- `testkit discover --dir .`",
182
181
  "- `testkit status --dir .`",
183
182
  "- `testkit doctor --dir .`",
184
183
  "- `testkit destroy --dir .`",
184
+ "- `npm run testkit`",
185
+ "- `npx testkit run e2e --dir .`",
185
186
  "",
186
187
  ].join("\n");
187
188
  }
@@ -2,7 +2,7 @@ import React, { createElement } from "react";
2
2
  import { render } from "ink";
3
3
  import { createAssistantState } from "./state.mjs";
4
4
  import { AssistantApp } from "./app.mjs";
5
- import { loadLatestRunArtifact, resolveFileSubject } from "../viewer.mjs";
5
+ import { loadLatestRunArtifact, resolveFileSubject } from "../../results/artifacts.mjs";
6
6
 
7
7
  export async function runInteractiveAssistant({
8
8
  productDir,
@@ -0,0 +1,243 @@
1
+ import fs from "fs";
2
+ import os from "os";
3
+ import path from "path";
4
+ import { execaCommand } from "execa";
5
+
6
+ const CLAUDE_ALIASES = ["default", "best", "sonnet", "opus", "haiku", "opusplan", "sonnet[1m]", "opus[1m]"];
7
+ const CACHE_MAX_AGE_MS = 5 * 60 * 1000;
8
+
9
+ export async function discoverAssistantModels({
10
+ provider,
11
+ productDir = process.cwd(),
12
+ env = process.env,
13
+ } = {}) {
14
+ const resolvedProvider = String(provider || "").trim();
15
+ if (resolvedProvider === "codex") {
16
+ return discoverCodexModels({ productDir, env });
17
+ }
18
+ if (resolvedProvider === "claude") {
19
+ return discoverClaudeModels({ productDir, env });
20
+ }
21
+ return {
22
+ provider: resolvedProvider || "unknown",
23
+ source: "none",
24
+ models: [providerDefaultModel()],
25
+ warnings: ["No provider is resolved yet."],
26
+ };
27
+ }
28
+
29
+ export async function discoverCodexModels({ productDir = process.cwd(), env = process.env } = {}) {
30
+ const cachePath = path.join(productDir, ".testkit", "assistant", "model-cache-codex.json");
31
+ const cached = readFreshCache(cachePath);
32
+ if (cached) return cached;
33
+
34
+ const command = env.TESTKIT_CODEX_BIN || "codex";
35
+ let catalog = null;
36
+ let warning = null;
37
+ try {
38
+ const result = await execaCommand(`${shellQuote(command)} debug models`, {
39
+ cwd: productDir,
40
+ reject: false,
41
+ shell: true,
42
+ env: { ...process.env, ...env },
43
+ });
44
+ if ((result.exitCode ?? 1) === 0) {
45
+ catalog = JSON.parse(result.stdout || "{}");
46
+ } else {
47
+ warning = (result.stderr || result.stdout || "codex debug models failed").trim();
48
+ }
49
+ } catch (error) {
50
+ warning = error instanceof Error ? error.message : String(error);
51
+ }
52
+
53
+ if (!catalog) {
54
+ catalog = readJson(path.join(os.homedir(), ".codex", "models_cache.json"));
55
+ }
56
+
57
+ const models = normalizeCodexModels(catalog);
58
+ const discovery = {
59
+ provider: "codex",
60
+ source: catalog ? "codex debug models" : "fallback",
61
+ models: [providerDefaultModel(), ...models],
62
+ warnings: warning && models.length === 0 ? [warning] : [],
63
+ };
64
+ writeCache(cachePath, discovery);
65
+ return discovery;
66
+ }
67
+
68
+ export async function discoverClaudeModels({ productDir = process.cwd(), env = process.env } = {}) {
69
+ const configured = readClaudeAvailableModels({ productDir });
70
+ const apiModels = await fetchAnthropicModels({ env });
71
+ const dynamicModels = apiModels.map((model) => ({
72
+ id: model.id,
73
+ label: model.displayName || model.id,
74
+ description: model.description || "",
75
+ source: "anthropic api",
76
+ concrete: true,
77
+ }));
78
+ const aliasModels = CLAUDE_ALIASES.map((id) => ({
79
+ id: id === "default" ? null : id,
80
+ label: id === "default" ? "provider default" : id,
81
+ description: id === "default" ? "Use Claude Code's selected default model." : "Claude Code model alias.",
82
+ source: "claude alias",
83
+ concrete: false,
84
+ }));
85
+
86
+ const merged = dedupeModels([...aliasModels, ...dynamicModels]);
87
+ const restricted = configured.length > 0
88
+ ? merged.filter((model) => model.id == null || configured.includes(model.id))
89
+ : merged;
90
+
91
+ return {
92
+ provider: "claude",
93
+ source: apiModels.length > 0 ? "anthropic api" : "claude aliases",
94
+ models: restricted,
95
+ warnings: apiModels.length > 0
96
+ ? []
97
+ : ["Claude Code does not expose a scriptable model catalog; showing stable aliases."],
98
+ };
99
+ }
100
+
101
+ export function normalizeCodexModels(catalog) {
102
+ const rawModels = Array.isArray(catalog?.models) ? catalog.models : [];
103
+ return rawModels
104
+ .filter((model) => model?.slug && (model.visibility == null || model.visibility === "list"))
105
+ .sort((a, b) => Number(a.priority ?? 999) - Number(b.priority ?? 999))
106
+ .map((model) => ({
107
+ id: String(model.slug),
108
+ label: String(model.display_name || model.slug),
109
+ description: String(model.description || ""),
110
+ source: "codex catalog",
111
+ concrete: true,
112
+ defaultEffort: model.default_reasoning_level || null,
113
+ efforts: Array.isArray(model.supported_reasoning_levels)
114
+ ? model.supported_reasoning_levels.map((entry) => entry.effort).filter(Boolean)
115
+ : [],
116
+ }));
117
+ }
118
+
119
+ export function getModelProviderMismatch(provider, model) {
120
+ const normalizedModel = String(model || "").trim().toLowerCase();
121
+ if (!provider || !normalizedModel) return null;
122
+
123
+ const looksClaude = /\b(?:opus|sonnet|haiku|claude|opusplan|best)\b/.test(normalizedModel);
124
+ const looksCodex = /\b(?:gpt|codex|o[1-9]|chatgpt)\b/.test(normalizedModel);
125
+
126
+ if (provider === "codex" && looksClaude) {
127
+ return `Model "${model}" looks like a Claude model, but the assistant is using Codex. Run /provider claude or /model default.`;
128
+ }
129
+ if (provider === "claude" && looksCodex) {
130
+ return `Model "${model}" looks like a Codex/OpenAI model, but the assistant is using Claude. Run /provider codex or /model default.`;
131
+ }
132
+ return null;
133
+ }
134
+
135
+ export function formatModelChoices(discovery, { currentModel = null } = {}) {
136
+ const current = currentModel || "provider default";
137
+ const lines = [
138
+ `Models for ${discovery.provider}`,
139
+ `Current: ${current}`,
140
+ "",
141
+ ...discovery.models.map((model) => {
142
+ const command = model.id ? `/model ${model.id}` : "/model default";
143
+ const marker = (model.id || null) === (currentModel || null) ? "*" : " ";
144
+ return `${marker} ${model.label} ${command}`;
145
+ }),
146
+ " custom... /model custom <model>",
147
+ ];
148
+ for (const warning of discovery.warnings || []) {
149
+ lines.push("", `Note: ${warning}`);
150
+ }
151
+ return lines.join("\n");
152
+ }
153
+
154
+ export function providerDefaultModel() {
155
+ return {
156
+ id: null,
157
+ label: "provider default",
158
+ description: "Use the provider CLI default model.",
159
+ source: "provider default",
160
+ concrete: false,
161
+ };
162
+ }
163
+
164
+ function readFreshCache(filePath) {
165
+ const value = readJson(filePath);
166
+ if (!value?.fetchedAt || !Array.isArray(value.models)) return null;
167
+ if (Date.now() - Date.parse(value.fetchedAt) > CACHE_MAX_AGE_MS) return null;
168
+ return value;
169
+ }
170
+
171
+ function writeCache(filePath, discovery) {
172
+ try {
173
+ fs.mkdirSync(path.dirname(filePath), { recursive: true });
174
+ fs.writeFileSync(filePath, JSON.stringify({ ...discovery, fetchedAt: new Date().toISOString() }, null, 2), "utf8");
175
+ } catch {
176
+ // Model discovery is best-effort.
177
+ }
178
+ }
179
+
180
+ function readJson(filePath) {
181
+ try {
182
+ return JSON.parse(fs.readFileSync(filePath, "utf8"));
183
+ } catch {
184
+ return null;
185
+ }
186
+ }
187
+
188
+ function readClaudeAvailableModels({ productDir }) {
189
+ const paths = [
190
+ path.join(os.homedir(), ".claude", "settings.json"),
191
+ path.join(productDir, ".claude", "settings.json"),
192
+ ];
193
+ const models = [];
194
+ for (const filePath of paths) {
195
+ const settings = readJson(filePath);
196
+ const available = settings?.availableModels || settings?.model?.availableModels;
197
+ if (Array.isArray(available)) models.push(...available.map((entry) => String(entry).trim()).filter(Boolean));
198
+ }
199
+ return [...new Set(models)];
200
+ }
201
+
202
+ async function fetchAnthropicModels({ env }) {
203
+ const apiKey = env.ANTHROPIC_API_KEY;
204
+ if (!apiKey || typeof fetch !== "function") return [];
205
+ try {
206
+ const response = await fetch("https://api.anthropic.com/v1/models", {
207
+ headers: {
208
+ "x-api-key": apiKey,
209
+ "anthropic-version": "2023-06-01",
210
+ },
211
+ });
212
+ if (!response.ok) return [];
213
+ const body = await response.json();
214
+ const data = Array.isArray(body?.data) ? body.data : [];
215
+ return data
216
+ .filter((entry) => entry?.id)
217
+ .map((entry) => ({
218
+ id: String(entry.id),
219
+ displayName: String(entry.display_name || entry.id),
220
+ description: entry.created_at ? `Created ${entry.created_at}` : "",
221
+ }));
222
+ } catch {
223
+ return [];
224
+ }
225
+ }
226
+
227
+ function dedupeModels(models) {
228
+ const seen = new Set();
229
+ const result = [];
230
+ for (const model of models) {
231
+ const key = model.id || "__default__";
232
+ if (seen.has(key)) continue;
233
+ seen.add(key);
234
+ result.push(model);
235
+ }
236
+ return result;
237
+ }
238
+
239
+ function shellQuote(value) {
240
+ const text = String(value);
241
+ if (/^[a-zA-Z0-9._:/-]+$/.test(text)) return text;
242
+ return `'${text.replace(/'/g, `'\\''`)}'`;
243
+ }
@@ -1,4 +1,4 @@
1
- import { readContextContent } from "../context-resources.mjs";
1
+ import { readContextContent } from "../../results/context.mjs";
2
2
  import { buildAssistantResponseContract } from "./protocol.mjs";
3
3
 
4
4
  export function buildAssistantPrompt({
@@ -16,11 +16,8 @@ export function buildAssistantPrompt({
16
16
  "You are Testkit Assistant.",
17
17
  "You help users run tests, inspect failures, read logs and artifacts, and navigate the current local test state.",
18
18
  "All user natural-language requests must be handled through your own reasoning plus the available tools.",
19
- "Use shell_exec when the user asks to run tests or inspect the working repo.",
20
- "For testkit work, invoke the local `testkit` command directly, for example `testkit run --dir . --type e2e` or `testkit discover --dir .`.",
21
- "Do not wrap testkit with pnpm, npm, yarn, bun, or npx unless the user explicitly asks for that exact package-manager command.",
19
+ "Use the dedicated testkit tools for run/discover/status/doctor/typecheck actions; use shell_exec only for arbitrary repository commands outside those actions.",
22
20
  "Use read_context before repeating artifact/log inspection work, and use read_file/search_repo when you need codebase context.",
23
- "After a tool result, describe only what the tool result actually says. Do not invent filesystem, sandbox, package-manager, or permission errors.",
24
21
  buildAssistantResponseContract({ tools }),
25
22
  "",
26
23
  "Current run summary:",
@@ -4,6 +4,7 @@ import {
4
4
  buildStatusEvent,
5
5
  buildToolEvent,
6
6
  createHostedSessionRunner,
7
+ extractTextFragments,
7
8
  } from "./shared.mjs";
8
9
 
9
10
  export function startClaudeHostedSession({
@@ -50,8 +51,8 @@ export function startClaudeHostedSession({
50
51
  child,
51
52
  onEvent,
52
53
  parsePayload: parseClaudePayload,
53
- readFinalText() {
54
- return null;
54
+ readFinalText(result) {
55
+ return readClaudeFinalText(result?.stdout || "") || null;
55
56
  },
56
57
  });
57
58
  }
@@ -61,7 +62,7 @@ function normalizeProviderArgs(providerArgs) {
61
62
  return providerArgs.flatMap((arg) => String(arg || "").split(/\s+/).filter(Boolean));
62
63
  }
63
64
 
64
- function parseClaudePayload(payload) {
65
+ export function parseClaudePayload(payload) {
65
66
  const events = [];
66
67
  if (!payload || typeof payload !== "object") return events;
67
68
 
@@ -102,6 +103,10 @@ function parseClaudePayload(payload) {
102
103
  }
103
104
 
104
105
  if (type === "assistant") {
106
+ const fragments = [...new Set(extractTextFragments(payload.message?.content || payload.content || [], []))];
107
+ for (const fragment of fragments) {
108
+ events.push({ type: "delta", text: fragment });
109
+ }
105
110
  return events;
106
111
  }
107
112
 
@@ -113,7 +118,40 @@ function parseClaudePayload(payload) {
113
118
  return events;
114
119
  }
115
120
 
121
+ if (type === "system" || type === "rate_limit_event") {
122
+ return events;
123
+ }
124
+
116
125
  const statusEvent = buildStatusEvent(type ? `Claude event: ${type}` : JSON.stringify(payload));
117
126
  if (statusEvent) events.push(statusEvent);
118
127
  return events;
119
128
  }
129
+
130
+ export function readClaudeFinalText(stdout) {
131
+ const lines = String(stdout || "")
132
+ .split("\n")
133
+ .map((line) => line.trim())
134
+ .filter(Boolean);
135
+
136
+ let fallback = null;
137
+ for (const line of lines) {
138
+ let payload = null;
139
+ try {
140
+ payload = JSON.parse(line);
141
+ } catch {
142
+ continue;
143
+ }
144
+ if (!payload || typeof payload !== "object") continue;
145
+
146
+ if (payload.type === "result" && payload.subtype !== "error" && typeof payload.result === "string") {
147
+ return payload.result.trim() || null;
148
+ }
149
+
150
+ if (payload.type === "assistant") {
151
+ const fragments = [...new Set(extractTextFragments(payload.message?.content || payload.content || [], []))];
152
+ if (fragments.length > 0) fallback = fragments.join("");
153
+ }
154
+ }
155
+
156
+ return fallback;
157
+ }
@@ -22,17 +22,14 @@ export function startCodexHostedSession({
22
22
  } = {}) {
23
23
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "testkit-codex-"));
24
24
  const outputFile = path.join(tempDir, "final-message.txt");
25
- const args = ["exec", "--json", "-o", outputFile];
26
-
27
- if (purpose === "assistant") {
28
- args.push("-s", "read-only");
29
- }
30
- if (model) {
31
- args.push("--model", String(model));
32
- }
33
- args.push(...normalizeProviderArgs(providerArgs));
34
-
35
- args.push(prompt);
25
+ const args = buildCodexArgs({
26
+ outputFile,
27
+ purpose,
28
+ model,
29
+ providerArgs,
30
+ prompt,
31
+ sandbox: process.env.TESTKIT_CODEX_SANDBOX,
32
+ });
36
33
 
37
34
  const child = execa(command, args, {
38
35
  cwd,
@@ -51,7 +48,7 @@ export function startCodexHostedSession({
51
48
  return String(message || "").trim() === "Reading additional input from stdin...";
52
49
  },
53
50
  readFinalText(result) {
54
- return readTextFileIfPresent(outputFile) || result.stdout || null;
51
+ return readTextFileIfPresent(outputFile) || null;
55
52
  },
56
53
  });
57
54
 
@@ -65,16 +62,38 @@ export function startCodexHostedSession({
65
62
  };
66
63
  }
67
64
 
65
+ export function buildCodexArgs({
66
+ outputFile,
67
+ purpose = "assistant",
68
+ model = null,
69
+ providerArgs = [],
70
+ prompt = "",
71
+ sandbox = null,
72
+ } = {}) {
73
+ const args = ["exec", "--json"];
74
+ if (outputFile) args.push("-o", outputFile);
75
+
76
+ if (purpose === "assistant") {
77
+ args.push("-s", String(sandbox || "workspace-write"));
78
+ }
79
+ if (model) {
80
+ args.push("--model", String(model));
81
+ }
82
+ args.push(...normalizeProviderArgs(providerArgs));
83
+ args.push(prompt);
84
+ return args;
85
+ }
86
+
68
87
  function normalizeProviderArgs(providerArgs) {
69
88
  if (!Array.isArray(providerArgs)) return [];
70
89
  return providerArgs.flatMap((arg) => String(arg || "").split(/\s+/).filter(Boolean));
71
90
  }
72
91
 
73
- function parseCodexPayload(payload) {
92
+ export function parseCodexPayload(payload) {
74
93
  const events = [];
75
94
  if (!payload || typeof payload !== "object") return events;
76
95
  const type = payload.type || payload.event || payload.kind || null;
77
- const errorMessage = payload.error?.message || payload.error || null;
96
+ const errorMessage = payload.error?.message || payload.error || (type === "error" ? payload.message : null) || null;
78
97
  if (errorMessage) {
79
98
  const event = buildErrorEvent(errorMessage);
80
99
  if (event) events.push(event);
@@ -1,7 +1,7 @@
1
1
  import fs from "fs";
2
2
  import path from "path";
3
- import { startClaudeHostedSession } from "./providers/claude.mjs";
4
- import { startCodexHostedSession } from "./providers/codex.mjs";
3
+ import { startClaudeHostedSession } from "./claude.mjs";
4
+ import { startCodexHostedSession } from "./codex.mjs";
5
5
 
6
6
  const PROVIDERS = ["codex", "claude"];
7
7
 
@@ -55,7 +55,7 @@ export function isProviderInstalled(provider, env = process.env) {
55
55
  return false;
56
56
  }
57
57
 
58
- export function startAgentSession({
58
+ export function startProviderSession({
59
59
  provider = "auto",
60
60
  model = null,
61
61
  effort = null,
@@ -5,11 +5,15 @@ export function createHostedSessionRunner({ provider, child, onEvent, parsePaylo
5
5
  let cancelled = false;
6
6
  let settled = false;
7
7
  let assistantText = "";
8
+ let lastErrorMessage = null;
8
9
 
9
10
  const emit = (event) => {
10
11
  if (event?.type === "delta" || event?.type === "final") {
11
12
  assistantText += event.text || "";
12
13
  }
14
+ if (event?.type === "error") {
15
+ lastErrorMessage = event.message || lastErrorMessage;
16
+ }
13
17
  if (typeof onEvent === "function" && event) onEvent({ provider, ...event });
14
18
  };
15
19
 
@@ -36,8 +40,8 @@ export function createHostedSessionRunner({ provider, child, onEvent, parsePaylo
36
40
  const completion = (async () => {
37
41
  const result = await child;
38
42
  const finalText = (readFinalText ? readFinalText(result) : null) || assistantText.trim() || null;
39
- if ((result.exitCode ?? 0) !== 0 && !finalText) {
40
- const message = result.stderr || result.stdout || `${provider} exited with code ${result.exitCode ?? 1}`;
43
+ if ((result.exitCode ?? 0) !== 0) {
44
+ const message = lastErrorMessage || result.stderr || `${provider} exited with code ${result.exitCode ?? 1}`;
41
45
  emit({ type: "error", message });
42
46
  throw new Error(message);
43
47
  }