@burtson-labs/host-kit 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +55 -0
- package/dist/backgroundTasks.d.ts +113 -0
- package/dist/backgroundTasks.d.ts.map +1 -0
- package/dist/backgroundTasks.js +137 -0
- package/dist/backgroundTasks.js.map +1 -0
- package/dist/checkpoints.d.ts +99 -0
- package/dist/checkpoints.d.ts.map +1 -0
- package/dist/checkpoints.js +227 -0
- package/dist/checkpoints.js.map +1 -0
- package/dist/hooks.d.ts +51 -0
- package/dist/hooks.d.ts.map +1 -0
- package/dist/hooks.js +152 -0
- package/dist/hooks.js.map +1 -0
- package/dist/index.d.ts +19 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +95 -0
- package/dist/index.js.map +1 -0
- package/dist/insights.d.ts +398 -0
- package/dist/insights.d.ts.map +1 -0
- package/dist/insights.js +1933 -0
- package/dist/insights.js.map +1 -0
- package/dist/mcp.d.ts +60 -0
- package/dist/mcp.d.ts.map +1 -0
- package/dist/mcp.js +281 -0
- package/dist/mcp.js.map +1 -0
- package/dist/mcpConnectors.d.ts +108 -0
- package/dist/mcpConnectors.d.ts.map +1 -0
- package/dist/mcpConnectors.js +217 -0
- package/dist/mcpConnectors.js.map +1 -0
- package/dist/mcpToolCache.d.ts +43 -0
- package/dist/mcpToolCache.d.ts.map +1 -0
- package/dist/mcpToolCache.js +150 -0
- package/dist/mcpToolCache.js.map +1 -0
- package/dist/mcpTrust.d.ts +22 -0
- package/dist/mcpTrust.d.ts.map +1 -0
- package/dist/mcpTrust.js +104 -0
- package/dist/mcpTrust.js.map +1 -0
- package/dist/memory.d.ts +38 -0
- package/dist/memory.d.ts.map +1 -0
- package/dist/memory.js +151 -0
- package/dist/memory.js.map +1 -0
- package/dist/memoryIndex.d.ts +38 -0
- package/dist/memoryIndex.d.ts.map +1 -0
- package/dist/memoryIndex.js +142 -0
- package/dist/memoryIndex.js.map +1 -0
- package/dist/mentions.d.ts +33 -0
- package/dist/mentions.d.ts.map +1 -0
- package/dist/mentions.js +126 -0
- package/dist/mentions.js.map +1 -0
- package/dist/ollamaModels.d.ts +36 -0
- package/dist/ollamaModels.d.ts.map +1 -0
- package/dist/ollamaModels.js +83 -0
- package/dist/ollamaModels.js.map +1 -0
- package/dist/permissions.d.ts +72 -0
- package/dist/permissions.d.ts.map +1 -0
- package/dist/permissions.js +271 -0
- package/dist/permissions.js.map +1 -0
- package/dist/tools/extraTools.d.ts +80 -0
- package/dist/tools/extraTools.d.ts.map +1 -0
- package/dist/tools/extraTools.js +471 -0
- package/dist/tools/extraTools.js.map +1 -0
- package/dist/tools/readMemoryTool.d.ts +3 -0
- package/dist/tools/readMemoryTool.d.ts.map +1 -0
- package/dist/tools/readMemoryTool.js +115 -0
- package/dist/tools/readMemoryTool.js.map +1 -0
- package/dist/tools/taskTool.d.ts +119 -0
- package/dist/tools/taskTool.d.ts.map +1 -0
- package/dist/tools/taskTool.js +466 -0
- package/dist/tools/taskTool.js.map +1 -0
- package/dist/tools/testRunTool.d.ts +59 -0
- package/dist/tools/testRunTool.d.ts.map +1 -0
- package/dist/tools/testRunTool.js +308 -0
- package/dist/tools/testRunTool.js.map +1 -0
- package/dist/turnLog.d.ts +89 -0
- package/dist/turnLog.d.ts.map +1 -0
- package/dist/turnLog.js +469 -0
- package/dist/turnLog.js.map +1 -0
- package/package.json +35 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* test_run — the highest-leverage missing tool flagged in Bandit's
|
|
3
|
+
* 2026-05-10 self-eval. Closes the round-trip where the agent has to
|
|
4
|
+
* guess the test framework and shell out via raw run_command, fish
|
|
5
|
+
* through the output, and report fuzzily. Instead:
|
|
6
|
+
*
|
|
7
|
+
* 1. Auto-detect the framework from workspace files (vitest, jest,
|
|
8
|
+
* pytest, go, cargo, dotnet, npm-script fallback).
|
|
9
|
+
* 2. Run the appropriate command (with an optional `pattern` to
|
|
10
|
+
* scope to specific tests / files).
|
|
11
|
+
* 3. Parse the output into a tight summary: framework, cwd,
|
|
12
|
+
* `N passed, M failed`, the first few failure messages.
|
|
13
|
+
*
|
|
14
|
+
* Why a dedicated tool rather than "tell the model to run pnpm test":
|
|
15
|
+
* (a) detection is opinionated, doing it once is cheaper than the model
|
|
16
|
+
* guessing; (b) parsed output keeps tool-result size bounded so big
|
|
17
|
+
* suites don't flood the context with 5000 lines of pass logs;
|
|
18
|
+
* (c) the model gets a deterministic shape to react to, making
|
|
19
|
+
* fix-it loops actually closeable.
|
|
20
|
+
*/
|
|
21
|
+
import type { AgentTool, ToolExecutionContext } from '@burtson-labs/agent-core';
|
|
22
|
+
export type TestFramework = 'vitest' | 'jest' | 'pytest' | 'go' | 'cargo' | 'dotnet' | 'npm-script';
|
|
23
|
+
/**
|
|
24
|
+
* Detect the test framework by inspecting the workspace at `cwd`. Tries
|
|
25
|
+
* package.json dependencies first, then config-file probes, finally falls back to a
|
|
26
|
+
* generic `npm test` script if one exists. Returns `null` if nothing
|
|
27
|
+
* looks runnable — the tool surfaces a clear "I don't know how to test
|
|
28
|
+
* this" message rather than blindly running `npm test` and hanging.
|
|
29
|
+
*/
|
|
30
|
+
export declare function detectTestFramework(ctx: ToolExecutionContext, cwd: string): Promise<TestFramework | null>;
|
|
31
|
+
/**
|
|
32
|
+
* Build the command + args for a given framework. `pattern` is the
|
|
33
|
+
* user-supplied test name / file filter. Each framework's filter flag
|
|
34
|
+
* is different — handled here so the agent doesn't have to know.
|
|
35
|
+
*/
|
|
36
|
+
export declare function buildTestCommand(framework: TestFramework, pattern?: string): {
|
|
37
|
+
cmd: string;
|
|
38
|
+
args: string[];
|
|
39
|
+
};
|
|
40
|
+
/**
|
|
41
|
+
* Parse stdout+stderr from a test run into a structured summary. We
|
|
42
|
+
* extract the `N passed, M failed` headline plus the first few
|
|
43
|
+
* failure snippets. Best-effort per framework — when parsing fails,
|
|
44
|
+
* the counts are left undefined (no headline is reported) and
|
|
45
|
+
* the agent still gets the raw text.
|
|
46
|
+
*/
|
|
47
|
+
export interface ParsedTestSummary {
|
|
48
|
+
passed?: number;
|
|
49
|
+
failed?: number;
|
|
50
|
+
skipped?: number;
|
|
51
|
+
/** Brief one-line description of each failed test (first MAX_FAILURE_SNIPPETS). */
|
|
52
|
+
failureNames: string[];
|
|
53
|
+
}
|
|
54
|
+
export declare function parseTestOutput(framework: TestFramework, stdout: string, stderr: string): ParsedTestSummary;
|
|
55
|
+
/**
|
|
56
|
+
* The agent-facing tool.
|
|
57
|
+
*/
|
|
58
|
+
export declare function buildTestRunTool(): AgentTool;
|
|
59
|
+
//# sourceMappingURL=testRunTool.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"testRunTool.d.ts","sourceRoot":"","sources":["../../src/tools/testRunTool.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAc,oBAAoB,EAAE,MAAM,0BAA0B,CAAC;AAE5F,MAAM,MAAM,aAAa,GACrB,QAAQ,GACR,MAAM,GACN,QAAQ,GACR,IAAI,GACJ,OAAO,GACP,QAAQ,GACR,YAAY,CAAC;AAKjB;;;;;;GAMG;AACH,wBAAsB,mBAAmB,CACvC,GAAG,EAAE,oBAAoB,EACzB,GAAG,EAAE,MAAM,GACV,OAAO,CAAC,aAAa,GAAG,IAAI,CAAC,CAkD/B;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,EAAE,CAAA;CAAE,CA4B5G;AAED;;;;;;GAMG;AACH,MAAM,WAAW,iBAAiB;IAChC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mFAAmF;IACnF,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAED,wBAAgB,eAAe,CAAC,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,iBAAiB,CAqF3G;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,SAAS,CAoE5C"}
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* test_run — the highest-leverage missing tool flagged in Bandit's
|
|
4
|
+
* 2026-05-10 self-eval. Closes the round-trip where the agent has to
|
|
5
|
+
* guess the test framework and shell out via raw run_command, fish
|
|
6
|
+
* through the output, and report fuzzily. Instead:
|
|
7
|
+
*
|
|
8
|
+
* 1. Auto-detect the framework from workspace files (vitest, jest,
|
|
9
|
+
* pytest, go, cargo, dotnet, npm-script fallback).
|
|
10
|
+
* 2. Run the appropriate command (with an optional `pattern` to
|
|
11
|
+
* scope to specific tests / files).
|
|
12
|
+
* 3. Parse the output into a tight summary: framework, cwd,
|
|
13
|
+
* `N passed, M failed`, the first few failure messages.
|
|
14
|
+
*
|
|
15
|
+
* Why a dedicated tool rather than "tell the model to run pnpm test":
|
|
16
|
+
* (a) detection is opinionated, doing it once is cheaper than the model
|
|
17
|
+
* guessing; (b) parsed output keeps tool-result size bounded so big
|
|
18
|
+
* suites don't flood the context with 5000 lines of pass logs;
|
|
19
|
+
* (c) the model gets a deterministic shape to react to, making
|
|
20
|
+
* fix-it loops actually closeable.
|
|
21
|
+
*/
|
|
22
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
23
|
+
exports.detectTestFramework = detectTestFramework;
|
|
24
|
+
exports.buildTestCommand = buildTestCommand;
|
|
25
|
+
exports.parseTestOutput = parseTestOutput;
|
|
26
|
+
exports.buildTestRunTool = buildTestRunTool;
|
|
27
|
+
const MAX_OUTPUT_BYTES = 16 * 1024;
|
|
28
|
+
const MAX_FAILURE_SNIPPETS = 8;
|
|
29
|
+
/**
 * Detect which test framework the workspace at `cwd` uses.
 *
 * Probes run in a fixed order and the first hit wins:
 *   1. `package.json` dependencies / devDependencies: `vitest`, then
 *      `jest` / `ts-jest`.
 *   2. Framework config files (`vitest.config.*`, then `jest.config.*`)
 *      for setups that do not list the runner as a dependency.
 *   3. Python markers: `pytest.ini`, `pyproject.toml`, `setup.cfg`.
 *      Only existence is checked; the file contents are not inspected.
 *   4. `go.mod`, then `Cargo.toml`, then a root-level `*.sln` / `*.csproj`.
 *   5. A `test` script in `package.json` (generic `npm test` fallback).
 *
 * @param ctx Tool execution context. Files are probed through its
 *            `readFile`, and globs through `listFiles` (via the helpers).
 * @param cwd Directory to inspect; probe paths are built as `${cwd}/<name>`.
 * @returns The detected framework id, or `null` when nothing matched, so
 *          the caller can report that instead of blindly running `npm test`.
 */
async function detectTestFramework(ctx, cwd) {
    // package.json is read once and reused by both the dependency check and
    // the npm-script fallback. A missing or unparsable file is not an error:
    // it only means the JS-specific checks are skipped.
    let pkg;
    try {
        const parsed = JSON.parse(await ctx.readFile(`${cwd}/package.json`));
        // Valid JSON that is not an object (e.g. a bare string) carries no
        // dependency or script information, so treat it as absent.
        if (parsed && typeof parsed === 'object')
            pkg = parsed;
    }
    catch {
        // No package.json or invalid JSON — continue with the other probes.
    }
    // ── JS/TS via declared dependencies (vitest is checked before jest) ──
    if (pkg) {
        const allDeps = { ...(pkg.dependencies ?? {}), ...(pkg.devDependencies ?? {}) };
        if (allDeps.vitest)
            return 'vitest';
        if (allDeps.jest || allDeps['ts-jest'])
            return 'jest';
    }
    // ── JS/TS via config files (zero-dependency setups) ─────────────────
    const configProbes = [
        ['vitest', ['ts', 'js', 'mjs', 'cjs', 'mts', 'cts'].map((ext) => `vitest.config.${ext}`)],
        ['jest', ['ts', 'js', 'mjs', 'cjs', 'cts', 'json'].map((ext) => `jest.config.${ext}`)]
    ];
    for (const [framework, candidates] of configProbes) {
        for (const candidate of candidates) {
            if (await fileExists(ctx, `${cwd}/${candidate}`))
                return framework;
        }
    }
    // ── Python ──────────────────────────────────────────────────────────
    for (const marker of ['pytest.ini', 'pyproject.toml', 'setup.cfg']) {
        if (await fileExists(ctx, `${cwd}/${marker}`))
            return 'pytest';
    }
    // ── Go ──────────────────────────────────────────────────────────────
    if (await fileExists(ctx, `${cwd}/go.mod`))
        return 'go';
    // ── Rust ────────────────────────────────────────────────────────────
    if (await fileExists(ctx, `${cwd}/Cargo.toml`))
        return 'cargo';
    // ── .NET — a solution OR any project file at the root ───────────────
    for (const glob of ['*.sln', '*.csproj']) {
        if (await hasFileMatching(ctx, cwd, glob))
            return 'dotnet';
    }
    // ── Generic npm-script fallback ─────────────────────────────────────
    if (pkg?.scripts?.test)
        return 'npm-script';
    return null;
}
|
|
88
|
+
/**
 * Build the executable and argument list that runs the tests for
 * `framework`.
 *
 * `pattern` is the caller-supplied test name / file filter. Each runner
 * takes it differently, which is handled here so the agent does not have
 * to know:
 *   - vitest, jest, cargo: positional argument
 *   - pytest: `-k <pattern>`
 *   - go: `-run <pattern>` (placed before the `./...` package list)
 *   - dotnet: `--filter <pattern>`
 *   - npm-script: ignored, because `npm test` has no standard filter flag
 *
 * @param framework One of the supported framework ids.
 * @param pattern   Optional filter; an empty string is treated as absent.
 * @returns `{ cmd, args }` ready to hand to a process runner.
 * @throws {Error} When `framework` is not a supported id. Without this the
 *         function would return `undefined` and callers that destructure
 *         the result would fail with an opaque TypeError.
 */
function buildTestCommand(framework, pattern) {
    const positional = pattern ? [pattern] : [];
    switch (framework) {
        case 'vitest':
            // `vitest run` is the non-watch mode. Invoking vitest directly
            // (rather than through the package's `test` script) keeps the
            // command deterministic.
            return { cmd: 'npx', args: ['vitest', 'run', ...positional] };
        case 'jest':
            return { cmd: 'npx', args: ['jest', '--ci', ...positional] };
        case 'pytest':
            // -q keeps the output short enough for a useful failure list.
            return { cmd: 'pytest', args: ['-q', ...(pattern ? ['-k', pattern] : [])] };
        case 'go':
            // `./...` recurses into every package; the pattern is a `-run` regex.
            return { cmd: 'go', args: ['test', ...(pattern ? ['-run', pattern] : []), './...'] };
        case 'cargo':
            return { cmd: 'cargo', args: ['test', ...positional] };
        case 'dotnet':
            return { cmd: 'dotnet', args: ['test', '--nologo', '--verbosity', 'quiet', ...(pattern ? ['--filter', pattern] : [])] };
        case 'npm-script':
            // Generic fallback: respect the project's own `test` script.
            return { cmd: 'npm', args: ['test', '--silent'] };
        default:
            throw new Error(`Unsupported test framework: ${String(framework)}. ` +
                'Expected one of: vitest, jest, pytest, go, cargo, dotnet, npm-script.');
    }
}
|
|
122
|
+
function parseTestOutput(framework, stdout, stderr) {
|
|
123
|
+
const combined = `${stdout}\n${stderr}`;
|
|
124
|
+
const result = { failureNames: [] };
|
|
125
|
+
// vitest: `Tests N passed (N)` or `Tests M failed | N passed (N+M)`
|
|
126
|
+
if (framework === 'vitest') {
|
|
127
|
+
const m = combined.match(/Tests\s+(?:(\d+)\s+failed\s*\|\s*)?(\d+)\s+passed(?:\s*\|\s*(\d+)\s+skipped)?\s*\((\d+)\)/);
|
|
128
|
+
if (m) {
|
|
129
|
+
result.failed = m[1] ? parseInt(m[1], 10) : 0;
|
|
130
|
+
result.passed = parseInt(m[2], 10);
|
|
131
|
+
result.skipped = m[3] ? parseInt(m[3], 10) : 0;
|
|
132
|
+
}
|
|
133
|
+
// Failure rows look like `FAIL test/foo.test.ts > nested > test name`.
|
|
134
|
+
const fails = [...combined.matchAll(/FAIL\s+(.+?)(?:\n|$)/g)];
|
|
135
|
+
result.failureNames = fails.slice(0, MAX_FAILURE_SNIPPETS).map((m2) => m2[1].trim());
|
|
136
|
+
}
|
|
137
|
+
// jest: `Tests: N passed, M failed, K total`
|
|
138
|
+
if (framework === 'jest') {
|
|
139
|
+
const m = combined.match(/Tests:\s+(?:(\d+)\s+failed,\s+)?(\d+)\s+passed,\s+(\d+)\s+total/);
|
|
140
|
+
if (m) {
|
|
141
|
+
result.failed = m[1] ? parseInt(m[1], 10) : 0;
|
|
142
|
+
result.passed = parseInt(m[2], 10);
|
|
143
|
+
}
|
|
144
|
+
const fails = [...combined.matchAll(/●\s+(.+?)(?:\n|$)/g)];
|
|
145
|
+
result.failureNames = fails.slice(0, MAX_FAILURE_SNIPPETS).map((m2) => m2[1].trim());
|
|
146
|
+
}
|
|
147
|
+
// pytest: `N failed, M passed in T.Ts` or `M passed in T.Ts`
|
|
148
|
+
if (framework === 'pytest') {
|
|
149
|
+
const m = combined.match(/(?:(\d+)\s+failed[,\s])?\s*(\d+)\s+passed(?:[,\s]+(\d+)\s+skipped)?\s+in\s+[\d.]+s/);
|
|
150
|
+
if (m) {
|
|
151
|
+
result.failed = m[1] ? parseInt(m[1], 10) : 0;
|
|
152
|
+
result.passed = parseInt(m[2], 10);
|
|
153
|
+
result.skipped = m[3] ? parseInt(m[3], 10) : 0;
|
|
154
|
+
}
|
|
155
|
+
// pytest's failed-list lines look like `FAILED path::testname` or
|
|
156
|
+
// `FAILED path::testname - <reason>`. The `::` separator is the
|
|
157
|
+
// marker — without it we end up matching the `FAILED` token in the
|
|
158
|
+
// dashed summary banner (e.g. `======== 1 failed ...`) and capture
|
|
159
|
+
// `========` as the "failure name."
|
|
160
|
+
const fails = [...combined.matchAll(/FAILED\s+(\S+::\S+)/g)];
|
|
161
|
+
result.failureNames = fails.slice(0, MAX_FAILURE_SNIPPETS).map((m2) => m2[1].trim());
|
|
162
|
+
}
|
|
163
|
+
// go: `--- FAIL: TestName` rows; summary is `ok` per package.
|
|
164
|
+
if (framework === 'go') {
|
|
165
|
+
const fails = [...combined.matchAll(/---\s+FAIL:\s+(\S+)/g)];
|
|
166
|
+
result.failureNames = fails.slice(0, MAX_FAILURE_SNIPPETS).map((m2) => m2[1].trim());
|
|
167
|
+
result.failed = result.failureNames.length;
|
|
168
|
+
// `PASS: TestName` lines, optionally with subtests.
|
|
169
|
+
const passes = [...combined.matchAll(/---\s+PASS:\s+/g)];
|
|
170
|
+
result.passed = passes.length;
|
|
171
|
+
}
|
|
172
|
+
// cargo: `test result: FAILED. N passed; M failed; ...` or `ok. N passed`.
|
|
173
|
+
if (framework === 'cargo') {
|
|
174
|
+
const m = combined.match(/test result:\s+(?:ok|FAILED)\.\s+(\d+)\s+passed;\s+(\d+)\s+failed(?:;\s+(\d+)\s+ignored)?/);
|
|
175
|
+
if (m) {
|
|
176
|
+
result.passed = parseInt(m[1], 10);
|
|
177
|
+
result.failed = parseInt(m[2], 10);
|
|
178
|
+
result.skipped = m[3] ? parseInt(m[3], 10) : 0;
|
|
179
|
+
}
|
|
180
|
+
const fails = [...combined.matchAll(/test\s+(\S+)\s+\.\.\.\s+FAILED/g)];
|
|
181
|
+
result.failureNames = fails.slice(0, MAX_FAILURE_SNIPPETS).map((m2) => m2[1].trim());
|
|
182
|
+
}
|
|
183
|
+
// dotnet: `Passed! - Failed: M, Passed: N, ...`
|
|
184
|
+
if (framework === 'dotnet') {
|
|
185
|
+
const m = combined.match(/Failed:\s+(\d+),\s+Passed:\s+(\d+)(?:,\s+Skipped:\s+(\d+))?/);
|
|
186
|
+
if (m) {
|
|
187
|
+
result.failed = parseInt(m[1], 10);
|
|
188
|
+
result.passed = parseInt(m[2], 10);
|
|
189
|
+
result.skipped = m[3] ? parseInt(m[3], 10) : 0;
|
|
190
|
+
}
|
|
191
|
+
const fails = [...combined.matchAll(/Failed\s+(\S+)/g)];
|
|
192
|
+
result.failureNames = fails.slice(0, MAX_FAILURE_SNIPPETS).map((m2) => m2[1].trim());
|
|
193
|
+
}
|
|
194
|
+
// npm-script: no standard parser — we don't know which framework the
|
|
195
|
+
// user's `test` script wraps. Leave the summary blank; the agent reads
|
|
196
|
+
// the raw output. (If the project wires a real framework, the
|
|
197
|
+
// dedicated branch above already caught it.)
|
|
198
|
+
return result;
|
|
199
|
+
}
|
|
200
|
+
/**
 * Build the agent-facing `test_run` tool.
 *
 * The returned tool's `execute`:
 *   1. resolves the working directory (`cwd` param, else the workspace root);
 *   2. picks the framework — a validated `framework` override, else
 *      auto-detection;
 *   3. runs the framework's command through `ctx.runCommand`;
 *   4. parses the complete output into pass/fail counts and failure names;
 *   5. returns a compact report followed by the last few KiB of raw output.
 *
 * The summary is parsed from the full, untruncated output because test
 * runners print their totals at the very end; cutting the head off first
 * would lose exactly that line on large suites.
 *
 * @returns An `AgentTool`-shaped object (`name`, `description`,
 *          `parameters`, `execute`).
 */
function buildTestRunTool() {
    const SUPPORTED_FRAMEWORKS = ['vitest', 'jest', 'pytest', 'go', 'cargo', 'dotnet', 'npm-script'];
    // Size of the raw-output tail appended to the report, in string characters.
    const RAW_TAIL_CHARS = 4 * 1024;
    return {
        name: 'test_run',
        description: 'Run the project\'s test suite and report a parsed summary. Auto-detects the framework (vitest, jest, pytest, go test, cargo test, dotnet test, or the package.json `test` script). Use this after editing code to verify nothing broke — the result tells you exactly how many tests passed/failed and which failed by name, so you can react without re-reading thousands of lines of test output. Pass `pattern` to scope to specific tests when iterating on a fix. Pass `cwd` for monorepos where the framework lives in a subdirectory (e.g. `packages/agent-core`).',
        parameters: [
            { name: 'pattern', description: 'Optional test name / file filter. Each framework maps it to its native flag (vitest/jest/cargo: positional; pytest: -k; dotnet: --filter; go: -run regex). Skipped for npm-script.', required: false },
            { name: 'cwd', description: 'Optional subdirectory of the workspace to run in. Defaults to the workspace root. Useful in monorepos to scope to one package.', required: false },
            { name: 'framework', description: 'Optional override when auto-detection picks the wrong one (e.g. monorepos with both jest and vitest). Must be one of: vitest, jest, pytest, go, cargo, dotnet, npm-script.', required: false }
        ],
        async execute(params, ctx) {
            const requestedCwd = params.cwd?.trim();
            const cwd = requestedCwd ? resolveCwd(ctx.workspaceRoot, requestedCwd) : ctx.workspaceRoot;
            const cwdLabel = displayCwd(cwd, ctx.workspaceRoot);
            // Reject an unknown override up front with an actionable message
            // instead of letting command construction fail later.
            const forced = params.framework?.trim().toLowerCase();
            if (forced && !SUPPORTED_FRAMEWORKS.includes(forced)) {
                return {
                    output: `Unknown framework "${forced}". Must be one of: ${SUPPORTED_FRAMEWORKS.join(', ')}.`,
                    isError: true
                };
            }
            // `||` rather than `??`: a blank override must fall through to
            // auto-detection instead of being reported as "not detected".
            const framework = forced || (await detectTestFramework(ctx, cwd));
            if (!framework) {
                return {
                    output: `Could not detect a test framework in ${cwdLabel}. ` +
                        `No vitest/jest/pytest/Cargo.toml/go.mod/*.csproj/sln found, and no \`test\` script in package.json. ` +
                        `If a framework is present, pass it via the \`framework\` parameter (one of: vitest, jest, pytest, go, cargo, dotnet, npm-script).`,
                    isError: true
                };
            }
            const { cmd, args } = buildTestCommand(framework, params.pattern?.trim() || undefined);
            const run = await ctx.runCommand(cmd, args, cwd);
            const stdout = run.stdout || '';
            const stderr = run.stderr || '';
            // Parse before any size limiting so the end-of-run summary is seen.
            const summary = parseTestOutput(framework, stdout, stderr);
            const failedCount = summary.failed ?? 0;
            const lines = [];
            lines.push(`${run.exitCode === 0 ? 'PASS' : 'FAIL'} · ${framework} in ${cwdLabel}`);
            if (summary.passed != null || summary.failed != null) {
                const parts = [];
                if (summary.passed != null)
                    parts.push(`${summary.passed} passed`);
                if (failedCount > 0)
                    parts.push(`${failedCount} failed`);
                if (summary.skipped != null && summary.skipped > 0)
                    parts.push(`${summary.skipped} skipped`);
                lines.push(parts.join(', '));
            }
            if (summary.failureNames.length > 0) {
                lines.push('', 'Failures:');
                for (const name of summary.failureNames)
                    lines.push(`  - ${name}`);
            }
            // Always append the real tail of the raw output so the agent has
            // the stack/diagnostic when summary parsing missed something. The
            // cap is tight: the parsed summary is the primary product.
            const raw = stdout + (stderr ? `\n[stderr]\n${stderr}` : '');
            lines.push('', '--- raw output (tail) ---', raw.slice(-RAW_TAIL_CHARS));
            return {
                output: lines.join('\n'),
                // A non-zero exit OR any parsed failure marks the call as an
                // error so the loop's tool-error handling kicks in.
                isError: run.exitCode !== 0 || failedCount > 0
            };
        }
    };
}
|
|
270
|
+
// ── helpers ────────────────────────────────────────────────────────────
|
|
271
|
+
async function fileExists(ctx, absPath) {
|
|
272
|
+
try {
|
|
273
|
+
await ctx.readFile(absPath);
|
|
274
|
+
return true;
|
|
275
|
+
}
|
|
276
|
+
catch {
|
|
277
|
+
return false;
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
async function hasFileMatching(ctx, cwd, glob) {
|
|
281
|
+
try {
|
|
282
|
+
const matches = await ctx.listFiles(glob, cwd);
|
|
283
|
+
return matches.length > 0;
|
|
284
|
+
}
|
|
285
|
+
catch {
|
|
286
|
+
return false;
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
/**
 * Turn the `cwd` tool parameter into a path to run in.
 *
 * A value starting with `/` is returned untouched. Anything else is joined
 * onto `workspaceRoot` with a `/`, and runs of slashes in the joined path
 * are collapsed to a single `/`.
 *
 * @param workspaceRoot Workspace root path.
 * @param cwd           Requested directory (already trimmed by the caller).
 * @returns The path to use as the working directory.
 */
function resolveCwd(workspaceRoot, cwd) {
    const isAbsolute = cwd.startsWith('/');
    if (!isAbsolute) {
        const joined = [workspaceRoot, cwd].join('/');
        return joined.replace(/\/+/g, '/');
    }
    return cwd;
}
|
|
294
|
+
/**
 * Produce a short label for `absolute`, relative to the workspace root.
 *
 * Trailing slashes on either argument are ignored for the comparison, so
 * `/w/pkg/` under `/w` and `/w/pkg` under `/w/` both yield `pkg`.
 *
 * @param absolute      Path to label.
 * @param workspaceRoot Workspace root path.
 * @returns `.` when both refer to the root, the root-relative remainder
 *          when `absolute` lies inside the root, otherwise `absolute`
 *          unchanged.
 */
function displayCwd(absolute, workspaceRoot) {
    // Drop trailing slashes but keep a bare "/" intact.
    const stripTrailing = (p) => p.replace(/\/+$/, '') || '/';
    const target = stripTrailing(absolute);
    const root = stripTrailing(workspaceRoot);
    if (target === root)
        return '.';
    // Compare against "root/" so that "/wx" is not mistaken for a child of "/w".
    const prefix = root === '/' ? '/' : `${root}/`;
    if (target.startsWith(prefix))
        return target.slice(prefix.length);
    return absolute;
}
|
|
301
|
+
/**
 * Cap `text` at `max` characters, keeping the head and appending a
 * truncation marker when anything was cut.
 *
 * `max` is compared against `text.length`, i.e. UTF-16 code units rather
 * than encoded bytes. The cut never lands between the two halves of a
 * surrogate pair, so the result is always well-formed text.
 *
 * @param text Text to cap; any falsy value yields an empty string.
 * @param max  Maximum number of UTF-16 code units to keep.
 * @returns `text` unchanged when it fits, otherwise its head followed by
 *          `\n… (truncated)`.
 */
function truncate(text, max) {
    if (!text)
        return '';
    if (text.length <= max)
        return text;
    let end = max;
    // If the last kept unit is a high surrogate its partner would be cut
    // off; back up one unit so the pair is dropped as a whole.
    const lastKept = text.charCodeAt(end - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff)
        end -= 1;
    return text.slice(0, end) + '\n… (truncated)';
}
|
|
308
|
+
//# sourceMappingURL=testRunTool.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"testRunTool.js","sourceRoot":"","sources":["../../src/tools/testRunTool.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;AAuBH,kDAqDC;AAOD,4CA4BC;AAiBD,0CAqFC;AAKD,4CAoEC;AAjRD,MAAM,gBAAgB,GAAG,EAAE,GAAG,IAAI,CAAC;AACnC,MAAM,oBAAoB,GAAG,CAAC,CAAC;AAE/B;;;;;;GAMG;AACI,KAAK,UAAU,mBAAmB,CACvC,GAAyB,EACzB,GAAW;IAEX,oEAAoE;IACpE,iEAAiE;IACjE,iEAAiE;IACjE,IAAI,GAAsI,CAAC;IAC3I,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,GAAG,GAAG,eAAe,CAAC,CAAC;QACtD,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,uEAAuE;IACzE,CAAC;IAED,kEAAkE;IAClE,oEAAoE;IACpE,IAAI,GAAG,EAAE,CAAC;QACR,MAAM,OAAO,GAAG,EAAE,GAAG,CAAC,GAAG,CAAC,YAAY,IAAI,EAAE,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,eAAe,IAAI,EAAE,CAAC,EAAE,CAAC;QAChF,IAAI,OAAO,CAAC,MAAM;YAAE,OAAO,QAAQ,CAAC;QACpC,IAAI,OAAO,CAAC,IAAI,IAAI,OAAO,CAAC,SAAS,CAAC;YAAE,OAAO,MAAM,CAAC;IACxD,CAAC;IACD,kEAAkE;IAClE,KAAK,MAAM,SAAS,IAAI,CAAC,kBAAkB,EAAE,kBAAkB,EAAE,mBAAmB,CAAC,EAAE,CAAC;QACtF,IAAI,MAAM,UAAU,CAAC,GAAG,EAAE,GAAG,GAAG,IAAI,SAAS,EAAE,CAAC;YAAE,OAAO,QAAQ,CAAC;IACpE,CAAC;IACD,KAAK,MAAM,SAAS,IAAI,CAAC,gBAAgB,EAAE,gBAAgB,EAAE,iBAAiB,CAAC,EAAE,CAAC;QAChF,IAAI,MAAM,UAAU,CAAC,GAAG,EAAE,GAAG,GAAG,IAAI,SAAS,EAAE,CAAC;YAAE,OAAO,MAAM,CAAC;IAClE,CAAC;IAED,uEAAuE;IACvE,IACE,CAAC,MAAM,UAAU,CAAC,GAAG,EAAE,GAAG,GAAG,aAAa,CAAC,CAAC;QAC5C,CAAC,MAAM,UAAU,CAAC,GAAG,EAAE,GAAG,GAAG,iBAAiB,CAAC,CAAC;QAChD,CAAC,MAAM,UAAU,CAAC,GAAG,EAAE,GAAG,GAAG,YAAY,CAAC,CAAC,EAC3C,CAAC;QACD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,uEAAuE;IACvE,IAAI,MAAM,UAAU,CAAC,GAAG,EAAE,GAAG,GAAG,SAAS,CAAC;QAAE,OAAO,IAAI,CAAC;IAExD,uEAAuE;IACvE,IAAI,MAAM,UAAU,CAAC,GAAG,EAAE,GAAG,GAAG,aAAa,CAAC;QAAE,OAAO,OAAO,CAAC;IAE/D,sEAAsE;IACtE,IAAI,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,OAAO,CAAC;QAAE,OAAO,QAAQ,CAAC;IAC9D,IAAI,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,UAAU,CAAC;QAAE,OAAO,QAAQ,CAAC;IAEjE,uEAAuE;IACvE,IAAI,GAAG,EAAE,OAAO,EAAE,IAAI;QAAE,OAAO,YAAY,CAAC;IAE5C,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;GAIG;AACH,SAAgB,gBAAgB,CAAC,SAAwB,EAAE,OAAgB;IACzE,QAAQ,SAAS,EA
AE,CAAC;QAClB,KAAK,QAAQ;YACX,6DAA6D;YAC7D,wDAAwD;YACxD,+DAA+D;YAC/D,+DAA+D;YAC/D,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,QAAQ,EAAE,KAAK,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;QAChF,KAAK,MAAM;YACT,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;QAC/E,KAAK,QAAQ;YACX,8DAA8D;YAC9D,iDAAiD;YACjD,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;QAC9E,KAAK,IAAI;YACP,kDAAkD;YAClD,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,MAAM,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,OAAO,CAAC,EAAE,CAAC;QACvF,KAAK,OAAO;YACV,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,MAAM,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;QACzE,KAAK,QAAQ;YACX,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,MAAM,EAAE,UAAU,EAAE,aAAa,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;QAC1H,KAAK,YAAY;YACf,6DAA6D;YAC7D,6DAA6D;YAC7D,0DAA0D;YAC1D,aAAa;YACb,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,MAAM,EAAE,UAAU,CAAC,EAAE,CAAC;IACtD,CAAC;AACH,CAAC;AAiBD,SAAgB,eAAe,CAAC,SAAwB,EAAE,MAAc,EAAE,MAAc;IACtF,MAAM,QAAQ,GAAG,GAAG,MAAM,KAAK,MAAM,EAAE,CAAC;IACxC,MAAM,MAAM,GAAsB,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC;IAEvD,sEAAsE;IACtE,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC3B,MAAM,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,2FAA2F,CAAC,CAAC;QACtH,IAAI,CAAC,EAAE,CAAC;YACN,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACnC,MAAM,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC;QACD,wEAAwE;QACxE,MAAM,KAAK,GAAG,CAAC,GAAG,QAAQ
,CAAC,QAAQ,CAAC,uBAAuB,CAAC,CAAC,CAAC;QAC9D,MAAM,CAAC,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACvF,CAAC;IAED,mDAAmD;IACnD,IAAI,SAAS,KAAK,MAAM,EAAE,CAAC;QACzB,MAAM,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,iEAAiE,CAAC,CAAC;QAC5F,IAAI,CAAC,EAAE,CAAC;YACN,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACrC,CAAC;QACD,MAAM,KAAK,GAAG,CAAC,GAAG,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC,CAAC;QAC3D,MAAM,CAAC,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACvF,CAAC;IAED,6DAA6D;IAC7D,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC3B,MAAM,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,oFAAoF,CAAC,CAAC;QAC/G,IAAI,CAAC,EAAE,CAAC;YACN,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACnC,MAAM,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC;QACD,kEAAkE;QAClE,gEAAgE;QAChE,mEAAmE;QACnE,mEAAmE;QACnE,oCAAoC;QACpC,MAAM,KAAK,GAAG,CAAC,GAAG,QAAQ,CAAC,QAAQ,CAAC,sBAAsB,CAAC,CAAC,CAAC;QAC7D,MAAM,CAAC,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACvF,CAAC;IAED,8DAA8D;IAC9D,IAAI,SAAS,KAAK,IAAI,EAAE,CAAC;QACvB,MAAM,KAAK,GAAG,CAAC,GAAG,QAAQ,CAAC,QAAQ,CAAC,sBAAsB,CAAC,CAAC,CAAC;QAC7D,MAAM,CAAC,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACrF,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC;QAC3C,oDAAoD;QACpD,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,
MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;IAChC,CAAC;IAED,2EAA2E;IAC3E,IAAI,SAAS,KAAK,OAAO,EAAE,CAAC;QAC1B,MAAM,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,2FAA2F,CAAC,CAAC;QACtH,IAAI,CAAC,EAAE,CAAC;YACN,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACnC,MAAM,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC;QACD,MAAM,KAAK,GAAG,CAAC,GAAG,QAAQ,CAAC,QAAQ,CAAC,iCAAiC,CAAC,CAAC,CAAC;QACxE,MAAM,CAAC,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACvF,CAAC;IAED,yDAAyD;IACzD,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC3B,MAAM,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,6DAA6D,CAAC,CAAC;QACxF,IAAI,CAAC,EAAE,CAAC;YACN,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACnC,MAAM,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC;QACD,MAAM,KAAK,GAAG,CAAC,GAAG,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC,CAAC;QACxD,MAAM,CAAC,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACvF,CAAC;IAED,qEAAqE;IACrE,uEAAuE;IACvE,8DAA8D;IAC9D,6CAA6C;IAE7C,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAgB,gBAAgB;IAC9B,OAAO;QACL,IAAI,EAAE,UAAU;QAChB,WAAW,EACT,2iBAA2iB;QAC7iB,UAAU,EAAE;YACV,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,yKAAyK,EAAE,QAAQ,EAAE,KAAK,EAAE;YAC5N,EAAE,IAAI,EAAE,KAAK,EAAE,WAAW,EAAE,gIAAgI,EAAE,QAAQ,EAAE,KAAK,EAAE;YAC/K,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,4KAA4K,EAAE,QAAQ,EAAE,KAAK,EAAE;SAClO;QACD,KAAK,CAAC,OAAO,CAAC,MAA8B,EAAE,GAAyB;YACrE,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,EAAE,IAAI,EAAE;gBAC5B,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;gBAClD,CAAC,CAAC,GAAG,CAAC,aAAa,CAAC;YAEtB,MAAM,MAAM,
GAAG,MAAM,CAAC,SAAS,EAAE,IAAI,EAA+B,CAAC;YACrE,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,MAAM,mBAAmB,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;YAClE,IAAI,CAAC,SAAS,EAAE,CAAC;gBACf,OAAO;oBACL,MAAM,EACJ,wCAAwC,UAAU,CAAC,GAAG,EAAE,GAAG,CAAC,aAAa,CAAC,IAAI;wBAC9E,sGAAsG;wBACtG,mIAAmI;oBACrI,OAAO,EAAE,IAAI;iBACd,CAAC;YACJ,CAAC;YAED,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,gBAAgB,CAAC,SAAS,EAAE,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC,CAAC;YACvF,MAAM,GAAG,GAAG,MAAM,GAAG,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;YACjD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;YACtD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;YACtD,MAAM,OAAO,GAAG,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;YAE3D,MAAM,KAAK,GAAa,EAAE,CAAC;YAC3B,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,EAAE,GAAG,CAAC,aAAa,CAAC,CAAC;YACpD,MAAM,MAAM,GAAG,GAAG,CAAC,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;YACpD,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,MAAM,SAAS,OAAO,QAAQ,EAAE,CAAC,CAAC;YACtD,IAAI,OAAO,CAAC,MAAM,IAAI,IAAI,IAAI,OAAO,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC;gBACrD,MAAM,KAAK,GAAa,EAAE,CAAC;gBAC3B,IAAI,OAAO,CAAC,MAAM,IAAI,IAAI;oBAAE,KAAK,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,MAAM,SAAS,CAAC,CAAC;gBACnE,IAAI,OAAO,CAAC,MAAM,IAAI,IAAI,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,MAAM,SAAS,CAAC,CAAC;gBACzF,IAAI,OAAO,CAAC,OAAO,IAAI,IAAI,IAAI,OAAO,CAAC,OAAO,GAAG,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,OAAO,UAAU,CAAC,CAAC;gBAC7F,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;YAC/B,CAAC;YACD,IAAI,OAAO,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACf,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;gBACxB,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,YAAY;oBAAE,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;YACrE,CAAC;YACD,8DAA8D;YAC9D,iEAAiE;YACjE,iEAAiE;YACjE,6CAA6C;YAC7C,MAAM,OAAO,GAAG,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,eAAe,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;YACpF,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACf,KAAK,CAAC,I
AAI,CAAC,2BAA2B,CAAC,CAAC;YACxC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAEpB,OAAO;gBACL,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;gBACxB,iEAAiE;gBACjE,kEAAkE;gBAClE,gEAAgE;gBAChE,iCAAiC;gBACjC,OAAO,EAAE,GAAG,CAAC,QAAQ,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,IAAI,IAAI,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;aAC9E,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,0EAA0E;AAE1E,KAAK,UAAU,UAAU,CAAC,GAAyB,EAAE,OAAe;IAClE,IAAI,CAAC;QACH,MAAM,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC5B,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,KAAK,UAAU,eAAe,CAAC,GAAyB,EAAE,GAAW,EAAE,IAAY;IACjF,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,SAAS,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC/C,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,SAAS,UAAU,CAAC,aAAqB,EAAE,GAAW;IACpD,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC;QAAE,OAAO,GAAG,CAAC;IACpC,OAAO,GAAG,aAAa,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AACxD,CAAC;AAED,SAAS,UAAU,CAAC,QAAgB,EAAE,aAAqB;IACzD,IAAI,QAAQ,KAAK,aAAa;QAAE,OAAO,GAAG,CAAC;IAC3C,IAAI,QAAQ,CAAC,UAAU,CAAC,aAAa,GAAG,GAAG,CAAC;QAAE,OAAO,QAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC9F,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,QAAQ,CAAC,IAAY,EAAE,GAAW;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC;IACpC,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,iBAAiB,CAAC;AAChD,CAAC"}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool-use transcript logger — records every LLM response, tool call, and
|
|
3
|
+
* tool result to a JSONL file under the workspace's .bandit/turns/ folder.
|
|
4
|
+
*
|
|
5
|
+
* Motivation: the agent occasionally claims to have done something it
|
|
6
|
+
* didn't. Without a record of what it emitted, diagnosis is impossible.
|
|
7
|
+
* This writes a per-turn log we can point at later to answer "what did
|
|
8
|
+
* the model actually try?".
|
|
9
|
+
*
|
|
10
|
+
* Format: one JSON object per line. Events:
|
|
11
|
+
* { t, type: 'user-prompt', prompt }
|
|
12
|
+
* { t, type: 'llm-response', iteration, textPreview }
|
|
13
|
+
* { t, type: 'tool-execute', iteration, name, params }
|
|
14
|
+
* { t, type: 'tool-result', iteration, name, isError, outputPreview }
|
|
15
|
+
* { t, type: 'tool-error', iteration, name, error }
|
|
16
|
+
* { t, type: 'tool-blocked', iteration, name, reason }
|
|
17
|
+
* { t, type: 'permission-request', name, primary, risk }
|
|
18
|
+
* { t, type: 'permission-decision', name, primary, choice }
|
|
19
|
+
* { t, type: 'permission-denied', name, primary, source, reason }
|
|
20
|
+
* { t, type: 'final-response', iterations, hitLimit, finalPreview }
|
|
21
|
+
*
|
|
22
|
+
* Keeping this in host-kit so both CLI and extension can use the same
|
|
23
|
+
* logger without duplicating code.
|
|
24
|
+
*/
|
|
25
|
+
export interface TurnLogger {
|
|
26
|
+
append(event: Record<string, unknown>): Promise<void>;
|
|
27
|
+
readonly filePath: string;
|
|
28
|
+
}
|
|
29
|
+
export interface TurnLogEvent {
|
|
30
|
+
t?: string;
|
|
31
|
+
type: string;
|
|
32
|
+
[key: string]: unknown;
|
|
33
|
+
}
|
|
34
|
+
export interface TurnTrace {
|
|
35
|
+
id: string;
|
|
36
|
+
filePath: string;
|
|
37
|
+
events: TurnLogEvent[];
|
|
38
|
+
summary: TurnTraceSummary;
|
|
39
|
+
}
|
|
40
|
+
export type TurnTraceScope = 'workspace' | 'global' | 'external';
|
|
41
|
+
export interface TurnTraceSummary {
|
|
42
|
+
id: string;
|
|
43
|
+
filePath: string;
|
|
44
|
+
scope: TurnTraceScope;
|
|
45
|
+
workspace: string;
|
|
46
|
+
startedAt?: string;
|
|
47
|
+
prompt?: string;
|
|
48
|
+
finalPreview?: string;
|
|
49
|
+
iterations: number;
|
|
50
|
+
hitLimit: boolean;
|
|
51
|
+
toolCalls: number;
|
|
52
|
+
tools: string[];
|
|
53
|
+
blockedTools: number;
|
|
54
|
+
errors: number;
|
|
55
|
+
retries: number;
|
|
56
|
+
nativeFallbacks: number;
|
|
57
|
+
permissionRequests: number;
|
|
58
|
+
permissionDecisions: number;
|
|
59
|
+
permissionDenials: number;
|
|
60
|
+
compactions: number;
|
|
61
|
+
checkpoints: number;
|
|
62
|
+
status: 'completed' | 'failed' | 'blocked' | 'cancelled' | 'unknown';
|
|
63
|
+
}
|
|
64
|
+
export interface TurnTraceListOptions {
|
|
65
|
+
limit?: number;
|
|
66
|
+
includeGlobal?: boolean;
|
|
67
|
+
status?: TurnTraceSummary['status'] | TurnTraceSummary['status'][];
|
|
68
|
+
}
|
|
69
|
+
interface ReadTurnTraceOptions {
|
|
70
|
+
workspaceRoot?: string;
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Open a log file for a single agent turn. The filename includes a UTC
|
|
74
|
+
* timestamp and a random suffix so parallel turns (rare) can't collide.
|
|
75
|
+
*/
|
|
76
|
+
export declare function openTurnLog(workspaceRoot: string): Promise<TurnLogger>;
|
|
77
|
+
/** Truncate a string for log preview — protects against multi-MB tool outputs. */
|
|
78
|
+
export declare function previewText(s: unknown): string;
|
|
79
|
+
export declare function listTurnTraceFiles(workspaceRoot: string, optionsOrLimit?: number | TurnTraceListOptions): Promise<string[]>;
|
|
80
|
+
export declare function listTurnTraces(workspaceRoot: string, optionsOrLimit?: number | TurnTraceListOptions): Promise<TurnTrace[]>;
|
|
81
|
+
export declare function readTurnTrace(filePath: string, options?: ReadTurnTraceOptions): Promise<TurnTrace | null>;
|
|
82
|
+
export declare function readTurnTraceById(workspaceRoot: string, idOrPath: string, optionsOrLimit?: number | TurnTraceListOptions): Promise<TurnTrace | null>;
|
|
83
|
+
export declare function parseTurnLog(text: string): TurnLogEvent[];
|
|
84
|
+
export declare function summarizeTurnTrace(id: string, filePath: string, events: TurnLogEvent[], options?: ReadTurnTraceOptions): TurnTraceSummary;
|
|
85
|
+
export declare function formatTurnTraceMarkdown(trace: TurnTrace, options?: {
|
|
86
|
+
maxEvents?: number;
|
|
87
|
+
}): string;
|
|
88
|
+
export {};
|
|
89
|
+
//# sourceMappingURL=turnLog.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"turnLog.d.ts","sourceRoot":"","sources":["../src/turnLog.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AASH,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACtD,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,YAAY;IAC3B,CAAC,CAAC,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,YAAY,EAAE,CAAC;IACvB,OAAO,EAAE,gBAAgB,CAAC;CAC3B;AAED,MAAM,MAAM,cAAc,GAAG,WAAW,GAAG,QAAQ,GAAG,UAAU,CAAC;AAEjE,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,cAAc,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,OAAO,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EAAE,MAAM,CAAC;IACxB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,WAAW,GAAG,QAAQ,GAAG,SAAS,GAAG,WAAW,GAAG,SAAS,CAAC;CACtE;AAED,MAAM,WAAW,oBAAoB;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,MAAM,CAAC,EAAE,gBAAgB,CAAC,QAAQ,CAAC,GAAG,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC;CACpE;AAED,UAAU,oBAAoB;IAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAaD;;;GAGG;AACH,wBAAsB,WAAW,CAAC,aAAa,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAiB5E;AAED,kFAAkF;AAClF,wBAAgB,WAAW,CAAC,CAAC,EAAE,OAAO,GAAG,MAAM,CAG9C;AA6ED,wBAAsB,kBAAkB,CAAC,aAAa,EAAE,MAAM,EAAE,cAAc,GAAE,MAAM,GAAG,oBAAyB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAgBrI;AAoBD,wBAAsB,cAAc,CAAC,aAAa,EAAE,MAAM,EAAE,cAAc,GAAE,MAAM,GAAG,oBAAyB,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAcpI;AAED,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,GAAE,oBAAyB,GAAG,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,CAenH;AAED,wBAAsB,iBAAiB,CAAC,aAAa,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,cAAc,GAAE,MA
AM,GAAG,oBAA0B,GAAG,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,CAa/J;AAED,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,EAAE,CAczD;AAED,wBAAgB,kBAAkB,CAAC,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,EAAE,OAAO,GAAE,oBAAyB,GAAG,gBAAgB,CA0F7I;AAED,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,SAAS,EAAE,OAAO,GAAE;IAAE,SAAS,CAAC,EAAE,MAAM,CAAA;CAAO,GAAG,MAAM,CA8BtG"}
|