@dotsetlabs/bellwether 2.1.2 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +2 -2
- package/dist/baseline/golden-output.d.ts +0 -4
- package/dist/baseline/golden-output.js +2 -47
- package/dist/cli/commands/baseline-accept.js +14 -45
- package/dist/cli/commands/baseline.js +23 -78
- package/dist/cli/commands/check-formatters.d.ts +10 -0
- package/dist/cli/commands/check-formatters.js +160 -0
- package/dist/cli/commands/check.js +33 -241
- package/dist/cli/commands/contract.js +1 -13
- package/dist/cli/commands/explore.js +19 -66
- package/dist/cli/commands/watch.js +2 -3
- package/dist/cli/output.d.ts +0 -42
- package/dist/cli/output.js +73 -110
- package/dist/cli/utils/config-loader.d.ts +6 -0
- package/dist/cli/utils/config-loader.js +19 -0
- package/dist/cli/utils/error-hints.d.ts +9 -0
- package/dist/cli/utils/error-hints.js +128 -0
- package/dist/cli/utils/headers.js +2 -25
- package/dist/cli/utils/path-resolution.d.ts +10 -0
- package/dist/cli/utils/path-resolution.js +27 -0
- package/dist/cli/utils/report-loader.d.ts +9 -0
- package/dist/cli/utils/report-loader.js +31 -0
- package/dist/cli/utils/server-runtime.d.ts +16 -0
- package/dist/cli/utils/server-runtime.js +31 -0
- package/dist/config/defaults.d.ts +0 -1
- package/dist/config/defaults.js +0 -1
- package/dist/constants/core.d.ts +0 -42
- package/dist/constants/core.js +0 -50
- package/dist/contract/validator.js +2 -47
- package/dist/interview/question-category.d.ts +5 -0
- package/dist/interview/question-category.js +2 -0
- package/dist/interview/question-types.d.ts +80 -0
- package/dist/interview/question-types.js +2 -0
- package/dist/interview/schema-test-generator.d.ts +3 -29
- package/dist/interview/schema-test-generator.js +11 -286
- package/dist/interview/test-fixtures.d.ts +19 -0
- package/dist/interview/test-fixtures.js +2 -0
- package/dist/interview/types.d.ts +5 -80
- package/dist/persona/types.d.ts +3 -5
- package/dist/scenarios/types.d.ts +1 -1
- package/dist/transport/auth-errors.d.ts +15 -0
- package/dist/transport/auth-errors.js +22 -0
- package/dist/transport/http-transport.js +7 -9
- package/dist/transport/mcp-client.d.ts +0 -4
- package/dist/transport/mcp-client.js +13 -37
- package/dist/transport/sse-transport.d.ts +0 -1
- package/dist/transport/sse-transport.js +13 -28
- package/dist/utils/content-type.d.ts +14 -0
- package/dist/utils/content-type.js +37 -0
- package/dist/utils/http-headers.d.ts +9 -0
- package/dist/utils/http-headers.js +34 -0
- package/dist/utils/smart-truncate.js +2 -23
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [2.1.3] - 2026-02-18
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- **Check reporting utilities for CI**: Added dedicated check output formatting plus new GitHub Action scripts for metrics extraction and JSON-to-JUnit/SARIF conversion.
|
|
15
|
+
- **Shared CLI/runtime helper modules**: Added reusable helpers for server runtime setup, config/report loading, path resolution, and improved contextual error hints.
|
|
16
|
+
- **Docs generation bootstrap script**: Added `scripts/build-docs.mjs` so docs generation can self-bootstrap website dependencies.
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
|
|
20
|
+
- **Modular check and baseline command flows**: Refactored `check`/`baseline` internals to reduce coupling and make command behavior easier to maintain.
|
|
21
|
+
- **Transport/interview utility refactors**: Consolidated header/content-type/auth handling and split interview/question fixture types into clearer shared modules.
|
|
22
|
+
- **Documentation updates for current CLI behavior**: Refreshed docs and sidebar navigation for revised `check`/`baseline` behavior and the `validate-config` command.
|
|
23
|
+
|
|
24
|
+
### Removed
|
|
25
|
+
|
|
26
|
+
- **Unused core constants module**: Removed `src/constants/core.ts` after internal CLI/runtime simplification.
|
|
27
|
+
|
|
10
28
|
## [2.1.2] - 2026-02-16
|
|
11
29
|
|
|
12
30
|
### Added
|
package/README.md
CHANGED
|
@@ -132,9 +132,9 @@ Comparisons are **protocol-version-aware** — version-specific fields (annotati
|
|
|
132
132
|
## GitHub Action
|
|
133
133
|
|
|
134
134
|
```yaml
|
|
135
|
-
- uses: dotsetlabs/bellwether@v2.1.2
|
|
135
|
+
- uses: dotsetlabs/bellwether@v2.1.3
|
|
136
136
|
with:
|
|
137
|
-
version: '2.1.2'
|
|
137
|
+
version: '2.1.3'
|
|
138
138
|
server-command: 'npx @mcp/your-server'
|
|
139
139
|
baseline-path: './bellwether-baseline.json'
|
|
140
140
|
fail-on-severity: 'warning'
|
|
@@ -155,8 +155,4 @@ export declare function deleteGoldenOutput(toolName: string, storePath: string,
|
|
|
155
155
|
* Compare current output against a golden output.
|
|
156
156
|
*/
|
|
157
157
|
export declare function compareWithGolden(golden: GoldenOutput, currentResponse: MCPToolCallResult): GoldenComparisonResult;
|
|
158
|
-
/**
|
|
159
|
-
* Compare all golden outputs against current tool responses.
|
|
160
|
-
*/
|
|
161
|
-
export declare function compareAllGoldens(storePath: string, getToolResponse: (toolName: string, args: Record<string, unknown>) => Promise<MCPToolCallResult>): Promise<GoldenComparisonResult[]>;
|
|
162
158
|
//# sourceMappingURL=golden-output.d.ts.map
|
|
@@ -9,6 +9,7 @@ import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
|
9
9
|
import { join, dirname } from 'path';
|
|
10
10
|
import { createHash } from 'crypto';
|
|
11
11
|
import { PATHS } from '../constants.js';
|
|
12
|
+
import { detectContentType as detectCommonContentType } from '../utils/content-type.js';
|
|
12
13
|
// Constants
|
|
13
14
|
const GOLDEN_STORE_VERSION = 1;
|
|
14
15
|
const DEFAULT_GOLDEN_DIR = '.bellwether/golden';
|
|
@@ -236,58 +237,12 @@ export function compareWithGolden(golden, currentResponse) {
|
|
|
236
237
|
summary: generateComparisonSummary(disallowedDiffs, mode),
|
|
237
238
|
};
|
|
238
239
|
}
|
|
239
|
-
/**
|
|
240
|
-
* Compare all golden outputs against current tool responses.
|
|
241
|
-
*/
|
|
242
|
-
export function compareAllGoldens(storePath, getToolResponse) {
|
|
243
|
-
const store = loadGoldenStore(storePath);
|
|
244
|
-
return Promise.all(store.outputs.map(async (golden) => {
|
|
245
|
-
try {
|
|
246
|
-
const response = await getToolResponse(golden.toolName, golden.inputArgs);
|
|
247
|
-
return compareWithGolden(golden, response);
|
|
248
|
-
}
|
|
249
|
-
catch (error) {
|
|
250
|
-
return {
|
|
251
|
-
toolName: golden.toolName,
|
|
252
|
-
passed: false,
|
|
253
|
-
severity: 'breaking',
|
|
254
|
-
mode: golden.tolerance.mode,
|
|
255
|
-
goldenCapturedAt: golden.capturedAt,
|
|
256
|
-
differences: [{
|
|
257
|
-
type: 'changed',
|
|
258
|
-
path: '$',
|
|
259
|
-
expected: 'successful response',
|
|
260
|
-
actual: `error: ${error instanceof Error ? error.message : String(error)}`,
|
|
261
|
-
allowed: false,
|
|
262
|
-
description: 'Tool call failed',
|
|
263
|
-
}],
|
|
264
|
-
summary: `Tool call failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
265
|
-
};
|
|
266
|
-
}
|
|
267
|
-
}));
|
|
268
|
-
}
|
|
269
240
|
// Helper functions
|
|
270
241
|
/**
|
|
271
242
|
* Detect content type from raw output.
|
|
272
243
|
*/
|
|
273
244
|
function detectContentType(raw) {
|
|
274
|
-
|
|
275
|
-
// Check for JSON
|
|
276
|
-
if ((trimmed.startsWith('{') && trimmed.endsWith('}')) ||
|
|
277
|
-
(trimmed.startsWith('[') && trimmed.endsWith(']'))) {
|
|
278
|
-
try {
|
|
279
|
-
JSON.parse(trimmed);
|
|
280
|
-
return 'json';
|
|
281
|
-
}
|
|
282
|
-
catch {
|
|
283
|
-
// Not valid JSON
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
// Check for Markdown patterns
|
|
287
|
-
if (/^#|^\*{1,3}[^*]|\[.*\]\(.*\)|^```/.test(trimmed)) {
|
|
288
|
-
return 'markdown';
|
|
289
|
-
}
|
|
290
|
-
return 'text';
|
|
245
|
+
return detectCommonContentType(raw);
|
|
291
246
|
}
|
|
292
247
|
/**
|
|
293
248
|
* Compute a hash of content for quick comparison.
|
|
@@ -11,51 +11,14 @@
|
|
|
11
11
|
* bellwether baseline accept --dry-run # Show what would be accepted
|
|
12
12
|
*/
|
|
13
13
|
import { Command } from 'commander';
|
|
14
|
-
import { existsSync
|
|
14
|
+
import { existsSync } from 'fs';
|
|
15
15
|
import { join } from 'path';
|
|
16
16
|
import { createBaseline, saveBaseline, loadBaseline, compareBaselines, acceptDrift, formatDiffText, } from '../../baseline/index.js';
|
|
17
|
-
import { loadConfig, ConfigNotFoundError } from '../../config/loader.js';
|
|
18
17
|
import { EXIT_CODES } from '../../constants.js';
|
|
19
18
|
import * as output from '../output.js';
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
}
|
|
24
|
-
catch (error) {
|
|
25
|
-
if (error instanceof ConfigNotFoundError) {
|
|
26
|
-
output.error(error.message);
|
|
27
|
-
process.exit(EXIT_CODES.ERROR);
|
|
28
|
-
}
|
|
29
|
-
throw error;
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
/**
|
|
33
|
-
* Load interview result from JSON report.
|
|
34
|
-
*/
|
|
35
|
-
function loadInterviewResult(reportPath) {
|
|
36
|
-
if (!existsSync(reportPath)) {
|
|
37
|
-
throw new Error(`Test report not found: ${reportPath}\n\n` +
|
|
38
|
-
'Run `bellwether check` first to generate a report.\n' +
|
|
39
|
-
'Configure in bellwether.yaml:\n' +
|
|
40
|
-
' output:\n' +
|
|
41
|
-
' format: json # or "both" for JSON + markdown');
|
|
42
|
-
}
|
|
43
|
-
const content = readFileSync(reportPath, 'utf-8');
|
|
44
|
-
let result;
|
|
45
|
-
try {
|
|
46
|
-
result = JSON.parse(content);
|
|
47
|
-
}
|
|
48
|
-
catch (error) {
|
|
49
|
-
throw new Error(`Invalid JSON in report file ${reportPath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
50
|
-
}
|
|
51
|
-
// Validate that this is a check mode result
|
|
52
|
-
if (result.metadata.model && result.metadata.model !== 'check') {
|
|
53
|
-
throw new Error(`Baseline operations only work with check mode results.\n\n` +
|
|
54
|
-
`The report at ${reportPath} was created with explore mode.\n` +
|
|
55
|
-
'Run `bellwether check` to generate a check mode report first.');
|
|
56
|
-
}
|
|
57
|
-
return result;
|
|
58
|
-
}
|
|
19
|
+
import { loadConfigOrExit } from '../utils/config-loader.js';
|
|
20
|
+
import { loadCheckInterviewResult } from '../utils/report-loader.js';
|
|
21
|
+
import { resolvePathFromOutputDirOrCwd } from '../utils/path-resolution.js';
|
|
59
22
|
export const acceptCommand = new Command('accept')
|
|
60
23
|
.description('Accept detected drift as intentional and update the baseline')
|
|
61
24
|
.option('-c, --config <path>', 'Path to config file')
|
|
@@ -74,9 +37,7 @@ export const acceptCommand = new Command('accept')
|
|
|
74
37
|
process.exit(EXIT_CODES.ERROR);
|
|
75
38
|
}
|
|
76
39
|
// Determine paths
|
|
77
|
-
const baselinePath = resolvedBaselinePath
|
|
78
|
-
? resolvedBaselinePath
|
|
79
|
-
: join(outputDir, resolvedBaselinePath);
|
|
40
|
+
const baselinePath = resolvePathFromOutputDirOrCwd(resolvedBaselinePath, outputDir);
|
|
80
41
|
const reportPath = options.report || join(outputDir, config.output.files.checkReport);
|
|
81
42
|
// Load the existing baseline
|
|
82
43
|
if (!existsSync(baselinePath)) {
|
|
@@ -96,7 +57,15 @@ export const acceptCommand = new Command('accept')
|
|
|
96
57
|
// Load the current test results
|
|
97
58
|
let result;
|
|
98
59
|
try {
|
|
99
|
-
result =
|
|
60
|
+
result = loadCheckInterviewResult(reportPath, {
|
|
61
|
+
missingReportMessage: 'Run `bellwether check` first to generate a report.\n' +
|
|
62
|
+
'Configure in bellwether.yaml:\n' +
|
|
63
|
+
' output:\n' +
|
|
64
|
+
' format: json # or "both" for JSON + markdown',
|
|
65
|
+
invalidModeMessage: () => `Baseline operations only work with check mode results.\n\n` +
|
|
66
|
+
`The report at ${reportPath} was created with explore mode.\n` +
|
|
67
|
+
'Run `bellwether check` to generate a check mode report first.',
|
|
68
|
+
});
|
|
100
69
|
}
|
|
101
70
|
catch (error) {
|
|
102
71
|
output.error(error instanceof Error ? error.message : String(error));
|
|
@@ -9,61 +9,17 @@
|
|
|
9
9
|
* - accept Accept detected drift as intentional
|
|
10
10
|
*/
|
|
11
11
|
import { Command } from 'commander';
|
|
12
|
-
import { existsSync
|
|
12
|
+
import { existsSync } from 'fs';
|
|
13
13
|
import { join, basename } from 'path';
|
|
14
14
|
import { createBaseline, saveBaseline, loadBaseline, compareBaselines, formatDiffText, formatDiffJson, formatDiffMarkdown, formatDiffCompact, verifyBaselineHash, getBaselineGeneratedAt, getBaselineMode, getBaselineServerCommand, getToolFingerprints, } from '../../baseline/index.js';
|
|
15
15
|
import { BaselineVersionError } from '../../baseline/version.js';
|
|
16
16
|
import { EXIT_CODES, MCP } from '../../constants.js';
|
|
17
17
|
import { getExcludedFeatureNames } from '../../protocol/index.js';
|
|
18
18
|
import { acceptCommand } from './baseline-accept.js';
|
|
19
|
-
import { loadConfig, ConfigNotFoundError } from '../../config/loader.js';
|
|
20
19
|
import * as output from '../output.js';
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
*/
|
|
25
|
-
function loadInterviewResult(reportPath) {
|
|
26
|
-
if (!existsSync(reportPath)) {
|
|
27
|
-
throw new Error(`Test report not found: ${reportPath}\n\n` +
|
|
28
|
-
'Run `bellwether check` first with JSON output enabled.\n' +
|
|
29
|
-
'Configure in bellwether.yaml:\n' +
|
|
30
|
-
' output:\n' +
|
|
31
|
-
' format: json # or "both" for JSON + markdown');
|
|
32
|
-
}
|
|
33
|
-
const content = readFileSync(reportPath, 'utf-8');
|
|
34
|
-
let result;
|
|
35
|
-
try {
|
|
36
|
-
result = JSON.parse(content);
|
|
37
|
-
}
|
|
38
|
-
catch (error) {
|
|
39
|
-
throw new Error(`Invalid JSON in report file ${reportPath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
40
|
-
}
|
|
41
|
-
// Validate that this is a check mode result, not explore
|
|
42
|
-
if (result.metadata.model && result.metadata.model !== 'check') {
|
|
43
|
-
throw new Error(`Baseline operations only work with check mode results.\n\n` +
|
|
44
|
-
`The report at ${reportPath} was created with explore mode (model: ${result.metadata.model}).\n` +
|
|
45
|
-
`Explore results are for documentation only and cannot be used for baselines.\n\n` +
|
|
46
|
-
'To create a baseline:\n' +
|
|
47
|
-
' 1. Run `bellwether check` to generate a check mode report\n' +
|
|
48
|
-
' 2. Run `bellwether baseline save` to create the baseline');
|
|
49
|
-
}
|
|
50
|
-
return result;
|
|
51
|
-
}
|
|
52
|
-
/**
|
|
53
|
-
* Get the output directory from config or use current directory.
|
|
54
|
-
*/
|
|
55
|
-
function loadConfigOrExit(configPath) {
|
|
56
|
-
try {
|
|
57
|
-
return loadConfig(configPath);
|
|
58
|
-
}
|
|
59
|
-
catch (error) {
|
|
60
|
-
if (error instanceof ConfigNotFoundError) {
|
|
61
|
-
output.error(error.message);
|
|
62
|
-
process.exit(EXIT_CODES.ERROR);
|
|
63
|
-
}
|
|
64
|
-
throw error;
|
|
65
|
-
}
|
|
66
|
-
}
|
|
20
|
+
import { loadConfigOrExit } from '../utils/config-loader.js';
|
|
21
|
+
import { loadCheckInterviewResult } from '../utils/report-loader.js';
|
|
22
|
+
import { resolvePathFromOutputDir, resolvePathFromOutputDirOrCwd, } from '../utils/path-resolution.js';
|
|
67
23
|
export const baselineCommand = new Command('baseline')
|
|
68
24
|
.description('Manage baselines for drift detection')
|
|
69
25
|
.addHelpText('after', `
|
|
@@ -99,16 +55,21 @@ baselineCommand
|
|
|
99
55
|
// Load interview result
|
|
100
56
|
let result;
|
|
101
57
|
try {
|
|
102
|
-
result =
|
|
58
|
+
result = loadCheckInterviewResult(reportPath, {
|
|
59
|
+
invalidModeMessage: (model) => `Baseline operations only work with check mode results.\n\n` +
|
|
60
|
+
`The report at ${reportPath} was created with explore mode (model: ${model}).\n` +
|
|
61
|
+
`Explore results are for documentation only and cannot be used for baselines.\n\n` +
|
|
62
|
+
'To create a baseline:\n' +
|
|
63
|
+
' 1. Run `bellwether check` to generate a check mode report\n' +
|
|
64
|
+
' 2. Run `bellwether baseline save` to create the baseline',
|
|
65
|
+
});
|
|
103
66
|
}
|
|
104
67
|
catch (error) {
|
|
105
68
|
output.error(error instanceof Error ? error.message : String(error));
|
|
106
69
|
process.exit(EXIT_CODES.ERROR);
|
|
107
70
|
}
|
|
108
71
|
// Determine baseline path (relative to output dir if not absolute)
|
|
109
|
-
const finalPath = resolvedBaselinePath
|
|
110
|
-
? resolvedBaselinePath
|
|
111
|
-
: join(outputDir, resolvedBaselinePath);
|
|
72
|
+
const finalPath = resolvePathFromOutputDir(resolvedBaselinePath, outputDir);
|
|
112
73
|
// Check for existing baseline
|
|
113
74
|
if (existsSync(finalPath) && !options.force) {
|
|
114
75
|
output.error(`Baseline already exists: ${finalPath}`);
|
|
@@ -147,28 +108,7 @@ baselineCommand
|
|
|
147
108
|
output.error('No baseline path provided. Set baseline.path or baseline.comparePath in config, or pass a path argument.');
|
|
148
109
|
process.exit(EXIT_CODES.ERROR);
|
|
149
110
|
}
|
|
150
|
-
|
|
151
|
-
// 1. If absolute path, use as-is
|
|
152
|
-
// 2. First try relative to outputDir (e.g., .bellwether/)
|
|
153
|
-
// 3. Fall back to relative to cwd
|
|
154
|
-
let fullBaselinePath;
|
|
155
|
-
if (resolvedBaselinePath.startsWith('/')) {
|
|
156
|
-
fullBaselinePath = resolvedBaselinePath;
|
|
157
|
-
}
|
|
158
|
-
else {
|
|
159
|
-
const outputDirPath = join(outputDir, resolvedBaselinePath);
|
|
160
|
-
const cwdPath = join(process.cwd(), resolvedBaselinePath);
|
|
161
|
-
if (existsSync(outputDirPath)) {
|
|
162
|
-
fullBaselinePath = outputDirPath;
|
|
163
|
-
}
|
|
164
|
-
else if (existsSync(cwdPath)) {
|
|
165
|
-
fullBaselinePath = cwdPath;
|
|
166
|
-
}
|
|
167
|
-
else {
|
|
168
|
-
// Default to outputDir path for error message consistency
|
|
169
|
-
fullBaselinePath = outputDirPath;
|
|
170
|
-
}
|
|
171
|
-
}
|
|
111
|
+
const fullBaselinePath = resolvePathFromOutputDirOrCwd(resolvedBaselinePath, outputDir);
|
|
172
112
|
if (!existsSync(fullBaselinePath)) {
|
|
173
113
|
output.error(`Baseline not found: ${fullBaselinePath}`);
|
|
174
114
|
output.error('\nRun `bellwether baseline save` to create a baseline.');
|
|
@@ -186,7 +126,14 @@ baselineCommand
|
|
|
186
126
|
const reportPath = options.report || join(outputDir, config.output.files.checkReport);
|
|
187
127
|
let result;
|
|
188
128
|
try {
|
|
189
|
-
result =
|
|
129
|
+
result = loadCheckInterviewResult(reportPath, {
|
|
130
|
+
invalidModeMessage: (model) => `Baseline operations only work with check mode results.\n\n` +
|
|
131
|
+
`The report at ${reportPath} was created with explore mode (model: ${model}).\n` +
|
|
132
|
+
`Explore results are for documentation only and cannot be used for baselines.\n\n` +
|
|
133
|
+
'To create a baseline:\n' +
|
|
134
|
+
' 1. Run `bellwether check` to generate a check mode report\n' +
|
|
135
|
+
' 2. Run `bellwether baseline save` to create the baseline',
|
|
136
|
+
});
|
|
190
137
|
}
|
|
191
138
|
catch (error) {
|
|
192
139
|
output.error(error instanceof Error ? error.message : String(error));
|
|
@@ -285,9 +232,7 @@ baselineCommand
|
|
|
285
232
|
process.exit(EXIT_CODES.ERROR);
|
|
286
233
|
}
|
|
287
234
|
// Determine full path
|
|
288
|
-
const fullPath = resolvedBaselinePath
|
|
289
|
-
? resolvedBaselinePath
|
|
290
|
-
: join(outputDir, resolvedBaselinePath);
|
|
235
|
+
const fullPath = resolvePathFromOutputDirOrCwd(resolvedBaselinePath, outputDir);
|
|
291
236
|
if (!existsSync(fullPath)) {
|
|
292
237
|
output.error(`Baseline not found: ${fullPath}`);
|
|
293
238
|
output.error('\nRun `bellwether baseline save` to create a baseline.');
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { type BehavioralDiff, type BehavioralBaseline } from '../../baseline/index.js';
|
|
2
|
+
/**
|
|
3
|
+
* Format a diff using the requested output format.
|
|
4
|
+
*/
|
|
5
|
+
export declare function formatDiffOutput(diff: BehavioralDiff, format: string, baselinePath: string): string;
|
|
6
|
+
/**
|
|
7
|
+
* Format check-only output when no baseline is provided.
|
|
8
|
+
*/
|
|
9
|
+
export declare function formatCheckResults(baseline: BehavioralBaseline, format: string): string | null;
|
|
10
|
+
//# sourceMappingURL=check-formatters.d.ts.map
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import { formatDiffText, formatDiffJson, formatDiffCompact, formatDiffGitHubActions, formatDiffMarkdown, formatDiffJUnit, formatDiffSarif, getToolFingerprints, } from '../../baseline/index.js';
|
|
2
|
+
/**
 * Render a behavioral diff in the requested output format.
 *
 * The format name is compared case-insensitively. Recognized names are
 * `json`, `compact`, `github`, `markdown`/`md`, `junit`/`junit-xml`/`xml`
 * and `sarif`. `text` and every unrecognized name produce the plain-text
 * rendering.
 *
 * @param diff - Diff to render; handed unchanged to the selected formatter.
 * @param format - Name of the requested output format.
 * @param baselinePath - Baseline path; only forwarded to the SARIF formatter.
 * @returns The formatted diff as returned by the selected formatter.
 */
export function formatDiffOutput(diff, format, baselinePath) {
    const requested = format.toLowerCase();
    if (requested === 'json') {
        return formatDiffJson(diff);
    }
    if (requested === 'compact') {
        return formatDiffCompact(diff);
    }
    if (requested === 'github') {
        return formatDiffGitHubActions(diff);
    }
    if (requested === 'markdown' || requested === 'md') {
        return formatDiffMarkdown(diff);
    }
    if (requested === 'junit' || requested === 'junit-xml' || requested === 'xml') {
        // The JUnit formatter is always called with this fixed second argument.
        return formatDiffJUnit(diff, 'bellwether-check');
    }
    if (requested === 'sarif') {
        return formatDiffSarif(diff, baselinePath);
    }
    // 'text' and anything unrecognized share the plain-text fallback.
    return formatDiffText(diff);
}
|
|
27
|
+
/**
 * Render check-only results (no baseline comparison) for report formats.
 *
 * Only the JUnit family (`junit`, `junit-xml`, `xml`) and `sarif` have a
 * check-only rendering; the format name is compared case-insensitively.
 *
 * @param baseline - Baseline whose tool fingerprints are reported.
 * @param format - Name of the requested output format.
 * @returns The rendered report, or `null` when the format has no
 *          check-only rendering.
 */
export function formatCheckResults(baseline, format) {
    const requested = format.toLowerCase();
    const junitAliases = ['junit', 'junit-xml', 'xml'];
    if (junitAliases.includes(requested)) {
        return formatCheckResultsJUnit(baseline);
    }
    if (requested === 'sarif') {
        return formatCheckResultsSarif(baseline);
    }
    return null;
}
|
|
42
|
+
function formatCheckResultsJUnit(baseline) {
|
|
43
|
+
const tools = getToolFingerprints(baseline);
|
|
44
|
+
const lines = [];
|
|
45
|
+
const securityFailures = tools.filter((t) => t.securityFingerprint?.findings?.some((f) => f.riskLevel === 'critical' || f.riskLevel === 'high')).length;
|
|
46
|
+
lines.push('<?xml version="1.0" encoding="UTF-8"?>');
|
|
47
|
+
lines.push('<testsuites>');
|
|
48
|
+
lines.push(` <testsuite name="bellwether-check" tests="${tools.length}" failures="${securityFailures}" errors="0">`);
|
|
49
|
+
for (const tool of tools) {
|
|
50
|
+
const successRate = tool.baselineSuccessRate ?? 1;
|
|
51
|
+
const status = successRate >= 0.9 ? 'passed' : 'warning';
|
|
52
|
+
lines.push(` <testcase name="${tool.name}" classname="mcp-tools" time="0">`);
|
|
53
|
+
lines.push(` <system-out>Success rate: ${(successRate * 100).toFixed(0)}%</system-out>`);
|
|
54
|
+
if (status === 'warning') {
|
|
55
|
+
lines.push(' <system-err>Tool has success rate below 90%</system-err>');
|
|
56
|
+
}
|
|
57
|
+
lines.push(' </testcase>');
|
|
58
|
+
}
|
|
59
|
+
const securityTools = tools.filter((t) => t.securityFingerprint?.findings?.length);
|
|
60
|
+
if (securityTools.length > 0) {
|
|
61
|
+
lines.push(' <!-- Security findings -->');
|
|
62
|
+
for (const tool of securityTools) {
|
|
63
|
+
const findings = tool.securityFingerprint?.findings ?? [];
|
|
64
|
+
const criticalHigh = findings.filter((f) => f.riskLevel === 'critical' || f.riskLevel === 'high').length;
|
|
65
|
+
if (criticalHigh > 0) {
|
|
66
|
+
lines.push(` <testcase name="${tool.name}-security" classname="security">`);
|
|
67
|
+
lines.push(` <failure message="${criticalHigh} critical/high security findings">`);
|
|
68
|
+
for (const finding of findings.filter((f) => f.riskLevel === 'critical' || f.riskLevel === 'high')) {
|
|
69
|
+
lines.push(` ${finding.riskLevel.toUpperCase()}: ${finding.title} (${finding.cweId})`);
|
|
70
|
+
}
|
|
71
|
+
lines.push(' </failure>');
|
|
72
|
+
lines.push(' </testcase>');
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
lines.push(' </testsuite>');
|
|
77
|
+
lines.push('</testsuites>');
|
|
78
|
+
return lines.join('\n');
|
|
79
|
+
}
|
|
80
|
+
/**
 * Render check-only results as a SARIF 2.1.0 JSON document.
 *
 * Results are emitted in two groups, in this order:
 *  1. one result per security finding of every tool that has findings,
 *     with critical/high mapped to `error`, medium to `warning` and
 *     everything else to `note`; the rule id is the finding's CWE id,
 *     or `BWH-SEC` when none is set;
 *  2. one `BWH-REL` warning per tool whose success rate is below 90%
 *     (a missing rate counts as 100%).
 *
 * Every result points at the same artifact: the baseline's server command,
 * else the server name, else `mcp-server`.
 *
 * @param baseline - Baseline whose tool fingerprints are reported.
 * @returns The SARIF document serialized with two-space indentation.
 */
function formatCheckResultsSarif(baseline) {
    const tools = getToolFingerprints(baseline);
    const serverUri = baseline.metadata?.serverCommand || baseline.server.name || 'mcp-server';
    // All results share the same location shape; build a fresh copy each time.
    const serverLocation = () => [
        {
            physicalLocation: {
                artifactLocation: { uri: serverUri },
                region: { startLine: 1 },
            },
        },
    ];
    const levelFor = (riskLevel) => {
        if (riskLevel === 'critical' || riskLevel === 'high') {
            return 'error';
        }
        if (riskLevel === 'medium') {
            return 'warning';
        }
        return 'note';
    };
    const securityResults = tools
        .filter((tool) => tool.securityFingerprint?.findings?.length)
        .flatMap((tool) => (tool.securityFingerprint?.findings ?? []).map((finding) => ({
        ruleId: finding.cweId || 'BWH-SEC',
        level: levelFor(finding.riskLevel),
        message: { text: `[${tool.name}] ${finding.title}: ${finding.description}` },
        locations: serverLocation(),
    })));
    const reliabilityResults = tools
        .map((tool) => ({ tool, successRate: tool.baselineSuccessRate ?? 1 }))
        .filter(({ successRate }) => successRate < 0.9)
        .map(({ tool, successRate }) => ({
        ruleId: 'BWH-REL',
        level: 'warning',
        message: {
            text: `Tool "${tool.name}" has ${(successRate * 100).toFixed(0)}% success rate`,
        },
        locations: serverLocation(),
    }));
    // Security results come first, reliability warnings after.
    const results = [...securityResults, ...reliabilityResults];
    const rules = [
        {
            id: 'BWH-SEC',
            name: 'SecurityFinding',
            shortDescription: { text: 'Security vulnerability detected' },
            defaultConfiguration: { level: 'warning' },
        },
        {
            id: 'BWH-REL',
            name: 'LowReliability',
            shortDescription: { text: 'Tool reliability below threshold' },
            defaultConfiguration: { level: 'warning' },
        },
    ];
    const driver = {
        name: 'bellwether',
        version: '1.0.0',
        informationUri: 'https://github.com/dotsetlabs/bellwether',
        rules,
    };
    const sarif = {
        $schema: 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
        version: '2.1.0',
        runs: [{ tool: { driver }, results }],
    };
    return JSON.stringify(sarif, null, 2);
}
|
|
160
|
+
//# sourceMappingURL=check-formatters.js.map
|