@dotsetlabs/bellwether 2.1.2 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/CHANGELOG.md +18 -0
  2. package/README.md +2 -2
  3. package/dist/baseline/golden-output.d.ts +0 -4
  4. package/dist/baseline/golden-output.js +2 -47
  5. package/dist/cli/commands/baseline-accept.js +14 -45
  6. package/dist/cli/commands/baseline.js +23 -78
  7. package/dist/cli/commands/check-formatters.d.ts +10 -0
  8. package/dist/cli/commands/check-formatters.js +160 -0
  9. package/dist/cli/commands/check.js +33 -241
  10. package/dist/cli/commands/contract.js +1 -13
  11. package/dist/cli/commands/explore.js +19 -66
  12. package/dist/cli/commands/watch.js +2 -3
  13. package/dist/cli/output.d.ts +0 -42
  14. package/dist/cli/output.js +73 -110
  15. package/dist/cli/utils/config-loader.d.ts +6 -0
  16. package/dist/cli/utils/config-loader.js +19 -0
  17. package/dist/cli/utils/error-hints.d.ts +9 -0
  18. package/dist/cli/utils/error-hints.js +128 -0
  19. package/dist/cli/utils/headers.js +2 -25
  20. package/dist/cli/utils/path-resolution.d.ts +10 -0
  21. package/dist/cli/utils/path-resolution.js +27 -0
  22. package/dist/cli/utils/report-loader.d.ts +9 -0
  23. package/dist/cli/utils/report-loader.js +31 -0
  24. package/dist/cli/utils/server-runtime.d.ts +16 -0
  25. package/dist/cli/utils/server-runtime.js +31 -0
  26. package/dist/config/defaults.d.ts +0 -1
  27. package/dist/config/defaults.js +0 -1
  28. package/dist/constants/core.d.ts +0 -42
  29. package/dist/constants/core.js +0 -50
  30. package/dist/contract/validator.js +2 -47
  31. package/dist/interview/question-category.d.ts +5 -0
  32. package/dist/interview/question-category.js +2 -0
  33. package/dist/interview/question-types.d.ts +80 -0
  34. package/dist/interview/question-types.js +2 -0
  35. package/dist/interview/schema-test-generator.d.ts +3 -29
  36. package/dist/interview/schema-test-generator.js +11 -286
  37. package/dist/interview/test-fixtures.d.ts +19 -0
  38. package/dist/interview/test-fixtures.js +2 -0
  39. package/dist/interview/types.d.ts +5 -80
  40. package/dist/persona/types.d.ts +3 -5
  41. package/dist/scenarios/types.d.ts +1 -1
  42. package/dist/transport/auth-errors.d.ts +15 -0
  43. package/dist/transport/auth-errors.js +22 -0
  44. package/dist/transport/http-transport.js +7 -9
  45. package/dist/transport/mcp-client.d.ts +0 -4
  46. package/dist/transport/mcp-client.js +13 -37
  47. package/dist/transport/sse-transport.d.ts +0 -1
  48. package/dist/transport/sse-transport.js +13 -28
  49. package/dist/utils/content-type.d.ts +14 -0
  50. package/dist/utils/content-type.js +37 -0
  51. package/dist/utils/http-headers.d.ts +9 -0
  52. package/dist/utils/http-headers.js +34 -0
  53. package/dist/utils/smart-truncate.js +2 -23
  54. package/package.json +2 -2
package/CHANGELOG.md CHANGED
@@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.1.3] - 2026-02-18
11
+
12
+ ### Added
13
+
14
+ - **Check reporting utilities for CI**: Added dedicated check output formatting plus new GitHub Action scripts for metrics extraction and JSON-to-JUnit/SARIF conversion.
15
+ - **Shared CLI/runtime helper modules**: Added reusable helpers for server runtime setup, config/report loading, path resolution, and improved contextual error hints.
16
+ - **Docs generation bootstrap script**: Added `scripts/build-docs.mjs` so docs generation can self-bootstrap website dependencies.
17
+
18
+ ### Changed
19
+
20
+ - **Modular check and baseline command flows**: Refactored `check`/`baseline` internals to reduce coupling and make command behavior easier to maintain.
21
+ - **Transport/interview utility refactors**: Consolidated header/content-type/auth handling and split interview/question fixture types into clearer shared modules.
22
+ - **Documentation updates for current CLI behavior**: Refreshed docs and sidebar navigation for revised `check`/`baseline` behavior and the `validate-config` command.
23
+
24
+ ### Removed
25
+
26
+ - **Unused core constants module**: Removed `src/constants/core.ts` after internal CLI/runtime simplification.
27
+
10
28
  ## [2.1.2] - 2026-02-16
11
29
 
12
30
  ### Added
package/README.md CHANGED
@@ -132,9 +132,9 @@ Comparisons are **protocol-version-aware** — version-specific fields (annotati
132
132
  ## GitHub Action
133
133
 
134
134
  ```yaml
135
- - uses: dotsetlabs/bellwether@v2.1.2
135
+ - uses: dotsetlabs/bellwether@v2.1.3
136
136
  with:
137
- version: '2.1.2'
137
+ version: '2.1.3'
138
138
  server-command: 'npx @mcp/your-server'
139
139
  baseline-path: './bellwether-baseline.json'
140
140
  fail-on-severity: 'warning'
package/dist/baseline/golden-output.d.ts CHANGED
@@ -155,8 +155,4 @@ export declare function deleteGoldenOutput(toolName: string, storePath: string,
155
155
  * Compare current output against a golden output.
156
156
  */
157
157
  export declare function compareWithGolden(golden: GoldenOutput, currentResponse: MCPToolCallResult): GoldenComparisonResult;
158
- /**
159
- * Compare all golden outputs against current tool responses.
160
- */
161
- export declare function compareAllGoldens(storePath: string, getToolResponse: (toolName: string, args: Record<string, unknown>) => Promise<MCPToolCallResult>): Promise<GoldenComparisonResult[]>;
162
158
  //# sourceMappingURL=golden-output.d.ts.map
package/dist/baseline/golden-output.js CHANGED
@@ -9,6 +9,7 @@ import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
9
9
  import { join, dirname } from 'path';
10
10
  import { createHash } from 'crypto';
11
11
  import { PATHS } from '../constants.js';
12
+ import { detectContentType as detectCommonContentType } from '../utils/content-type.js';
12
13
  // Constants
13
14
  const GOLDEN_STORE_VERSION = 1;
14
15
  const DEFAULT_GOLDEN_DIR = '.bellwether/golden';
@@ -236,58 +237,12 @@ export function compareWithGolden(golden, currentResponse) {
236
237
  summary: generateComparisonSummary(disallowedDiffs, mode),
237
238
  };
238
239
  }
239
- /**
240
- * Compare all golden outputs against current tool responses.
241
- */
242
- export function compareAllGoldens(storePath, getToolResponse) {
243
- const store = loadGoldenStore(storePath);
244
- return Promise.all(store.outputs.map(async (golden) => {
245
- try {
246
- const response = await getToolResponse(golden.toolName, golden.inputArgs);
247
- return compareWithGolden(golden, response);
248
- }
249
- catch (error) {
250
- return {
251
- toolName: golden.toolName,
252
- passed: false,
253
- severity: 'breaking',
254
- mode: golden.tolerance.mode,
255
- goldenCapturedAt: golden.capturedAt,
256
- differences: [{
257
- type: 'changed',
258
- path: '$',
259
- expected: 'successful response',
260
- actual: `error: ${error instanceof Error ? error.message : String(error)}`,
261
- allowed: false,
262
- description: 'Tool call failed',
263
- }],
264
- summary: `Tool call failed: ${error instanceof Error ? error.message : String(error)}`,
265
- };
266
- }
267
- }));
268
- }
269
240
  // Helper functions
270
241
  /**
271
242
  * Detect content type from raw output.
272
243
  */
273
244
  function detectContentType(raw) {
274
- const trimmed = raw.trim();
275
- // Check for JSON
276
- if ((trimmed.startsWith('{') && trimmed.endsWith('}')) ||
277
- (trimmed.startsWith('[') && trimmed.endsWith(']'))) {
278
- try {
279
- JSON.parse(trimmed);
280
- return 'json';
281
- }
282
- catch {
283
- // Not valid JSON
284
- }
285
- }
286
- // Check for Markdown patterns
287
- if (/^#|^\*{1,3}[^*]|\[.*\]\(.*\)|^```/.test(trimmed)) {
288
- return 'markdown';
289
- }
290
- return 'text';
245
+ return detectCommonContentType(raw);
291
246
  }
292
247
  /**
293
248
  * Compute a hash of content for quick comparison.
package/dist/cli/commands/baseline-accept.js CHANGED
@@ -11,51 +11,14 @@
11
11
  * bellwether baseline accept --dry-run # Show what would be accepted
12
12
  */
13
13
  import { Command } from 'commander';
14
- import { existsSync, readFileSync } from 'fs';
14
+ import { existsSync } from 'fs';
15
15
  import { join } from 'path';
16
16
  import { createBaseline, saveBaseline, loadBaseline, compareBaselines, acceptDrift, formatDiffText, } from '../../baseline/index.js';
17
- import { loadConfig, ConfigNotFoundError } from '../../config/loader.js';
18
17
  import { EXIT_CODES } from '../../constants.js';
19
18
  import * as output from '../output.js';
20
- function loadConfigOrExit(configPath) {
21
- try {
22
- return loadConfig(configPath);
23
- }
24
- catch (error) {
25
- if (error instanceof ConfigNotFoundError) {
26
- output.error(error.message);
27
- process.exit(EXIT_CODES.ERROR);
28
- }
29
- throw error;
30
- }
31
- }
32
- /**
33
- * Load interview result from JSON report.
34
- */
35
- function loadInterviewResult(reportPath) {
36
- if (!existsSync(reportPath)) {
37
- throw new Error(`Test report not found: ${reportPath}\n\n` +
38
- 'Run `bellwether check` first to generate a report.\n' +
39
- 'Configure in bellwether.yaml:\n' +
40
- ' output:\n' +
41
- ' format: json # or "both" for JSON + markdown');
42
- }
43
- const content = readFileSync(reportPath, 'utf-8');
44
- let result;
45
- try {
46
- result = JSON.parse(content);
47
- }
48
- catch (error) {
49
- throw new Error(`Invalid JSON in report file ${reportPath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
50
- }
51
- // Validate that this is a check mode result
52
- if (result.metadata.model && result.metadata.model !== 'check') {
53
- throw new Error(`Baseline operations only work with check mode results.\n\n` +
54
- `The report at ${reportPath} was created with explore mode.\n` +
55
- 'Run `bellwether check` to generate a check mode report first.');
56
- }
57
- return result;
58
- }
19
+ import { loadConfigOrExit } from '../utils/config-loader.js';
20
+ import { loadCheckInterviewResult } from '../utils/report-loader.js';
21
+ import { resolvePathFromOutputDirOrCwd } from '../utils/path-resolution.js';
59
22
  export const acceptCommand = new Command('accept')
60
23
  .description('Accept detected drift as intentional and update the baseline')
61
24
  .option('-c, --config <path>', 'Path to config file')
@@ -74,9 +37,7 @@ export const acceptCommand = new Command('accept')
74
37
  process.exit(EXIT_CODES.ERROR);
75
38
  }
76
39
  // Determine paths
77
- const baselinePath = resolvedBaselinePath.startsWith('/')
78
- ? resolvedBaselinePath
79
- : join(outputDir, resolvedBaselinePath);
40
+ const baselinePath = resolvePathFromOutputDirOrCwd(resolvedBaselinePath, outputDir);
80
41
  const reportPath = options.report || join(outputDir, config.output.files.checkReport);
81
42
  // Load the existing baseline
82
43
  if (!existsSync(baselinePath)) {
@@ -96,7 +57,15 @@ export const acceptCommand = new Command('accept')
96
57
  // Load the current test results
97
58
  let result;
98
59
  try {
99
- result = loadInterviewResult(reportPath);
60
+ result = loadCheckInterviewResult(reportPath, {
61
+ missingReportMessage: 'Run `bellwether check` first to generate a report.\n' +
62
+ 'Configure in bellwether.yaml:\n' +
63
+ ' output:\n' +
64
+ ' format: json # or "both" for JSON + markdown',
65
+ invalidModeMessage: () => `Baseline operations only work with check mode results.\n\n` +
66
+ `The report at ${reportPath} was created with explore mode.\n` +
67
+ 'Run `bellwether check` to generate a check mode report first.',
68
+ });
100
69
  }
101
70
  catch (error) {
102
71
  output.error(error instanceof Error ? error.message : String(error));
package/dist/cli/commands/baseline.js CHANGED
@@ -9,61 +9,17 @@
9
9
  * - accept Accept detected drift as intentional
10
10
  */
11
11
  import { Command } from 'commander';
12
- import { existsSync, readFileSync } from 'fs';
12
+ import { existsSync } from 'fs';
13
13
  import { join, basename } from 'path';
14
14
  import { createBaseline, saveBaseline, loadBaseline, compareBaselines, formatDiffText, formatDiffJson, formatDiffMarkdown, formatDiffCompact, verifyBaselineHash, getBaselineGeneratedAt, getBaselineMode, getBaselineServerCommand, getToolFingerprints, } from '../../baseline/index.js';
15
15
  import { BaselineVersionError } from '../../baseline/version.js';
16
16
  import { EXIT_CODES, MCP } from '../../constants.js';
17
17
  import { getExcludedFeatureNames } from '../../protocol/index.js';
18
18
  import { acceptCommand } from './baseline-accept.js';
19
- import { loadConfig, ConfigNotFoundError } from '../../config/loader.js';
20
19
  import * as output from '../output.js';
21
- /**
22
- * Load interview result from JSON report.
23
- * Only accepts check mode results - explore results are for documentation only.
24
- */
25
- function loadInterviewResult(reportPath) {
26
- if (!existsSync(reportPath)) {
27
- throw new Error(`Test report not found: ${reportPath}\n\n` +
28
- 'Run `bellwether check` first with JSON output enabled.\n' +
29
- 'Configure in bellwether.yaml:\n' +
30
- ' output:\n' +
31
- ' format: json # or "both" for JSON + markdown');
32
- }
33
- const content = readFileSync(reportPath, 'utf-8');
34
- let result;
35
- try {
36
- result = JSON.parse(content);
37
- }
38
- catch (error) {
39
- throw new Error(`Invalid JSON in report file ${reportPath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
40
- }
41
- // Validate that this is a check mode result, not explore
42
- if (result.metadata.model && result.metadata.model !== 'check') {
43
- throw new Error(`Baseline operations only work with check mode results.\n\n` +
44
- `The report at ${reportPath} was created with explore mode (model: ${result.metadata.model}).\n` +
45
- `Explore results are for documentation only and cannot be used for baselines.\n\n` +
46
- 'To create a baseline:\n' +
47
- ' 1. Run `bellwether check` to generate a check mode report\n' +
48
- ' 2. Run `bellwether baseline save` to create the baseline');
49
- }
50
- return result;
51
- }
52
- /**
53
- * Get the output directory from config or use current directory.
54
- */
55
- function loadConfigOrExit(configPath) {
56
- try {
57
- return loadConfig(configPath);
58
- }
59
- catch (error) {
60
- if (error instanceof ConfigNotFoundError) {
61
- output.error(error.message);
62
- process.exit(EXIT_CODES.ERROR);
63
- }
64
- throw error;
65
- }
66
- }
20
+ import { loadConfigOrExit } from '../utils/config-loader.js';
21
+ import { loadCheckInterviewResult } from '../utils/report-loader.js';
22
+ import { resolvePathFromOutputDir, resolvePathFromOutputDirOrCwd, } from '../utils/path-resolution.js';
67
23
  export const baselineCommand = new Command('baseline')
68
24
  .description('Manage baselines for drift detection')
69
25
  .addHelpText('after', `
@@ -99,16 +55,21 @@ baselineCommand
99
55
  // Load interview result
100
56
  let result;
101
57
  try {
102
- result = loadInterviewResult(reportPath);
58
+ result = loadCheckInterviewResult(reportPath, {
59
+ invalidModeMessage: (model) => `Baseline operations only work with check mode results.\n\n` +
60
+ `The report at ${reportPath} was created with explore mode (model: ${model}).\n` +
61
+ `Explore results are for documentation only and cannot be used for baselines.\n\n` +
62
+ 'To create a baseline:\n' +
63
+ ' 1. Run `bellwether check` to generate a check mode report\n' +
64
+ ' 2. Run `bellwether baseline save` to create the baseline',
65
+ });
103
66
  }
104
67
  catch (error) {
105
68
  output.error(error instanceof Error ? error.message : String(error));
106
69
  process.exit(EXIT_CODES.ERROR);
107
70
  }
108
71
  // Determine baseline path (relative to output dir if not absolute)
109
- const finalPath = resolvedBaselinePath.startsWith('/')
110
- ? resolvedBaselinePath
111
- : join(outputDir, resolvedBaselinePath);
72
+ const finalPath = resolvePathFromOutputDir(resolvedBaselinePath, outputDir);
112
73
  // Check for existing baseline
113
74
  if (existsSync(finalPath) && !options.force) {
114
75
  output.error(`Baseline already exists: ${finalPath}`);
@@ -147,28 +108,7 @@ baselineCommand
147
108
  output.error('No baseline path provided. Set baseline.path or baseline.comparePath in config, or pass a path argument.');
148
109
  process.exit(EXIT_CODES.ERROR);
149
110
  }
150
- // Resolve baseline path consistently with 'show' command:
151
- // 1. If absolute path, use as-is
152
- // 2. First try relative to outputDir (e.g., .bellwether/)
153
- // 3. Fall back to relative to cwd
154
- let fullBaselinePath;
155
- if (resolvedBaselinePath.startsWith('/')) {
156
- fullBaselinePath = resolvedBaselinePath;
157
- }
158
- else {
159
- const outputDirPath = join(outputDir, resolvedBaselinePath);
160
- const cwdPath = join(process.cwd(), resolvedBaselinePath);
161
- if (existsSync(outputDirPath)) {
162
- fullBaselinePath = outputDirPath;
163
- }
164
- else if (existsSync(cwdPath)) {
165
- fullBaselinePath = cwdPath;
166
- }
167
- else {
168
- // Default to outputDir path for error message consistency
169
- fullBaselinePath = outputDirPath;
170
- }
171
- }
111
+ const fullBaselinePath = resolvePathFromOutputDirOrCwd(resolvedBaselinePath, outputDir);
172
112
  if (!existsSync(fullBaselinePath)) {
173
113
  output.error(`Baseline not found: ${fullBaselinePath}`);
174
114
  output.error('\nRun `bellwether baseline save` to create a baseline.');
@@ -186,7 +126,14 @@ baselineCommand
186
126
  const reportPath = options.report || join(outputDir, config.output.files.checkReport);
187
127
  let result;
188
128
  try {
189
- result = loadInterviewResult(reportPath);
129
+ result = loadCheckInterviewResult(reportPath, {
130
+ invalidModeMessage: (model) => `Baseline operations only work with check mode results.\n\n` +
131
+ `The report at ${reportPath} was created with explore mode (model: ${model}).\n` +
132
+ `Explore results are for documentation only and cannot be used for baselines.\n\n` +
133
+ 'To create a baseline:\n' +
134
+ ' 1. Run `bellwether check` to generate a check mode report\n' +
135
+ ' 2. Run `bellwether baseline save` to create the baseline',
136
+ });
190
137
  }
191
138
  catch (error) {
192
139
  output.error(error instanceof Error ? error.message : String(error));
@@ -285,9 +232,7 @@ baselineCommand
285
232
  process.exit(EXIT_CODES.ERROR);
286
233
  }
287
234
  // Determine full path
288
- const fullPath = resolvedBaselinePath.startsWith('/')
289
- ? resolvedBaselinePath
290
- : join(outputDir, resolvedBaselinePath);
235
+ const fullPath = resolvePathFromOutputDirOrCwd(resolvedBaselinePath, outputDir);
291
236
  if (!existsSync(fullPath)) {
292
237
  output.error(`Baseline not found: ${fullPath}`);
293
238
  output.error('\nRun `bellwether baseline save` to create a baseline.');
package/dist/cli/commands/check-formatters.d.ts ADDED
@@ -0,0 +1,10 @@
1
+ import { type BehavioralDiff, type BehavioralBaseline } from '../../baseline/index.js';
2
+ /**
3
+ * Format a diff using the requested output format.
4
+ */
5
+ export declare function formatDiffOutput(diff: BehavioralDiff, format: string, baselinePath: string): string;
6
+ /**
7
+ * Format check-only output when no baseline is provided.
8
+ */
9
+ export declare function formatCheckResults(baseline: BehavioralBaseline, format: string): string | null;
10
+ //# sourceMappingURL=check-formatters.d.ts.map
package/dist/cli/commands/check-formatters.js ADDED
@@ -0,0 +1,160 @@
1
+ import { formatDiffText, formatDiffJson, formatDiffCompact, formatDiffGitHubActions, formatDiffMarkdown, formatDiffJUnit, formatDiffSarif, getToolFingerprints, } from '../../baseline/index.js';
2
+ /**
3
+ * Format a diff using the requested output format.
4
+ */
5
+ export function formatDiffOutput(diff, format, baselinePath) {
6
+ switch (format.toLowerCase()) {
7
+ case 'json':
8
+ return formatDiffJson(diff);
9
+ case 'compact':
10
+ return formatDiffCompact(diff);
11
+ case 'github':
12
+ return formatDiffGitHubActions(diff);
13
+ case 'markdown':
14
+ case 'md':
15
+ return formatDiffMarkdown(diff);
16
+ case 'junit':
17
+ case 'junit-xml':
18
+ case 'xml':
19
+ return formatDiffJUnit(diff, 'bellwether-check');
20
+ case 'sarif':
21
+ return formatDiffSarif(diff, baselinePath);
22
+ case 'text':
23
+ default:
24
+ return formatDiffText(diff);
25
+ }
26
+ }
27
+ /**
28
+ * Format check-only output when no baseline is provided.
29
+ */
30
+ export function formatCheckResults(baseline, format) {
31
+ switch (format.toLowerCase()) {
32
+ case 'junit':
33
+ case 'junit-xml':
34
+ case 'xml':
35
+ return formatCheckResultsJUnit(baseline);
36
+ case 'sarif':
37
+ return formatCheckResultsSarif(baseline);
38
+ default:
39
+ return null;
40
+ }
41
+ }
42
+ function formatCheckResultsJUnit(baseline) {
43
+ const tools = getToolFingerprints(baseline);
44
+ const lines = [];
45
+ const securityFailures = tools.filter((t) => t.securityFingerprint?.findings?.some((f) => f.riskLevel === 'critical' || f.riskLevel === 'high')).length;
46
+ lines.push('<?xml version="1.0" encoding="UTF-8"?>');
47
+ lines.push('<testsuites>');
48
+ lines.push(` <testsuite name="bellwether-check" tests="${tools.length}" failures="${securityFailures}" errors="0">`);
49
+ for (const tool of tools) {
50
+ const successRate = tool.baselineSuccessRate ?? 1;
51
+ const status = successRate >= 0.9 ? 'passed' : 'warning';
52
+ lines.push(` <testcase name="${tool.name}" classname="mcp-tools" time="0">`);
53
+ lines.push(` <system-out>Success rate: ${(successRate * 100).toFixed(0)}%</system-out>`);
54
+ if (status === 'warning') {
55
+ lines.push(' <system-err>Tool has success rate below 90%</system-err>');
56
+ }
57
+ lines.push(' </testcase>');
58
+ }
59
+ const securityTools = tools.filter((t) => t.securityFingerprint?.findings?.length);
60
+ if (securityTools.length > 0) {
61
+ lines.push(' <!-- Security findings -->');
62
+ for (const tool of securityTools) {
63
+ const findings = tool.securityFingerprint?.findings ?? [];
64
+ const criticalHigh = findings.filter((f) => f.riskLevel === 'critical' || f.riskLevel === 'high').length;
65
+ if (criticalHigh > 0) {
66
+ lines.push(` <testcase name="${tool.name}-security" classname="security">`);
67
+ lines.push(` <failure message="${criticalHigh} critical/high security findings">`);
68
+ for (const finding of findings.filter((f) => f.riskLevel === 'critical' || f.riskLevel === 'high')) {
69
+ lines.push(` ${finding.riskLevel.toUpperCase()}: ${finding.title} (${finding.cweId})`);
70
+ }
71
+ lines.push(' </failure>');
72
+ lines.push(' </testcase>');
73
+ }
74
+ }
75
+ }
76
+ lines.push(' </testsuite>');
77
+ lines.push('</testsuites>');
78
+ return lines.join('\n');
79
+ }
80
+ function formatCheckResultsSarif(baseline) {
81
+ const tools = getToolFingerprints(baseline);
82
+ const serverUri = baseline.metadata?.serverCommand || baseline.server.name || 'mcp-server';
83
+ const results = [];
84
+ const securityTools = tools.filter((t) => t.securityFingerprint?.findings?.length);
85
+ for (const tool of securityTools) {
86
+ const findings = tool.securityFingerprint?.findings ?? [];
87
+ for (const finding of findings) {
88
+ const level = finding.riskLevel === 'critical' || finding.riskLevel === 'high'
89
+ ? 'error'
90
+ : finding.riskLevel === 'medium'
91
+ ? 'warning'
92
+ : 'note';
93
+ results.push({
94
+ ruleId: finding.cweId || 'BWH-SEC',
95
+ level,
96
+ message: { text: `[${tool.name}] ${finding.title}: ${finding.description}` },
97
+ locations: [
98
+ {
99
+ physicalLocation: {
100
+ artifactLocation: { uri: serverUri },
101
+ region: { startLine: 1 },
102
+ },
103
+ },
104
+ ],
105
+ });
106
+ }
107
+ }
108
+ for (const tool of tools) {
109
+ const successRate = tool.baselineSuccessRate ?? 1;
110
+ if (successRate < 0.9) {
111
+ results.push({
112
+ ruleId: 'BWH-REL',
113
+ level: 'warning',
114
+ message: {
115
+ text: `Tool "${tool.name}" has ${(successRate * 100).toFixed(0)}% success rate`,
116
+ },
117
+ locations: [
118
+ {
119
+ physicalLocation: {
120
+ artifactLocation: { uri: serverUri },
121
+ region: { startLine: 1 },
122
+ },
123
+ },
124
+ ],
125
+ });
126
+ }
127
+ }
128
+ const sarif = {
129
+ $schema: 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
130
+ version: '2.1.0',
131
+ runs: [
132
+ {
133
+ tool: {
134
+ driver: {
135
+ name: 'bellwether',
136
+ version: '1.0.0',
137
+ informationUri: 'https://github.com/dotsetlabs/bellwether',
138
+ rules: [
139
+ {
140
+ id: 'BWH-SEC',
141
+ name: 'SecurityFinding',
142
+ shortDescription: { text: 'Security vulnerability detected' },
143
+ defaultConfiguration: { level: 'warning' },
144
+ },
145
+ {
146
+ id: 'BWH-REL',
147
+ name: 'LowReliability',
148
+ shortDescription: { text: 'Tool reliability below threshold' },
149
+ defaultConfiguration: { level: 'warning' },
150
+ },
151
+ ],
152
+ },
153
+ },
154
+ results,
155
+ },
156
+ ],
157
+ };
158
+ return JSON.stringify(sarif, null, 2);
159
+ }
160
+ //# sourceMappingURL=check-formatters.js.map