@dotsetlabs/bellwether 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/dist/baseline/dependency-analyzer.js +46 -25
- package/dist/baseline/diff.js +51 -39
- package/dist/baseline/documentation-scorer.d.ts +1 -1
- package/dist/baseline/documentation-scorer.js +4 -4
- package/dist/baseline/error-analyzer.js +1 -1
- package/dist/baseline/external-dependency-detector.js +16 -7
- package/dist/baseline/performance-tracker.js +2 -2
- package/dist/baseline/response-schema-tracker.js +17 -22
- package/dist/cli/commands/auth.js +15 -18
- package/dist/cli/commands/baseline-accept.js +1 -1
- package/dist/cli/commands/baseline.js +1 -1
- package/dist/cli/commands/check.js +6 -5
- package/dist/cli/commands/discover.js +2 -2
- package/dist/cli/commands/explore.js +2 -2
- package/dist/cli/commands/golden.js +20 -23
- package/dist/cli/commands/registry.js +37 -35
- package/dist/cli/output/terminal-reporter.js +9 -9
- package/dist/cli/output.d.ts +1 -1
- package/dist/cli/output.js +9 -11
- package/dist/config/validator.d.ts +33 -33
- package/dist/constants/core.d.ts +3 -7
- package/dist/constants/core.js +3 -7
- package/dist/constants/testing.d.ts +11 -11
- package/dist/constants/testing.js +11 -11
- package/dist/contract/validator.js +7 -7
- package/dist/docs/agents.js +7 -7
- package/dist/docs/contract.js +73 -39
- package/dist/interview/dependency-resolver.d.ts +3 -2
- package/dist/interview/dependency-resolver.js +31 -2
- package/dist/interview/interviewer.js +10 -2
- package/dist/interview/stateful-test-runner.d.ts +1 -0
- package/dist/interview/stateful-test-runner.js +4 -0
- package/dist/interview/types.d.ts +3 -0
- package/dist/prompts/templates.js +30 -15
- package/dist/scenarios/evaluator.js +9 -10
- package/dist/version.js +1 -1
- package/man/bellwether.1 +1 -1
- package/man/bellwether.1.md +2 -2
- package/package.json +2 -1
package/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [2.1.0] - 2026-02-11
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
|
|
14
|
+
- **Remove all emoji from CLI and documentation output**: Replaced ~40 unique emoji characters across 35+ files with professional text-based alternatives. Terminal output now uses `[PASS]`/`[FAIL]`/`[WARN]`/`[INFO]` labels; markdown reports use plain-text severity badges (`CRITICAL`, `HIGH`, `MEDIUM`, `LOW`, `OK`); trend indicators use `Improved`/`Degraded`/`Stable`/`New`/`Resolved`. Improves accessibility, log-friendliness, and CI compatibility.
|
|
15
|
+
- **Annotation-aware tool ordering**: `getDependencyOrder()` now sorts readOnly tools first and destructive tools last within each dependency layer, producing safer execution sequences.
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
|
|
19
|
+
- **Test fixtures configuration**: New `testFixtures` option on `InterviewConfig` allows overriding default parameter values for schema-generated tests. The stateful test runner respects fixture keys and does not overwrite user-provided values.
|
|
20
|
+
|
|
10
21
|
## [2.0.1] - 2026-02-07
|
|
11
22
|
|
|
12
23
|
### Added
|
|
@@ -14,19 +14,31 @@ import { mermaidLabel } from '../utils/index.js';
|
|
|
14
14
|
// Common patterns for tool relationships
|
|
15
15
|
// Each entry pairs a regex with the dependency `type` recorded for it.
// Capture group 1 is the word found in the "X" position of the phrase quoted
// in the comment above the entry (optionally wrapped in ', " or `).
// NOTE: every regex carries the `g` flag, so `exec()`/`test()` on these shared
// objects advance `lastIndex` between calls; reset it or iterate with
// `String.prototype.matchAll` when reusing a pattern.
const DEPENDENCY_PATTERNS = [
    // "requires output from X" / "requires X"
    {
        pattern: /requires?\s+(?:output\s+from\s+)?['"`]?(\w+)['"`]?/gi,
        type: 'mention',
    },
    // "after calling X" / "after X"
    { pattern: /after\s+(?:calling\s+)?['"`]?(\w+)['"`]?/gi, type: 'sequence' },
    // "use result from X" / "use output of X"
    {
        pattern: /use\s+(?:the\s+)?(?:result|output)\s+(?:of|from)\s+['"`]?(\w+)['"`]?/gi,
        type: 'output_input',
    },
    // "first call X" / "call X first"
    {
        pattern: /(?:first\s+call|call\s+first)\s+['"`]?(\w+)['"`]?/gi,
        type: 'sequence',
    },
    // "chain with X" / "chains to X"
    { pattern: /chains?\s+(?:with|to)\s+['"`]?(\w+)['"`]?/gi, type: 'sequence' },
    // "needs X" / "need X to"
    { pattern: /needs?\s+['"`]?(\w+)['"`]?\s+(?:to|first)?/gi, type: 'mention' },
    // "X returns ... which is used by"
    {
        pattern: /['"`]?(\w+)['"`]?\s+returns?.*which\s+is\s+used/gi,
        type: 'output_input',
    },
];
|
|
31
43
|
// Common ID/token parameter patterns that suggest dependencies
|
|
32
44
|
const ID_PARAMETER_PATTERNS = [
|
|
@@ -38,8 +50,17 @@ const ID_PARAMETER_PATTERNS = [
|
|
|
38
50
|
// Common output field names that create resources
|
|
39
51
|
// Note: Reserved for future enhanced dependency detection
|
|
40
52
|
const _RESOURCE_OUTPUT_PATTERNS = [
    // Generic and entity-scoped identifiers
    'id',
    'item_id',
    'account_id',
    'user_id',
    // Tokens and session handles
    'token',
    'link_token',
    'access_token',
    'session_id',
    // Path-like and generic resource handles
    'path',
    'file_path',
    'resource_id',
];
// The list is not consumed anywhere in the visible code; the `void` expression
// only references it (presumably to keep unused-variable checks quiet — confirm).
void _RESOURCE_OUTPUT_PATTERNS;
|
|
45
66
|
/**
|
|
@@ -47,8 +68,8 @@ void _RESOURCE_OUTPUT_PATTERNS;
|
|
|
47
68
|
*/
|
|
48
69
|
export function analyzeDependencies(tools) {
|
|
49
70
|
const edges = [];
|
|
50
|
-
const toolNames = new Set(tools.map(t => t.name.toLowerCase()));
|
|
51
|
-
const toolMap = new Map(tools.map(t => [t.name.toLowerCase(), t]));
|
|
71
|
+
const toolNames = new Set(tools.map((t) => t.name.toLowerCase()));
|
|
72
|
+
const toolMap = new Map(tools.map((t) => [t.name.toLowerCase(), t]));
|
|
52
73
|
for (const tool of tools) {
|
|
53
74
|
// Strategy 1: Description analysis
|
|
54
75
|
const mentionEdges = extractToolMentions(tool, tools, toolNames);
|
|
@@ -102,7 +123,7 @@ function extractToolMentions(tool, allTools, toolNames) {
|
|
|
102
123
|
for (const variant of nameVariants) {
|
|
103
124
|
if (description.toLowerCase().includes(variant.toLowerCase())) {
|
|
104
125
|
// Check if already found via pattern
|
|
105
|
-
const existingEdge = edges.find(e => e.from === otherTool.name.toLowerCase() || e.from === otherTool.name);
|
|
126
|
+
const existingEdge = edges.find((e) => e.from === otherTool.name.toLowerCase() || e.from === otherTool.name);
|
|
106
127
|
if (!existingEdge) {
|
|
107
128
|
edges.push({
|
|
108
129
|
from: otherTool.name,
|
|
@@ -177,8 +198,10 @@ function findResourceReferences(tool, allTools, _toolMap) {
|
|
|
177
198
|
const otherName = otherTool.name.toLowerCase();
|
|
178
199
|
// create_X -> [get|list|update|delete]_X dependency
|
|
179
200
|
if (otherName.includes(resourceType) &&
|
|
180
|
-
(otherName.includes('create') ||
|
|
181
|
-
otherName.includes('
|
|
201
|
+
(otherName.includes('create') ||
|
|
202
|
+
otherName.includes('add') ||
|
|
203
|
+
otherName.includes('link') ||
|
|
204
|
+
otherName.includes('exchange'))) {
|
|
182
205
|
edges.push({
|
|
183
206
|
from: otherTool.name,
|
|
184
207
|
to: tool.name,
|
|
@@ -255,7 +278,7 @@ function deduplicateEdges(edges) {
|
|
|
255
278
|
* Build the full dependency graph from edges.
|
|
256
279
|
*/
|
|
257
280
|
function buildGraph(edges, tools) {
|
|
258
|
-
const toolNames = tools.map(t => t.name);
|
|
281
|
+
const toolNames = tools.map((t) => t.name);
|
|
259
282
|
// Build adjacency lists
|
|
260
283
|
const dependsOn = new Map();
|
|
261
284
|
const dependedBy = new Map();
|
|
@@ -268,9 +291,9 @@ function buildGraph(edges, tools) {
|
|
|
268
291
|
dependedBy.get(edge.from)?.add(edge.to);
|
|
269
292
|
}
|
|
270
293
|
// Find entry points (no dependencies)
|
|
271
|
-
const entryPoints = toolNames.filter(name => (dependsOn.get(name)?.size ?? 0) === 0);
|
|
294
|
+
const entryPoints = toolNames.filter((name) => (dependsOn.get(name)?.size ?? 0) === 0);
|
|
272
295
|
// Find terminal points (no dependents)
|
|
273
|
-
const terminalPoints = toolNames.filter(name => (dependedBy.get(name)?.size ?? 0) === 0);
|
|
296
|
+
const terminalPoints = toolNames.filter((name) => (dependedBy.get(name)?.size ?? 0) === 0);
|
|
274
297
|
// Build layers using topological sort
|
|
275
298
|
const layers = topologicalLayers(toolNames, dependsOn);
|
|
276
299
|
// Detect cycles
|
|
@@ -319,7 +342,7 @@ function topologicalLayers(toolNames, dependsOn) {
|
|
|
319
342
|
// Group by depth
|
|
320
343
|
const maxDepth = Math.max(...Array.from(depth.values()));
|
|
321
344
|
for (let d = 0; d <= maxDepth; d++) {
|
|
322
|
-
const layerTools = toolNames.filter(name => depth.get(name) === d);
|
|
345
|
+
const layerTools = toolNames.filter((name) => depth.get(name) === d);
|
|
323
346
|
if (layerTools.length > 0) {
|
|
324
347
|
layers.push(layerTools);
|
|
325
348
|
}
|
|
@@ -389,10 +412,8 @@ export function calculateDependencyStats(graph) {
|
|
|
389
412
|
.map(([tool, count]) => ({ tool, count }))
|
|
390
413
|
.sort((a, b) => b.count - a.count)
|
|
391
414
|
.slice(0, 5);
|
|
392
|
-
const totalTools = new Set([
|
|
393
|
-
|
|
394
|
-
...graph.edges.map(e => e.to),
|
|
395
|
-
]).size;
|
|
415
|
+
const totalTools = new Set([...graph.edges.map((e) => e.from), ...graph.edges.map((e) => e.to)])
|
|
416
|
+
.size;
|
|
396
417
|
return {
|
|
397
418
|
totalEdges: graph.edges.length,
|
|
398
419
|
byType,
|
|
@@ -408,9 +429,9 @@ export function calculateDependencyStats(graph) {
|
|
|
408
429
|
export function generateDependencyMermaid(graph) {
|
|
409
430
|
const lines = ['graph TD'];
|
|
410
431
|
// Group edges by confidence for styling
|
|
411
|
-
const highConfidence = graph.edges.filter(e => e.confidence >= 0.7);
|
|
412
|
-
const mediumConfidence = graph.edges.filter(e => e.confidence >= 0.5 && e.confidence < 0.7);
|
|
413
|
-
const lowConfidence = graph.edges.filter(e => e.confidence < 0.5);
|
|
432
|
+
const highConfidence = graph.edges.filter((e) => e.confidence >= 0.7);
|
|
433
|
+
const mediumConfidence = graph.edges.filter((e) => e.confidence >= 0.5 && e.confidence < 0.7);
|
|
434
|
+
const lowConfidence = graph.edges.filter((e) => e.confidence < 0.5);
|
|
414
435
|
// Add high confidence edges (solid lines)
|
|
415
436
|
for (const edge of highConfidence) {
|
|
416
437
|
const fromLabel = mermaidLabel(edge.from);
|
|
@@ -538,12 +559,12 @@ export function generateDependencyMarkdown(graph, stats) {
|
|
|
538
559
|
}
|
|
539
560
|
// Cycles warning
|
|
540
561
|
if (graph.cycles.length > 0) {
|
|
541
|
-
lines.push('###
|
|
562
|
+
lines.push('### Circular Dependencies');
|
|
542
563
|
lines.push('');
|
|
543
564
|
lines.push('The following circular dependencies were detected:');
|
|
544
565
|
lines.push('');
|
|
545
566
|
for (const cycle of graph.cycles.slice(0, 5)) {
|
|
546
|
-
lines.push(`- ${cycle.map(t => `\`${t}\``).join(' → ')}`);
|
|
567
|
+
lines.push(`- ${cycle.map((t) => `\`${t}\``).join(' → ')}`);
|
|
547
568
|
}
|
|
548
569
|
lines.push('');
|
|
549
570
|
}
|
|
@@ -555,7 +576,7 @@ export function generateDependencyMarkdown(graph, stats) {
|
|
|
555
576
|
lines.push('');
|
|
556
577
|
for (let i = 0; i < graph.layers.length && i < 5; i++) {
|
|
557
578
|
const layer = graph.layers[i];
|
|
558
|
-
lines.push(`${i + 1}. ${layer.map(t => `\`${t}\``).join(', ')}`);
|
|
579
|
+
lines.push(`${i + 1}. ${layer.map((t) => `\`${t}\``).join(', ')}`);
|
|
559
580
|
}
|
|
560
581
|
if (graph.layers.length > 5) {
|
|
561
582
|
lines.push(`... and ${graph.layers.length - 5} more layers`);
|
package/dist/baseline/diff.js
CHANGED
|
@@ -21,7 +21,7 @@ export function formatDiffText(diff, useColors = true) {
|
|
|
21
21
|
if (diff.toolsRemoved.length > 0) {
|
|
22
22
|
lines.push(red('─── Tools Removed ───'));
|
|
23
23
|
for (const tool of diff.toolsRemoved) {
|
|
24
|
-
lines.push(` ${red('
|
|
24
|
+
lines.push(` ${red('[FAIL]')} ${tool}`);
|
|
25
25
|
}
|
|
26
26
|
lines.push('');
|
|
27
27
|
}
|
|
@@ -89,7 +89,7 @@ export function formatDiffText(diff, useColors = true) {
|
|
|
89
89
|
}
|
|
90
90
|
else if (diff.performanceReport?.improvementCount ?? 0 > 0) {
|
|
91
91
|
lines.push(green('─── Performance ───'));
|
|
92
|
-
lines.push(` ${green('
|
|
92
|
+
lines.push(` ${green('[PASS]')} ${diff.performanceReport?.improvementCount} tool(s) improved`);
|
|
93
93
|
lines.push('');
|
|
94
94
|
}
|
|
95
95
|
// Performance confidence changes
|
|
@@ -122,7 +122,7 @@ export function formatDiffText(diff, useColors = true) {
|
|
|
122
122
|
}
|
|
123
123
|
else if (secReport.resolvedFindings.length > 0) {
|
|
124
124
|
lines.push(green('─── Security Improvements ───'));
|
|
125
|
-
lines.push(` ${green('
|
|
125
|
+
lines.push(` ${green('[PASS]')} ${secReport.resolvedFindings.length} finding(s) resolved`);
|
|
126
126
|
lines.push('');
|
|
127
127
|
}
|
|
128
128
|
// Show risk score change
|
|
@@ -140,7 +140,7 @@ export function formatDiffText(diff, useColors = true) {
|
|
|
140
140
|
lines.push(` ${formatSchemaEvolutionSummary(schemaReport)}`);
|
|
141
141
|
lines.push('');
|
|
142
142
|
for (const issue of schemaReport.toolsWithIssues) {
|
|
143
|
-
const issueIcon = issue.isBreaking ? red('
|
|
143
|
+
const issueIcon = issue.isBreaking ? red('[FAIL]') : yellow('[WARN]');
|
|
144
144
|
lines.push(` ${issueIcon} ${bold(issue.toolName)}`);
|
|
145
145
|
lines.push(` ${issue.summary}`);
|
|
146
146
|
if (issue.fieldsRemoved.length > 0) {
|
|
@@ -154,7 +154,7 @@ export function formatDiffText(diff, useColors = true) {
|
|
|
154
154
|
}
|
|
155
155
|
else if (schemaReport.stableCount > 0) {
|
|
156
156
|
lines.push(green('─── Schema Stability ───'));
|
|
157
|
-
lines.push(` ${green('
|
|
157
|
+
lines.push(` ${green('[PASS]')} ${schemaReport.stableCount} tool(s) with stable response schemas`);
|
|
158
158
|
lines.push('');
|
|
159
159
|
}
|
|
160
160
|
}
|
|
@@ -179,7 +179,7 @@ export function formatDiffText(diff, useColors = true) {
|
|
|
179
179
|
}
|
|
180
180
|
else if (errorReport.trends.length > 0) {
|
|
181
181
|
lines.push(green('─── Error Patterns ───'));
|
|
182
|
-
lines.push(` ${green('
|
|
182
|
+
lines.push(` ${green('[PASS]')} Error patterns stable`);
|
|
183
183
|
lines.push('');
|
|
184
184
|
}
|
|
185
185
|
}
|
|
@@ -201,7 +201,7 @@ export function formatDiffText(diff, useColors = true) {
|
|
|
201
201
|
lines.push(` ${green(indicator)} Score: ${docReport.previousScore} → ${docReport.currentScore} (+${docReport.change})`);
|
|
202
202
|
lines.push(` ${green('Grade:')} ${docReport.previousGrade} → ${docReport.currentGrade}`);
|
|
203
203
|
if (docReport.issuesFixed > 0) {
|
|
204
|
-
lines.push(` ${green('
|
|
204
|
+
lines.push(` ${green('[PASS]')} Issues fixed: ${docReport.issuesFixed}`);
|
|
205
205
|
}
|
|
206
206
|
lines.push('');
|
|
207
207
|
}
|
|
@@ -436,10 +436,10 @@ export function formatDiffMarkdown(diff) {
|
|
|
436
436
|
lines.push('| Tool | Status | Details |');
|
|
437
437
|
lines.push('|------|--------|---------|');
|
|
438
438
|
for (const tool of diff.toolsRemoved) {
|
|
439
|
-
lines.push(`| ${tool} |
|
|
439
|
+
lines.push(`| ${tool} | Removed | Breaking change |`);
|
|
440
440
|
}
|
|
441
441
|
for (const tool of diff.toolsAdded) {
|
|
442
|
-
lines.push(`| ${tool} |
|
|
442
|
+
lines.push(`| ${tool} | Added | New tool |`);
|
|
443
443
|
}
|
|
444
444
|
for (const toolDiff of diff.toolsModified) {
|
|
445
445
|
const details = [
|
|
@@ -449,7 +449,7 @@ export function formatDiffMarkdown(diff) {
|
|
|
449
449
|
]
|
|
450
450
|
.filter(Boolean)
|
|
451
451
|
.join(', ');
|
|
452
|
-
lines.push(`| ${toolDiff.tool} |
|
|
452
|
+
lines.push(`| ${toolDiff.tool} | Modified | ${details} |`);
|
|
453
453
|
}
|
|
454
454
|
lines.push('');
|
|
455
455
|
}
|
|
@@ -459,7 +459,11 @@ export function formatDiffMarkdown(diff) {
|
|
|
459
459
|
lines.push('| Tool | Aspect | Severity | Description |');
|
|
460
460
|
lines.push('|------|--------|----------|-------------|');
|
|
461
461
|
for (const change of diff.behaviorChanges) {
|
|
462
|
-
const sevEmoji = change.severity === 'breaking'
|
|
462
|
+
const sevEmoji = change.severity === 'breaking'
|
|
463
|
+
? 'CRITICAL'
|
|
464
|
+
: change.severity === 'warning'
|
|
465
|
+
? 'WARNING'
|
|
466
|
+
: 'OK';
|
|
463
467
|
lines.push(`| ${change.tool} | ${change.aspect} | ${sevEmoji} ${change.severity} | ${change.description} |`);
|
|
464
468
|
}
|
|
465
469
|
lines.push('');
|
|
@@ -471,7 +475,7 @@ export function formatDiffMarkdown(diff) {
|
|
|
471
475
|
lines.push('### Security');
|
|
472
476
|
lines.push('');
|
|
473
477
|
if (secReport.degraded) {
|
|
474
|
-
lines.push(
|
|
478
|
+
lines.push(`**Security posture degraded**: ${secReport.summary}`);
|
|
475
479
|
lines.push('');
|
|
476
480
|
}
|
|
477
481
|
if (secReport.newFindings.length > 0) {
|
|
@@ -488,7 +492,7 @@ export function formatDiffMarkdown(diff) {
|
|
|
488
492
|
if (secReport.resolvedFindings.length > 0) {
|
|
489
493
|
lines.push('#### Resolved Findings');
|
|
490
494
|
lines.push('');
|
|
491
|
-
lines.push(
|
|
495
|
+
lines.push(`${secReport.resolvedFindings.length} security finding(s) resolved`);
|
|
492
496
|
lines.push('');
|
|
493
497
|
}
|
|
494
498
|
lines.push(`**Risk Score:** ${secReport.previousRiskScore} → ${secReport.currentRiskScore} (${secReport.riskScoreChange >= 0 ? '+' : ''}${secReport.riskScoreChange})`);
|
|
@@ -502,14 +506,18 @@ export function formatDiffMarkdown(diff) {
|
|
|
502
506
|
lines.push('### Schema Evolution');
|
|
503
507
|
lines.push('');
|
|
504
508
|
if (schemaReport.hasBreakingChanges) {
|
|
505
|
-
lines.push('
|
|
509
|
+
lines.push('**Breaking schema changes detected**');
|
|
506
510
|
lines.push('');
|
|
507
511
|
}
|
|
508
512
|
if (schemaReport.toolsWithIssues.length > 0) {
|
|
509
513
|
lines.push('| Tool | Status | Changes |');
|
|
510
514
|
lines.push('|------|--------|---------|');
|
|
511
515
|
for (const issue of schemaReport.toolsWithIssues) {
|
|
512
|
-
const statusIcon = issue.isBreaking
|
|
516
|
+
const statusIcon = issue.isBreaking
|
|
517
|
+
? 'CRITICAL'
|
|
518
|
+
: issue.becameUnstable
|
|
519
|
+
? 'WARNING'
|
|
520
|
+
: 'INFO';
|
|
513
521
|
const status = issue.isBreaking
|
|
514
522
|
? 'Breaking'
|
|
515
523
|
: issue.becameUnstable
|
|
@@ -533,7 +541,7 @@ export function formatDiffMarkdown(diff) {
|
|
|
533
541
|
lines.push('### Error Trends');
|
|
534
542
|
lines.push('');
|
|
535
543
|
if (et.significantChange) {
|
|
536
|
-
lines.push(
|
|
544
|
+
lines.push(`**Error behavior changed**: ${et.summary}`);
|
|
537
545
|
lines.push('');
|
|
538
546
|
}
|
|
539
547
|
if (et.newCategories.length > 0 ||
|
|
@@ -568,14 +576,14 @@ export function formatDiffMarkdown(diff) {
|
|
|
568
576
|
lines.push('|------|----------|---------|--------|------------|');
|
|
569
577
|
for (const regression of perfReport.regressions) {
|
|
570
578
|
const percentStr = (regression.regressionPercent * 100).toFixed(1);
|
|
571
|
-
const confidenceEmoji = regression.isReliable ? '
|
|
579
|
+
const confidenceEmoji = regression.isReliable ? '' : '[low]';
|
|
572
580
|
const confidenceLabel = regression.currentConfidence ?? 'unknown';
|
|
573
581
|
lines.push(`| ${regression.toolName} | ${regression.previousP50Ms.toFixed(0)}ms | ${regression.currentP50Ms.toFixed(0)}ms | +${percentStr}% | ${confidenceEmoji} ${confidenceLabel} |`);
|
|
574
582
|
}
|
|
575
583
|
lines.push('');
|
|
576
584
|
}
|
|
577
585
|
if (perfReport.lowConfidenceTools && perfReport.lowConfidenceTools.length > 0) {
|
|
578
|
-
lines.push(`>
|
|
586
|
+
lines.push(`> **Low confidence metrics**: ${perfReport.lowConfidenceTools.join(', ')}`);
|
|
579
587
|
lines.push('> Consider running with more samples for reliable baselines.');
|
|
580
588
|
lines.push('');
|
|
581
589
|
}
|
|
@@ -585,7 +593,11 @@ export function formatDiffMarkdown(diff) {
|
|
|
585
593
|
lines.push('| Tool | Previous | Current | Status |');
|
|
586
594
|
lines.push('|------|----------|---------|--------|');
|
|
587
595
|
for (const change of perfReport.confidenceChanges) {
|
|
588
|
-
const statusEmoji = change.improved
|
|
596
|
+
const statusEmoji = change.improved
|
|
597
|
+
? 'Improved'
|
|
598
|
+
: change.degraded
|
|
599
|
+
? 'Degraded'
|
|
600
|
+
: 'Stable';
|
|
589
601
|
lines.push(`| ${change.toolName} | ${change.previousLevel ?? 'N/A'} | ${change.currentLevel} | ${statusEmoji} ${change.improved ? 'Improved' : change.degraded ? 'Degraded' : 'Changed'} |`);
|
|
590
602
|
}
|
|
591
603
|
lines.push('');
|
|
@@ -597,7 +609,7 @@ export function formatDiffMarkdown(diff) {
|
|
|
597
609
|
const doc = diff.documentationScoreReport;
|
|
598
610
|
lines.push('### Documentation Quality');
|
|
599
611
|
lines.push('');
|
|
600
|
-
const changeIcon = doc.improved ? '
|
|
612
|
+
const changeIcon = doc.improved ? 'Improved' : doc.degraded ? 'Degraded' : 'Stable';
|
|
601
613
|
const sign = doc.change > 0 ? '+' : '';
|
|
602
614
|
lines.push(`**Score:** ${doc.currentScore}/100 (${doc.currentGrade}) ${changeIcon}`);
|
|
603
615
|
if (doc.change !== 0) {
|
|
@@ -666,15 +678,15 @@ export function formatDiffMarkdown(diff) {
|
|
|
666
678
|
/**
 * Map a trend identifier to the plain-text label used in place of the former
 * emoji indicator.
 *
 * @param {string} trend - 'new', 'resolved', 'increasing', 'decreasing' or 'stable'.
 * @returns {string} The label for the trend, or 'Unknown' for any other value
 *   (so the literal text "undefined" can never leak into a report).
 */
function getTrendEmoji(trend) {
    switch (trend) {
        case 'new':
            return 'New';
        case 'resolved':
            return 'Resolved';
        // NOTE(review): if these trends describe error frequency, 'increasing'
        // presumably means more errors, which would make 'Improved' / 'Degraded'
        // read inverted — confirm the intended wording against the caller.
        case 'increasing':
            return 'Improved';
        case 'decreasing':
            return 'Degraded';
        case 'stable':
            return 'Stable';
        default:
            return 'Unknown';
    }
}
|
|
680
692
|
/**
|
|
@@ -1437,25 +1449,25 @@ function getSeverityBadge(severity, useColors) {
|
|
|
1437
1449
|
const c = useColors ? colors : noColors;
|
|
1438
1450
|
switch (severity) {
|
|
1439
1451
|
case 'none':
|
|
1440
|
-
return c.green('
|
|
1452
|
+
return c.green('[ok] NONE');
|
|
1441
1453
|
case 'info':
|
|
1442
|
-
return c.cyan('
|
|
1454
|
+
return c.cyan('[info] INFO');
|
|
1443
1455
|
case 'warning':
|
|
1444
|
-
return c.yellow('
|
|
1456
|
+
return c.yellow('[warn] WARNING');
|
|
1445
1457
|
case 'breaking':
|
|
1446
|
-
return c.red('
|
|
1458
|
+
return c.red('[break] BREAKING');
|
|
1447
1459
|
}
|
|
1448
1460
|
}
|
|
1449
1461
|
/**
 * Map a change severity to the plain-text badge used in place of the former
 * emoji.
 *
 * @param {string} severity - 'none', 'info', 'warning' or 'breaking'.
 * @returns {string} 'OK', 'INFO', 'WARNING' or 'BREAKING'; 'UNKNOWN' for any
 *   other value, so callers always receive a printable string.
 */
function getSeverityEmoji(severity) {
    switch (severity) {
        case 'none':
            return 'OK';
        case 'info':
            return 'INFO';
        case 'warning':
            return 'WARNING';
        case 'breaking':
            return 'BREAKING';
        default:
            return 'UNKNOWN';
    }
}
|
|
1461
1473
|
function getChangeIcon(change, useColors) {
|
|
@@ -1517,16 +1529,16 @@ function getRiskLevelColor(riskLevel, useColors) {
|
|
|
1517
1529
|
/**
 * Map a risk level to the plain-text badge used in place of the former emoji.
 *
 * @param {string} riskLevel - 'critical', 'high', 'medium', 'low' or 'info'.
 * @returns {string} The upper-case badge; 'info' and any unrecognized value
 *   both yield 'INFO'.
 */
function getRiskLevelEmoji(riskLevel) {
    switch (riskLevel) {
        case 'critical':
            return 'CRITICAL';
        case 'high':
            return 'HIGH';
        case 'medium':
            return 'MEDIUM';
        case 'low':
            return 'LOW';
        // 'info' is listed explicitly for readability; it shares the fallback.
        case 'info':
        default:
            return 'INFO';
    }
}
|
|
1532
1544
|
/**
|
|
@@ -1583,7 +1595,7 @@ export function formatSecurityReport(report, useColors = true) {
|
|
|
1583
1595
|
if (report.resolvedFindings.length > 0) {
|
|
1584
1596
|
lines.push(green('─── Resolved Findings ───'));
|
|
1585
1597
|
for (const finding of report.resolvedFindings) {
|
|
1586
|
-
lines.push(` ${green('
|
|
1598
|
+
lines.push(` ${green('[PASS]')} ${finding.title} (${finding.tool})`);
|
|
1587
1599
|
}
|
|
1588
1600
|
lines.push('');
|
|
1589
1601
|
}
|
|
@@ -187,7 +187,7 @@ export declare function formatDocumentationScoreChange(change: DocumentationScor
|
|
|
187
187
|
*/
|
|
188
188
|
export declare function toDocumentationScoreSummary(score: DocumentationScore): DocumentationScoreSummary;
|
|
189
189
|
/**
|
|
190
|
-
* Get the
|
|
190
|
+
* Get the text indicator for a documentation grade.
|
|
191
191
|
*/
|
|
192
192
|
export declare function getGradeIndicator(grade: DocumentationGrade): string;
|
|
193
193
|
/**
|
|
@@ -415,20 +415,20 @@ function formatIssueType(type) {
|
|
|
415
415
|
.join(' ');
|
|
416
416
|
}
|
|
417
417
|
/**
 * Get the text indicator for a documentation grade.
 *
 * @param {string} grade - Documentation grade letter: 'A', 'B', 'C', 'D' or 'F'.
 * @returns {string} '+' for A and B, '~' for C, '!' for D, '-' for F, and '?'
 *   for any other value (instead of `undefined`).
 */
export function getGradeIndicator(grade) {
    switch (grade) {
        // A and B share the same positive marker.
        case 'A':
        case 'B':
            return '+';
        case 'C':
            return '~';
        case 'D':
            return '!';
        case 'F':
            return '-';
        default:
            return '?';
    }
}
|
|
434
434
|
/**
|
|
@@ -316,9 +316,12 @@ export function isTransientError(errorMessage) {
|
|
|
316
316
|
*/
|
|
317
317
|
function confidenceLevelPriority(level) {
    // Numeric rank for comparing confidence levels:
    // confirmed (3) > likely (2) > possible (1).
    // Any other value ranks 0. Without this fallback the function returned
    // `undefined`, and every `>` comparison against `undefined` is false, so a
    // missing or unknown stored level could never be replaced by a known one.
    switch (level) {
        case 'confirmed':
            return 3;
        case 'likely':
            return 2;
        case 'possible':
            return 1;
        default:
            return 0;
    }
}
|
|
324
327
|
/**
|
|
@@ -358,12 +361,15 @@ export function analyzeExternalDependencies(errors) {
|
|
|
358
361
|
if (isConfirmed && !existing.confirmedTools.includes(toolName)) {
|
|
359
362
|
existing.confirmedTools.push(toolName);
|
|
360
363
|
}
|
|
361
|
-
else if (!isConfirmed &&
|
|
364
|
+
else if (!isConfirmed &&
|
|
365
|
+
!existing.detectedTools.includes(toolName) &&
|
|
366
|
+
!existing.confirmedTools.includes(toolName)) {
|
|
362
367
|
existing.detectedTools.push(toolName);
|
|
363
368
|
}
|
|
364
369
|
existing.hasTransientErrors = existing.hasTransientErrors || isTransient;
|
|
365
370
|
// Update highest confidence level
|
|
366
|
-
if (confidenceLevelPriority(confidenceLevel) >
|
|
371
|
+
if (confidenceLevelPriority(confidenceLevel) >
|
|
372
|
+
confidenceLevelPriority(existing.highestConfidenceLevel)) {
|
|
367
373
|
existing.highestConfidenceLevel = confidenceLevel;
|
|
368
374
|
}
|
|
369
375
|
}
|
|
@@ -460,8 +466,11 @@ export function formatExternalDependenciesMarkdown(summary) {
|
|
|
460
466
|
lines.push('|---------|------------|--------|-----------------|----------------|----------------|');
|
|
461
467
|
for (const [, service] of summary.services) {
|
|
462
468
|
// Show confidence level with a text indicator: '+' confirmed, '~' likely, '?' otherwise
|
|
463
|
-
const confidenceIcon = service.highestConfidenceLevel === 'confirmed'
|
|
464
|
-
|
|
469
|
+
const confidenceIcon = service.highestConfidenceLevel === 'confirmed'
|
|
470
|
+
? '+'
|
|
471
|
+
: service.highestConfidenceLevel === 'likely'
|
|
472
|
+
? '~'
|
|
473
|
+
: '?';
|
|
465
474
|
const confidenceLabel = `${confidenceIcon} ${service.highestConfidenceLevel}`;
|
|
466
475
|
// Format confirmed tools (from actual errors)
|
|
467
476
|
const confirmedTools = service.confirmedTools.length > 0
|
|
@@ -532,10 +532,10 @@ export function formatComparison(comparison) {
|
|
|
532
532
|
}
|
|
533
533
|
if (comparison.hasRegression) {
|
|
534
534
|
if (!comparison.isReliable) {
|
|
535
|
-
lines.push(`
|
|
535
|
+
lines.push(` REGRESSION DETECTED (low confidence - may not be reliable)`);
|
|
536
536
|
}
|
|
537
537
|
else {
|
|
538
|
-
lines.push(`
|
|
538
|
+
lines.push(` REGRESSION DETECTED`);
|
|
539
539
|
}
|
|
540
540
|
}
|
|
541
541
|
return lines.join('\n');
|