@dotsetlabs/bellwether 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +291 -0
- package/LICENSE +21 -0
- package/README.md +739 -0
- package/dist/auth/credentials.d.ts +64 -0
- package/dist/auth/credentials.js +218 -0
- package/dist/auth/index.d.ts +6 -0
- package/dist/auth/index.js +6 -0
- package/dist/auth/keychain.d.ts +64 -0
- package/dist/auth/keychain.js +268 -0
- package/dist/baseline/ab-testing.d.ts +80 -0
- package/dist/baseline/ab-testing.js +236 -0
- package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
- package/dist/baseline/ai-compatibility-scorer.js +606 -0
- package/dist/baseline/calibration.d.ts +77 -0
- package/dist/baseline/calibration.js +136 -0
- package/dist/baseline/category-matching.d.ts +85 -0
- package/dist/baseline/category-matching.js +289 -0
- package/dist/baseline/change-impact-analyzer.d.ts +98 -0
- package/dist/baseline/change-impact-analyzer.js +592 -0
- package/dist/baseline/comparator.d.ts +64 -0
- package/dist/baseline/comparator.js +916 -0
- package/dist/baseline/confidence.d.ts +55 -0
- package/dist/baseline/confidence.js +122 -0
- package/dist/baseline/converter.d.ts +61 -0
- package/dist/baseline/converter.js +585 -0
- package/dist/baseline/dependency-analyzer.d.ts +89 -0
- package/dist/baseline/dependency-analyzer.js +567 -0
- package/dist/baseline/deprecation-tracker.d.ts +133 -0
- package/dist/baseline/deprecation-tracker.js +322 -0
- package/dist/baseline/diff.d.ts +55 -0
- package/dist/baseline/diff.js +1584 -0
- package/dist/baseline/documentation-scorer.d.ts +205 -0
- package/dist/baseline/documentation-scorer.js +466 -0
- package/dist/baseline/embeddings.d.ts +118 -0
- package/dist/baseline/embeddings.js +251 -0
- package/dist/baseline/error-analyzer.d.ts +198 -0
- package/dist/baseline/error-analyzer.js +721 -0
- package/dist/baseline/evaluation/evaluator.d.ts +42 -0
- package/dist/baseline/evaluation/evaluator.js +323 -0
- package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
- package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
- package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
- package/dist/baseline/evaluation/golden-dataset.js +717 -0
- package/dist/baseline/evaluation/index.d.ts +15 -0
- package/dist/baseline/evaluation/index.js +15 -0
- package/dist/baseline/evaluation/types.d.ts +186 -0
- package/dist/baseline/evaluation/types.js +8 -0
- package/dist/baseline/external-dependency-detector.d.ts +181 -0
- package/dist/baseline/external-dependency-detector.js +524 -0
- package/dist/baseline/golden-output.d.ts +162 -0
- package/dist/baseline/golden-output.js +636 -0
- package/dist/baseline/health-scorer.d.ts +174 -0
- package/dist/baseline/health-scorer.js +451 -0
- package/dist/baseline/incremental-checker.d.ts +97 -0
- package/dist/baseline/incremental-checker.js +174 -0
- package/dist/baseline/index.d.ts +31 -0
- package/dist/baseline/index.js +42 -0
- package/dist/baseline/migration-generator.d.ts +137 -0
- package/dist/baseline/migration-generator.js +554 -0
- package/dist/baseline/migrations.d.ts +60 -0
- package/dist/baseline/migrations.js +197 -0
- package/dist/baseline/performance-tracker.d.ts +214 -0
- package/dist/baseline/performance-tracker.js +577 -0
- package/dist/baseline/pr-comment-generator.d.ts +117 -0
- package/dist/baseline/pr-comment-generator.js +546 -0
- package/dist/baseline/response-fingerprint.d.ts +127 -0
- package/dist/baseline/response-fingerprint.js +728 -0
- package/dist/baseline/response-schema-tracker.d.ts +129 -0
- package/dist/baseline/response-schema-tracker.js +420 -0
- package/dist/baseline/risk-scorer.d.ts +54 -0
- package/dist/baseline/risk-scorer.js +434 -0
- package/dist/baseline/saver.d.ts +89 -0
- package/dist/baseline/saver.js +554 -0
- package/dist/baseline/scenario-generator.d.ts +151 -0
- package/dist/baseline/scenario-generator.js +905 -0
- package/dist/baseline/schema-compare.d.ts +86 -0
- package/dist/baseline/schema-compare.js +557 -0
- package/dist/baseline/schema-evolution.d.ts +189 -0
- package/dist/baseline/schema-evolution.js +467 -0
- package/dist/baseline/semantic.d.ts +203 -0
- package/dist/baseline/semantic.js +908 -0
- package/dist/baseline/synonyms.d.ts +60 -0
- package/dist/baseline/synonyms.js +386 -0
- package/dist/baseline/telemetry.d.ts +165 -0
- package/dist/baseline/telemetry.js +294 -0
- package/dist/baseline/test-pruner.d.ts +120 -0
- package/dist/baseline/test-pruner.js +387 -0
- package/dist/baseline/types.d.ts +449 -0
- package/dist/baseline/types.js +5 -0
- package/dist/baseline/version.d.ts +138 -0
- package/dist/baseline/version.js +206 -0
- package/dist/cache/index.d.ts +5 -0
- package/dist/cache/index.js +5 -0
- package/dist/cache/response-cache.d.ts +151 -0
- package/dist/cache/response-cache.js +287 -0
- package/dist/ci/index.d.ts +60 -0
- package/dist/ci/index.js +342 -0
- package/dist/cli/commands/auth.d.ts +12 -0
- package/dist/cli/commands/auth.js +352 -0
- package/dist/cli/commands/badge.d.ts +3 -0
- package/dist/cli/commands/badge.js +74 -0
- package/dist/cli/commands/baseline-accept.d.ts +15 -0
- package/dist/cli/commands/baseline-accept.js +178 -0
- package/dist/cli/commands/baseline-migrate.d.ts +12 -0
- package/dist/cli/commands/baseline-migrate.js +164 -0
- package/dist/cli/commands/baseline.d.ts +14 -0
- package/dist/cli/commands/baseline.js +449 -0
- package/dist/cli/commands/beta.d.ts +10 -0
- package/dist/cli/commands/beta.js +231 -0
- package/dist/cli/commands/check.d.ts +11 -0
- package/dist/cli/commands/check.js +820 -0
- package/dist/cli/commands/cloud/badge.d.ts +3 -0
- package/dist/cli/commands/cloud/badge.js +74 -0
- package/dist/cli/commands/cloud/diff.d.ts +6 -0
- package/dist/cli/commands/cloud/diff.js +79 -0
- package/dist/cli/commands/cloud/history.d.ts +6 -0
- package/dist/cli/commands/cloud/history.js +102 -0
- package/dist/cli/commands/cloud/link.d.ts +9 -0
- package/dist/cli/commands/cloud/link.js +119 -0
- package/dist/cli/commands/cloud/login.d.ts +7 -0
- package/dist/cli/commands/cloud/login.js +499 -0
- package/dist/cli/commands/cloud/projects.d.ts +6 -0
- package/dist/cli/commands/cloud/projects.js +44 -0
- package/dist/cli/commands/cloud/shared.d.ts +7 -0
- package/dist/cli/commands/cloud/shared.js +42 -0
- package/dist/cli/commands/cloud/teams.d.ts +8 -0
- package/dist/cli/commands/cloud/teams.js +169 -0
- package/dist/cli/commands/cloud/upload.d.ts +8 -0
- package/dist/cli/commands/cloud/upload.js +181 -0
- package/dist/cli/commands/contract.d.ts +11 -0
- package/dist/cli/commands/contract.js +280 -0
- package/dist/cli/commands/discover.d.ts +3 -0
- package/dist/cli/commands/discover.js +82 -0
- package/dist/cli/commands/eval.d.ts +9 -0
- package/dist/cli/commands/eval.js +187 -0
- package/dist/cli/commands/explore.d.ts +11 -0
- package/dist/cli/commands/explore.js +437 -0
- package/dist/cli/commands/feedback.d.ts +9 -0
- package/dist/cli/commands/feedback.js +174 -0
- package/dist/cli/commands/golden.d.ts +12 -0
- package/dist/cli/commands/golden.js +407 -0
- package/dist/cli/commands/history.d.ts +10 -0
- package/dist/cli/commands/history.js +202 -0
- package/dist/cli/commands/init.d.ts +9 -0
- package/dist/cli/commands/init.js +219 -0
- package/dist/cli/commands/interview.d.ts +3 -0
- package/dist/cli/commands/interview.js +903 -0
- package/dist/cli/commands/link.d.ts +10 -0
- package/dist/cli/commands/link.js +169 -0
- package/dist/cli/commands/login.d.ts +7 -0
- package/dist/cli/commands/login.js +499 -0
- package/dist/cli/commands/preset.d.ts +33 -0
- package/dist/cli/commands/preset.js +297 -0
- package/dist/cli/commands/profile.d.ts +33 -0
- package/dist/cli/commands/profile.js +286 -0
- package/dist/cli/commands/registry.d.ts +11 -0
- package/dist/cli/commands/registry.js +146 -0
- package/dist/cli/commands/shared.d.ts +79 -0
- package/dist/cli/commands/shared.js +196 -0
- package/dist/cli/commands/teams.d.ts +8 -0
- package/dist/cli/commands/teams.js +169 -0
- package/dist/cli/commands/test.d.ts +9 -0
- package/dist/cli/commands/test.js +500 -0
- package/dist/cli/commands/upload.d.ts +8 -0
- package/dist/cli/commands/upload.js +223 -0
- package/dist/cli/commands/validate-config.d.ts +6 -0
- package/dist/cli/commands/validate-config.js +35 -0
- package/dist/cli/commands/verify.d.ts +11 -0
- package/dist/cli/commands/verify.js +283 -0
- package/dist/cli/commands/watch.d.ts +12 -0
- package/dist/cli/commands/watch.js +253 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.js +178 -0
- package/dist/cli/interactive.d.ts +47 -0
- package/dist/cli/interactive.js +216 -0
- package/dist/cli/output/terminal-reporter.d.ts +19 -0
- package/dist/cli/output/terminal-reporter.js +104 -0
- package/dist/cli/output.d.ts +226 -0
- package/dist/cli/output.js +438 -0
- package/dist/cli/utils/env.d.ts +5 -0
- package/dist/cli/utils/env.js +14 -0
- package/dist/cli/utils/progress.d.ts +59 -0
- package/dist/cli/utils/progress.js +206 -0
- package/dist/cli/utils/server-context.d.ts +10 -0
- package/dist/cli/utils/server-context.js +36 -0
- package/dist/cloud/auth.d.ts +144 -0
- package/dist/cloud/auth.js +374 -0
- package/dist/cloud/client.d.ts +24 -0
- package/dist/cloud/client.js +65 -0
- package/dist/cloud/http-client.d.ts +38 -0
- package/dist/cloud/http-client.js +215 -0
- package/dist/cloud/index.d.ts +23 -0
- package/dist/cloud/index.js +25 -0
- package/dist/cloud/mock-client.d.ts +107 -0
- package/dist/cloud/mock-client.js +545 -0
- package/dist/cloud/types.d.ts +515 -0
- package/dist/cloud/types.js +15 -0
- package/dist/config/defaults.d.ts +160 -0
- package/dist/config/defaults.js +169 -0
- package/dist/config/loader.d.ts +24 -0
- package/dist/config/loader.js +122 -0
- package/dist/config/template.d.ts +42 -0
- package/dist/config/template.js +647 -0
- package/dist/config/validator.d.ts +2112 -0
- package/dist/config/validator.js +658 -0
- package/dist/constants/cloud.d.ts +107 -0
- package/dist/constants/cloud.js +110 -0
- package/dist/constants/core.d.ts +521 -0
- package/dist/constants/core.js +556 -0
- package/dist/constants/testing.d.ts +1283 -0
- package/dist/constants/testing.js +1568 -0
- package/dist/constants.d.ts +10 -0
- package/dist/constants.js +10 -0
- package/dist/contract/index.d.ts +6 -0
- package/dist/contract/index.js +5 -0
- package/dist/contract/validator.d.ts +177 -0
- package/dist/contract/validator.js +574 -0
- package/dist/cost/index.d.ts +6 -0
- package/dist/cost/index.js +5 -0
- package/dist/cost/tracker.d.ts +134 -0
- package/dist/cost/tracker.js +313 -0
- package/dist/discovery/discovery.d.ts +16 -0
- package/dist/discovery/discovery.js +173 -0
- package/dist/discovery/types.d.ts +51 -0
- package/dist/discovery/types.js +2 -0
- package/dist/docs/agents.d.ts +3 -0
- package/dist/docs/agents.js +995 -0
- package/dist/docs/contract.d.ts +51 -0
- package/dist/docs/contract.js +1681 -0
- package/dist/docs/generator.d.ts +4 -0
- package/dist/docs/generator.js +4 -0
- package/dist/docs/html-reporter.d.ts +9 -0
- package/dist/docs/html-reporter.js +757 -0
- package/dist/docs/index.d.ts +10 -0
- package/dist/docs/index.js +11 -0
- package/dist/docs/junit-reporter.d.ts +18 -0
- package/dist/docs/junit-reporter.js +210 -0
- package/dist/docs/report.d.ts +14 -0
- package/dist/docs/report.js +44 -0
- package/dist/docs/sarif-reporter.d.ts +19 -0
- package/dist/docs/sarif-reporter.js +335 -0
- package/dist/docs/shared.d.ts +35 -0
- package/dist/docs/shared.js +162 -0
- package/dist/docs/templates.d.ts +12 -0
- package/dist/docs/templates.js +76 -0
- package/dist/errors/index.d.ts +6 -0
- package/dist/errors/index.js +6 -0
- package/dist/errors/retry.d.ts +92 -0
- package/dist/errors/retry.js +323 -0
- package/dist/errors/types.d.ts +321 -0
- package/dist/errors/types.js +584 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.js +32 -0
- package/dist/interview/dependency-resolver.d.ts +11 -0
- package/dist/interview/dependency-resolver.js +32 -0
- package/dist/interview/interviewer.d.ts +232 -0
- package/dist/interview/interviewer.js +1939 -0
- package/dist/interview/mock-response-generator.d.ts +7 -0
- package/dist/interview/mock-response-generator.js +102 -0
- package/dist/interview/orchestrator.d.ts +237 -0
- package/dist/interview/orchestrator.js +1296 -0
- package/dist/interview/rate-limiter.d.ts +15 -0
- package/dist/interview/rate-limiter.js +55 -0
- package/dist/interview/response-validator.d.ts +10 -0
- package/dist/interview/response-validator.js +132 -0
- package/dist/interview/schema-inferrer.d.ts +8 -0
- package/dist/interview/schema-inferrer.js +71 -0
- package/dist/interview/schema-test-generator.d.ts +71 -0
- package/dist/interview/schema-test-generator.js +834 -0
- package/dist/interview/smart-value-generator.d.ts +155 -0
- package/dist/interview/smart-value-generator.js +554 -0
- package/dist/interview/stateful-test-runner.d.ts +19 -0
- package/dist/interview/stateful-test-runner.js +106 -0
- package/dist/interview/types.d.ts +561 -0
- package/dist/interview/types.js +2 -0
- package/dist/llm/anthropic.d.ts +41 -0
- package/dist/llm/anthropic.js +355 -0
- package/dist/llm/client.d.ts +123 -0
- package/dist/llm/client.js +42 -0
- package/dist/llm/factory.d.ts +38 -0
- package/dist/llm/factory.js +145 -0
- package/dist/llm/fallback.d.ts +140 -0
- package/dist/llm/fallback.js +379 -0
- package/dist/llm/index.d.ts +18 -0
- package/dist/llm/index.js +15 -0
- package/dist/llm/ollama.d.ts +37 -0
- package/dist/llm/ollama.js +330 -0
- package/dist/llm/openai.d.ts +25 -0
- package/dist/llm/openai.js +320 -0
- package/dist/llm/token-budget.d.ts +161 -0
- package/dist/llm/token-budget.js +395 -0
- package/dist/logging/logger.d.ts +70 -0
- package/dist/logging/logger.js +130 -0
- package/dist/metrics/collector.d.ts +106 -0
- package/dist/metrics/collector.js +547 -0
- package/dist/metrics/index.d.ts +7 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/prometheus.d.ts +20 -0
- package/dist/metrics/prometheus.js +241 -0
- package/dist/metrics/types.d.ts +209 -0
- package/dist/metrics/types.js +5 -0
- package/dist/persona/builtins.d.ts +54 -0
- package/dist/persona/builtins.js +219 -0
- package/dist/persona/index.d.ts +8 -0
- package/dist/persona/index.js +8 -0
- package/dist/persona/loader.d.ts +30 -0
- package/dist/persona/loader.js +190 -0
- package/dist/persona/types.d.ts +144 -0
- package/dist/persona/types.js +5 -0
- package/dist/persona/validation.d.ts +94 -0
- package/dist/persona/validation.js +332 -0
- package/dist/prompts/index.d.ts +5 -0
- package/dist/prompts/index.js +5 -0
- package/dist/prompts/templates.d.ts +180 -0
- package/dist/prompts/templates.js +431 -0
- package/dist/registry/client.d.ts +49 -0
- package/dist/registry/client.js +191 -0
- package/dist/registry/index.d.ts +7 -0
- package/dist/registry/index.js +6 -0
- package/dist/registry/types.d.ts +140 -0
- package/dist/registry/types.js +6 -0
- package/dist/scenarios/evaluator.d.ts +43 -0
- package/dist/scenarios/evaluator.js +206 -0
- package/dist/scenarios/index.d.ts +10 -0
- package/dist/scenarios/index.js +9 -0
- package/dist/scenarios/loader.d.ts +20 -0
- package/dist/scenarios/loader.js +285 -0
- package/dist/scenarios/types.d.ts +153 -0
- package/dist/scenarios/types.js +8 -0
- package/dist/security/index.d.ts +17 -0
- package/dist/security/index.js +18 -0
- package/dist/security/payloads.d.ts +61 -0
- package/dist/security/payloads.js +268 -0
- package/dist/security/security-tester.d.ts +42 -0
- package/dist/security/security-tester.js +582 -0
- package/dist/security/types.d.ts +166 -0
- package/dist/security/types.js +8 -0
- package/dist/transport/base-transport.d.ts +59 -0
- package/dist/transport/base-transport.js +38 -0
- package/dist/transport/http-transport.d.ts +67 -0
- package/dist/transport/http-transport.js +238 -0
- package/dist/transport/mcp-client.d.ts +141 -0
- package/dist/transport/mcp-client.js +496 -0
- package/dist/transport/sse-transport.d.ts +88 -0
- package/dist/transport/sse-transport.js +316 -0
- package/dist/transport/stdio-transport.d.ts +43 -0
- package/dist/transport/stdio-transport.js +238 -0
- package/dist/transport/types.d.ts +125 -0
- package/dist/transport/types.js +16 -0
- package/dist/utils/concurrency.d.ts +123 -0
- package/dist/utils/concurrency.js +213 -0
- package/dist/utils/formatters.d.ts +16 -0
- package/dist/utils/formatters.js +37 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/jsonpath.d.ts +87 -0
- package/dist/utils/jsonpath.js +326 -0
- package/dist/utils/markdown.d.ts +113 -0
- package/dist/utils/markdown.js +265 -0
- package/dist/utils/network.d.ts +14 -0
- package/dist/utils/network.js +17 -0
- package/dist/utils/sanitize.d.ts +92 -0
- package/dist/utils/sanitize.js +191 -0
- package/dist/utils/semantic.d.ts +194 -0
- package/dist/utils/semantic.js +1051 -0
- package/dist/utils/smart-truncate.d.ts +94 -0
- package/dist/utils/smart-truncate.js +361 -0
- package/dist/utils/timeout.d.ts +153 -0
- package/dist/utils/timeout.js +205 -0
- package/dist/utils/yaml-parser.d.ts +58 -0
- package/dist/utils/yaml-parser.js +86 -0
- package/dist/validation/index.d.ts +32 -0
- package/dist/validation/index.js +32 -0
- package/dist/validation/semantic-test-generator.d.ts +50 -0
- package/dist/validation/semantic-test-generator.js +176 -0
- package/dist/validation/semantic-types.d.ts +66 -0
- package/dist/validation/semantic-types.js +94 -0
- package/dist/validation/semantic-validator.d.ts +38 -0
- package/dist/validation/semantic-validator.js +340 -0
- package/dist/verification/index.d.ts +6 -0
- package/dist/verification/index.js +5 -0
- package/dist/verification/types.d.ts +133 -0
- package/dist/verification/types.js +5 -0
- package/dist/verification/verifier.d.ts +30 -0
- package/dist/verification/verifier.js +309 -0
- package/dist/version.d.ts +19 -0
- package/dist/version.js +48 -0
- package/dist/workflow/auto-generator.d.ts +27 -0
- package/dist/workflow/auto-generator.js +513 -0
- package/dist/workflow/discovery.d.ts +40 -0
- package/dist/workflow/discovery.js +195 -0
- package/dist/workflow/executor.d.ts +82 -0
- package/dist/workflow/executor.js +611 -0
- package/dist/workflow/index.d.ts +10 -0
- package/dist/workflow/index.js +10 -0
- package/dist/workflow/loader.d.ts +24 -0
- package/dist/workflow/loader.js +194 -0
- package/dist/workflow/state-tracker.d.ts +98 -0
- package/dist/workflow/state-tracker.js +424 -0
- package/dist/workflow/types.d.ts +337 -0
- package/dist/workflow/types.js +5 -0
- package/package.json +94 -0
- package/schemas/bellwether-check.schema.json +651 -0
|
@@ -0,0 +1,1681 @@
|
|
|
1
|
+
import { analyzeDependencies, calculateDependencyStats, generateDependencyMarkdown, } from '../baseline/dependency-analyzer.js';
|
|
2
|
+
import { getSchemaStabilityGrade } from '../baseline/response-schema-tracker.js';
|
|
3
|
+
import { getGradeIndicator } from '../baseline/documentation-scorer.js';
|
|
4
|
+
import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJsonForCodeBlock, } from '../utils/index.js';
|
|
5
|
+
import { smartTruncate, getExampleLength } from '../utils/smart-truncate.js';
|
|
6
|
+
import { calculatePerformanceMetrics, extractParameters, looksLikeError } from './shared.js';
|
|
7
|
+
import { analyzeExternalDependencies, formatExternalDependenciesMarkdown, } from '../baseline/external-dependency-detector.js';
|
|
8
|
+
import { SEMANTIC_VALIDATION, SCHEMA_EVOLUTION, ERROR_ANALYSIS, PERFORMANCE_CONFIDENCE, DOCUMENTATION_SCORING, EXAMPLE_OUTPUT, EXTERNAL_DEPENDENCIES, RELIABILITY_DISPLAY, CONFIDENCE_INDICATORS, DISPLAY_LIMITS, } from '../constants.js';
|
|
9
|
+
/**
 * Generate CONTRACT.md documentation from check results.
 * Enhanced with examples, error patterns, and performance data.
 * Used by: bellwether check
 *
 * Sections are appended in a fixed order: header, overview, capabilities,
 * quick reference, metrics legend, validation testing, issues detected,
 * performance baseline, security baseline, workflow testing, stateful
 * testing, dependency analysis, semantic types, schema stability, error
 * analysis, external dependencies, external service configuration,
 * response assertions, documentation quality, tools, prompts, resources,
 * error summary, and a metadata footer. Optional sections are only
 * attempted when their input is present and non-empty.
 *
 * @param {object} result - Check result. Reads `discovery` (serverInfo,
 *   protocolVersion, capabilities, tools, prompts, resources),
 *   `toolProfiles` and `metadata` (startTime, durationMs, statefulTesting,
 *   externalServices).
 * @param {object} [options] - Optional rendering inputs and switches:
 *   - `securityFingerprints`, `semanticInferences`, `schemaEvolution`,
 *     `errorAnalysisSummaries`: collections checked via `.size`.
 *   - `workflowResults`: collection checked via `.length`.
 *   - `documentationScore`: rendered when truthy.
 *   - `countValidationAsSuccess` (default `true`) and
 *     `separateValidationMetrics` (default `true`): forwarded to the
 *     per-tool success-rate calculation.
 *   - `fullExamples` (default `false`), `exampleLength`,
 *     `maxExamplesPerTool` (default
 *     `EXAMPLE_OUTPUT.DEFAULT_EXAMPLES_PER_TOOL`): example rendering.
 *   - `includeDependencyAnalysis` (default `true`).
 *   `targetConfidence` is not consumed by this function.
 * @returns {string} The Markdown document, with lines joined by '\n'.
 */
export function generateContractMd(result, options) {
    const lines = [];
    const { discovery, toolProfiles, metadata } = result;
    const securityFingerprints = options?.securityFingerprints;
    const semanticInferences = options?.semanticInferences;
    const schemaEvolution = options?.schemaEvolution;
    const errorAnalysisSummaries = options?.errorAnalysisSummaries;
    const documentationScore = options?.documentationScore;
    const workflowResults = options?.workflowResults;
    const countValidationAsSuccess = options?.countValidationAsSuccess ?? true;
    const separateValidationMetrics = options?.separateValidationMetrics ?? true;
    // Example output configuration
    const fullExamples = options?.fullExamples ?? false;
    const exampleLength = getExampleLength(fullExamples, options?.exampleLength);
    const maxExamplesPerTool = options?.maxExamplesPerTool ?? EXAMPLE_OUTPUT.DEFAULT_EXAMPLES_PER_TOOL;
    // Index profiles by tool name once instead of scanning the array for
    // every tool. The first profile with a given name wins, which matches
    // what Array.prototype.find would have returned.
    const profileByName = new Map();
    for (const candidate of toolProfiles) {
        if (!profileByName.has(candidate.name)) {
            profileByName.set(candidate.name, candidate);
        }
    }
    // Header
    lines.push(`# ${discovery.serverInfo.name}`);
    lines.push('');
    lines.push(`> Generated by [Bellwether](https://github.com/dotsetlabs/bellwether) on ${formatDateISO(metadata.startTime)}`);
    lines.push('');
    // Overview
    lines.push('## Overview');
    lines.push('');
    lines.push(`**Server Version:** ${discovery.serverInfo.version}`);
    lines.push(`**Protocol Version:** ${discovery.protocolVersion}`);
    lines.push('');
    const performanceMetrics = calculatePerformanceMetrics(toolProfiles);
    const performanceByTool = new Map(performanceMetrics.map(metric => [metric.toolName, metric]));
    // Capabilities summary
    lines.push('## Capabilities');
    lines.push('');
    if (discovery.capabilities.tools) {
        lines.push(`- **Tools:** ${discovery.tools.length} available`);
    }
    if (discovery.capabilities.prompts) {
        lines.push(`- **Prompts:** ${discovery.prompts.length} available`);
    }
    if (discovery.capabilities.resources) {
        lines.push(`- **Resources:** ${(discovery.resources ?? []).length} available`);
    }
    if (discovery.capabilities.logging) {
        lines.push('- **Logging:** Supported');
    }
    lines.push('');
    // Quick Reference section with performance data
    if (toolProfiles.length > 0) {
        lines.push('## Quick Reference');
        lines.push('');
        lines.push('| Tool | Parameters | Reliability | P50 | Confidence | Description |');
        lines.push('|------|------------|-------------|-----|------------|-------------|');
        for (const tool of discovery.tools) {
            const params = extractParameters(tool.inputSchema);
            // Descriptions are cut to 50 characters; an ellipsis marks a cut.
            const desc = tool.description?.substring(0, 50) || 'No description';
            const descDisplay = tool.description && tool.description.length > 50 ? desc + '...' : desc;
            const profile = profileByName.get(tool.name);
            const perf = performanceByTool.get(tool.name);
            const successRate = calculateToolSuccessRate(profile, {
                countValidationAsSuccess,
                separateValidationMetrics,
            });
            const p50Display = perf ? `${perf.p50Ms}ms` : '-';
            const confidenceDisplay = formatConfidenceIndicator(perf?.confidence?.confidenceLevel);
            lines.push(`| \`${escapeTableCell(tool.name)}\` | ${escapeTableCell(params)} | ${successRate} | ${p50Display} | ${confidenceDisplay} | ${escapeTableCell(descDisplay)} |`);
        }
        lines.push('');
    }
    // Array-valued sections are spread straight into `lines`; spreading an
    // empty array appends nothing, so no emptiness guard is needed.
    lines.push(...generateMetricsLegendSection());
    lines.push(...generateValidationTestingSection(toolProfiles));
    lines.push(...generateIssuesDetectedSection(toolProfiles));
    // Performance Baseline section
    lines.push(...generateContractPerformanceSection(toolProfiles, performanceMetrics));
    // Security Baseline section (if security testing was performed)
    if (securityFingerprints && securityFingerprints.size > 0) {
        lines.push(...generateContractSecuritySection(securityFingerprints));
    }
    // Workflow Testing section (if workflow testing was performed)
    if (workflowResults && workflowResults.length > 0) {
        lines.push(...generateWorkflowTestingSection(workflowResults));
    }
    // Stateful Testing section (if enabled)
    lines.push(...generateStatefulTestingSection(toolProfiles, result.metadata.statefulTesting));
    // Dependency Analysis section (auto-generated from tools)
    const includeDependencyAnalysis = options?.includeDependencyAnalysis ?? true;
    if (includeDependencyAnalysis && discovery.tools.length >= 2) {
        const depGraph = analyzeDependencies(discovery.tools);
        if (depGraph.edges.length > 0) {
            const depStats = calculateDependencyStats(depGraph);
            // Pushed as a single entry, not spread.
            lines.push(generateDependencyMarkdown(depGraph, depStats));
        }
    }
    // Semantic Types section (if semantic inferences were discovered)
    if (semanticInferences && semanticInferences.size > 0) {
        lines.push(...generateSemanticTypesSection(semanticInferences));
    }
    // Schema Stability section (if schema evolution data available)
    if (schemaEvolution && schemaEvolution.size > 0) {
        lines.push(...generateSchemaStabilitySection(schemaEvolution));
    }
    // Error Analysis section (if error analysis summaries available)
    if (errorAnalysisSummaries && errorAnalysisSummaries.size > 0) {
        lines.push(...generateErrorAnalysisSection(errorAnalysisSummaries));
    }
    // External Dependencies section - analyze errors for external service patterns
    const externalDepAnalysis = analyzeToolsForExternalDependencies(toolProfiles, discovery.tools);
    if (externalDepAnalysis && externalDepAnalysis.services.size > 0) {
        const externalDepSection = formatExternalDependenciesMarkdown(externalDepAnalysis);
        // Pushed as a single entry, so the guard is required here to avoid
        // emitting an empty entry plus a stray blank line.
        if (externalDepSection.length > 0) {
            lines.push(externalDepSection);
            lines.push('');
        }
    }
    // External service configuration section (from config handling)
    lines.push(...generateExternalServiceConfigSection(result.metadata.externalServices));
    // Response Assertions section
    lines.push(...generateResponseAssertionsSection(toolProfiles));
    // Documentation Quality section (if documentation score available)
    if (documentationScore) {
        lines.push(...generateDocumentationQualitySection(documentationScore));
    }
    // Tools section with examples and error patterns
    if (discovery.tools.length > 0) {
        lines.push('## Tools');
        lines.push('');
        for (const tool of discovery.tools) {
            // May be undefined when a discovered tool has no profile.
            const profile = profileByName.get(tool.name);
            lines.push(`### ${tool.name}`);
            lines.push('');
            lines.push(tool.description || 'No description available.');
            lines.push('');
            if (profile?.skipped) {
                lines.push(`*Skipped:* ${profile.skipReason ?? 'External service not configured.'}`);
                lines.push('');
            }
            if (profile?.mocked) {
                const serviceLabel = profile.mockService ? ` (${profile.mockService})` : '';
                lines.push(`*Mocked response used${serviceLabel}.*`);
                lines.push('');
            }
            if (profile?.assertionSummary) {
                lines.push(`*Response assertions:* ${profile.assertionSummary.passed}/${profile.assertionSummary.total} passed`);
                const failures = collectAssertionFailures(profile);
                if (failures.length > 0) {
                    lines.push('Failed assertions:');
                    // Show at most three failures, then summarize the rest.
                    for (const failure of failures.slice(0, 3)) {
                        lines.push(`- ${failure}`);
                    }
                    if (failures.length > 3) {
                        lines.push(`- ... and ${failures.length - 3} more`);
                    }
                    lines.push('');
                }
            }
            if (tool.inputSchema) {
                lines.push('**Input Schema:**');
                const schemaJson = validateJsonForCodeBlock(tool.inputSchema);
                lines.push('```json');
                lines.push(schemaJson.content);
                lines.push('```');
                lines.push('');
            }
            // Add example usage from successful interactions
            lines.push(...generateToolExamples(profile, maxExamplesPerTool, exampleLength));
            // Add error patterns if any were observed
            lines.push(...generateToolErrorPatterns(profile));
        }
    }
    // Prompts section
    if (discovery.prompts.length > 0) {
        lines.push('## Prompts');
        lines.push('');
        for (const prompt of discovery.prompts) {
            lines.push(`### ${prompt.name}`);
            lines.push('');
            if (prompt.description) {
                lines.push(prompt.description);
                lines.push('');
            }
            if (prompt.arguments && prompt.arguments.length > 0) {
                lines.push('**Arguments:**');
                for (const arg of prompt.arguments) {
                    const required = arg.required ? ' (required)' : '';
                    lines.push(`- \`${arg.name}\`${required}: ${arg.description ?? 'No description'}`);
                }
                lines.push('');
            }
        }
    }
    // Resources section
    const resources = discovery.resources ?? [];
    if (resources.length > 0) {
        lines.push('## Resources');
        lines.push('');
        for (const resource of resources) {
            lines.push(`### ${resource.name}`);
            lines.push('');
            lines.push(`**URI:** \`${resource.uri}\``);
            if (resource.mimeType) {
                lines.push(`**MIME Type:** ${resource.mimeType}`);
            }
            lines.push('');
            if (resource.description) {
                lines.push(resource.description);
                lines.push('');
            }
        }
    }
    // Error Summary section
    lines.push(...generateErrorSummarySection(toolProfiles));
    // Metadata footer
    lines.push('---');
    lines.push('');
    lines.push(`*Schema validation completed in ${formatDuration(metadata.durationMs)}.*`);
    return lines.join('\n');
}
|
|
279
|
+
/**
 * Compute reliability figures for a single tool profile.
 *
 * Mocked interactions are ignored. Every remaining interaction is classified by
 * its question's expected outcome ('success' when unspecified):
 *  - 'error'   -> validation test, correct when an error was observed
 *  - 'success' -> happy-path test, correct when no error was observed
 *  - otherwise -> counted as a happy-path success regardless of the outcome
 *
 * @param profile - Tool profile exposing an `interactions` array (may be nullish).
 * @param options - Reads `countValidationAsSuccess` and `separateValidationMetrics`.
 * @returns Metrics object, or null when there is no profile or no non-mocked interaction.
 */
function calculateReliabilityMetrics(profile, options) {
    if (!profile) {
        return null;
    }
    const liveInteractions = profile.interactions.filter((entry) => !entry.mocked);
    if (liveInteractions.length === 0) {
        return null;
    }
    const tally = { happyOk: 0, happyAll: 0, validationOk: 0, validationAll: 0 };
    for (const entry of liveInteractions) {
        const expectedOutcome = entry.question.expectedOutcome ?? 'success';
        // An error is signalled either explicitly or by error-looking text in the response.
        const explicitError = entry.error || entry.response?.isError;
        const textPart = entry.response?.content?.find((part) => part.type === 'text');
        const textualError = textPart && 'text' in textPart && looksLikeError(String(textPart.text));
        const failed = explicitError || textualError;
        switch (expectedOutcome) {
            case 'error':
                // Validation test: rejecting the input is the correct behavior.
                tally.validationAll += 1;
                if (failed) {
                    tally.validationOk += 1;
                }
                break;
            case 'success':
                // Happy-path test: the call is expected to go through.
                tally.happyAll += 1;
                if (!failed) {
                    tally.happyOk += 1;
                }
                break;
            default:
                // 'either': any outcome is acceptable.
                tally.happyAll += 1;
                tally.happyOk += 1;
                break;
        }
    }
    const toPercent = (numerator, denominator, whenEmpty) => (denominator > 0 ? (numerator / denominator) * 100 : whenEmpty);
    const total = liveInteractions.length;
    const creditedValidation = options.countValidationAsSuccess ? tally.validationOk : 0;
    const reliabilityRate = toPercent(tally.happyOk + creditedValidation, total, 0);
    const happyPathRate = toPercent(tally.happyOk, tally.happyAll, 100);
    let validationRate = 100;
    if (options.separateValidationMetrics) {
        validationRate = toPercent(tally.validationOk, tally.validationAll, 100);
    }
    return {
        total,
        happyPathSuccesses: tally.happyOk,
        happyPathTotal: tally.happyAll,
        validationSuccesses: tally.validationOk,
        validationTotal: tally.validationAll,
        reliabilityRate,
        happyPathRate,
        validationRate,
    };
}
|
|
340
|
+
/**
 * Render a tool's reliability rate as "<symbol> <rounded percent>%".
 *
 * The rate comes from calculateReliabilityMetrics; the symbol is chosen by
 * comparing it against the RELIABILITY_DISPLAY thresholds.
 *
 * @param profile - Tool profile forwarded to calculateReliabilityMetrics.
 * @param options - Options forwarded to calculateReliabilityMetrics.
 * @returns The formatted rate, or '-' when no metrics are available.
 */
function calculateToolSuccessRate(profile, options) {
    const reliability = calculateReliabilityMetrics(profile, options);
    if (!reliability) {
        return '-';
    }
    const { reliabilityRate } = reliability;
    let symbol;
    if (reliabilityRate >= RELIABILITY_DISPLAY.HIGH_THRESHOLD) {
        symbol = RELIABILITY_DISPLAY.SYMBOLS.PASS;
    }
    else if (reliabilityRate >= RELIABILITY_DISPLAY.MEDIUM_THRESHOLD) {
        symbol = RELIABILITY_DISPLAY.SYMBOLS.WARN;
    }
    else {
        symbol = RELIABILITY_DISPLAY.SYMBOLS.FAIL;
    }
    return `${symbol} ${reliabilityRate.toFixed(0)}%`;
}
|
|
358
|
+
/**
 * Format a confidence level as "<indicator> <level>".
 *
 * @param level - Key into CONFIDENCE_INDICATORS; falsy values yield '-'.
 * @returns The indicator followed by the level name, or '-' when no level is given.
 */
function formatConfidenceIndicator(level) {
    return level ? `${CONFIDENCE_INDICATORS[level]} ${level}` : '-';
}
|
|
365
|
+
/**
 * Build the "Metrics Legend" section: a symbol/meaning table followed by a
 * short explanation of the reliability score.
 *
 * @returns Markdown lines for the legend section.
 */
function generateMetricsLegendSection() {
    const legendRows = [
        [RELIABILITY_DISPLAY.SYMBOLS.PASS, 'All tests passed as expected'],
        [RELIABILITY_DISPLAY.SYMBOLS.WARN, 'Some unexpected behavior'],
        [RELIABILITY_DISPLAY.SYMBOLS.FAIL, 'Critical issues detected'],
        [CONFIDENCE_INDICATORS.high, 'High confidence in performance metrics'],
        [CONFIDENCE_INDICATORS.medium, 'Medium confidence in performance metrics'],
        [CONFIDENCE_INDICATORS.low, 'Low confidence in performance metrics'],
    ];
    return [
        '## Metrics Legend',
        '',
        '| Symbol | Meaning |',
        '|--------|---------|',
        ...legendRows.map(([symbol, meaning]) => `| ${symbol} | ${meaning} |`),
        '',
        '**Reliability Score**: Percentage of tests where the tool behaved as expected',
        '(correct success or correct rejection of invalid input).',
        '',
    ];
}
|
|
383
|
+
/**
 * Build the "Validation Testing" table: one row per tool showing how its
 * validation tests fared in the 'input', 'type' and 'required' buckets.
 *
 * @param profiles - Tool profiles to summarize.
 * @returns Markdown lines, or an empty array when no profile has any validation test.
 */
function generateValidationTestingSection(profiles) {
    const summaries = [];
    for (const profile of profiles) {
        summaries.push({
            profile,
            buckets: {
                input: summarizeValidationBucket(profile, 'input'),
                type: summarizeValidationBucket(profile, 'type'),
                required: summarizeValidationBucket(profile, 'required'),
            },
        });
    }
    const anyValidationTests = summaries.some(({ buckets }) => buckets.input.total > 0 || buckets.type.total > 0 || buckets.required.total > 0);
    if (!anyValidationTests) {
        return [];
    }
    const rows = summaries.map(({ profile, buckets }) => {
        const cells = [
            `\`${escapeTableCell(profile.name)}\``,
            formatValidationStatus(buckets.input),
            formatValidationStatus(buckets.type),
            formatValidationStatus(buckets.required),
        ];
        return `| ${cells.join(' | ')} |`;
    });
    return [
        '## Validation Testing',
        '',
        '| Tool | Input Validation | Type Checking | Required Params |',
        '|------|------------------|---------------|-----------------|',
        ...rows,
        '',
    ];
}
|
|
411
|
+
/**
 * Build the "Issues Detected" section from interactions whose outcome
 * assessment is marked incorrect.
 *
 * Mocked interactions and interactions without an assessment are ignored.
 * A tool that succeeded where an error was expected is reported as critical;
 * every other mismatch is reported as a warning. Each group is capped at
 * DISPLAY_LIMITS.ISSUES_DISPLAY_LIMIT entries, followed by a "... N more" line.
 *
 * @param profiles - Tool profiles whose interactions are inspected.
 * @returns Markdown lines for the section (always includes the heading).
 */
function generateIssuesDetectedSection(profiles) {
    const critical = [];
    const warnings = [];
    for (const profile of profiles) {
        for (const interaction of profile.interactions) {
            const assessment = interaction.outcomeAssessment;
            if (interaction.mocked || !assessment || assessment.correct) {
                continue;
            }
            const { expected, actual } = assessment;
            const description = interaction.question.description;
            const label = `\`${escapeTableCell(profile.name)}\``;
            if (expected === 'error' && actual === 'success') {
                critical.push(`${label} accepts invalid input: ${description}`);
                continue;
            }
            const reason = expected === 'success' && actual === 'error'
                ? 'failed on valid input'
                : 'returned unexpected outcome';
            warnings.push(`${label} ${reason}: ${description}`);
        }
    }
    const out = ['## Issues Detected', ''];
    if (critical.length === 0 && warnings.length === 0) {
        out.push(`${RELIABILITY_DISPLAY.SYMBOLS.PASS} No issues detected in validation or happy-path behavior.`, '');
        return out;
    }
    // Appends one capped bullet list under the given heading; no-op for an empty group.
    const appendGroup = (heading, issues) => {
        if (issues.length === 0) {
            return;
        }
        const limit = DISPLAY_LIMITS.ISSUES_DISPLAY_LIMIT;
        out.push(heading);
        out.push(...issues.slice(0, limit).map((issue) => `- ${issue}`));
        if (issues.length > limit) {
            out.push(`- ... ${issues.length - limit} more`);
        }
        out.push('');
    };
    appendGroup('### Critical', critical);
    appendGroup('### Warnings', warnings);
    return out;
}
|
|
464
|
+
/**
 * Count the validation tests of a profile that fall into one bucket.
 *
 * Only non-mocked interactions whose question expects an 'error' outcome and
 * that classifyValidationBucket assigns to `bucket` are considered.
 *
 * @param profile - Tool profile exposing an `interactions` array.
 * @param bucket - Bucket name compared against classifyValidationBucket's result.
 * @returns `{ total, passed }`, where `passed` counts interactions assessed as correct.
 */
function summarizeValidationBucket(profile, bucket) {
    const matching = profile.interactions.filter((interaction) => !interaction.mocked
        && interaction.question.expectedOutcome === 'error'
        && classifyValidationBucket(interaction.question) === bucket);
    const passed = matching.filter((interaction) => interaction.outcomeAssessment?.correct).length;
    return { total: matching.length, passed };
}
|
|
485
|
+
/**
 * Assign a validation question to a bucket based on keywords in its
 * lower-cased description. Rules are tried in order and the first match wins;
 * questions matching no rule fall into the 'input' bucket.
 *
 * @param question - Question object with a string `description`.
 * @returns 'required', 'type' or 'input'.
 */
function classifyValidationBucket(question) {
    const text = question.description.toLowerCase();
    const rules = [
        ['required', /missing|required/],
        ['type', /type|coercion|format|invalid\s+type/],
    ];
    const matched = rules.find(([, pattern]) => pattern.test(text));
    return matched ? matched[0] : 'input';
}
|
|
495
|
+
/**
 * Render a validation bucket summary as a table cell.
 *
 * @param bucket - `{ total, passed }` counts for one validation bucket.
 * @returns '-' when the bucket is empty, otherwise a Pass / Fail / Partial
 *          label prefixed with the matching RELIABILITY_DISPLAY symbol.
 */
function formatValidationStatus(bucket) {
    const { total, passed } = bucket;
    if (total === 0) {
        return '-';
    }
    const symbols = RELIABILITY_DISPLAY.SYMBOLS;
    if (passed === total) {
        return `${symbols.PASS} Pass (${passed}/${total})`;
    }
    return passed === 0
        ? `${symbols.FAIL} Fail (0/${total})`
        : `${symbols.WARN} Partial (${passed}/${total})`;
}
|
|
507
|
+
/**
 * Generate performance baseline section for CONTRACT.md.
 *
 * Emits a per-tool latency table, an optional low-confidence warning and a
 * collapsed table with the confidence details.
 *
 * @param profiles - Tool profiles; only used to compute metrics when no override is given.
 * @param metricsOverride - Optional precomputed per-tool metrics used instead of recomputing.
 * @returns Markdown lines, or an empty array when there are no metrics or no tool
 *          has at least two calls.
 */
function generateContractPerformanceSection(profiles, metricsOverride) {
    const lines = [];
    const metrics = metricsOverride ?? calculatePerformanceMetrics(profiles);
    if (metrics.length === 0) {
        return [];
    }
    // Only show if we have meaningful data
    const hasValidMetrics = metrics.some(m => m.callCount >= 2);
    if (!hasValidMetrics) {
        return [];
    }
    // Thresholds are stored as fractions (e.g. 0.3); multiplying by 100 can produce
    // floating point noise such as 30.000000000000004, so always round for display.
    const formatThresholdPercent = (fraction) => (fraction * 100).toFixed(0);
    lines.push('## Performance Baseline');
    lines.push('');
    lines.push('Response time metrics observed during schema validation:');
    lines.push('');
    lines.push('| Tool | Calls | P50 | P95 | Happy Path % | Confidence |');
    lines.push('|------|-------|-----|-----|--------------|------------|');
    for (const m of metrics) {
        const successRate = ((1 - m.errorRate) * 100).toFixed(0);
        const successEmoji = m.errorRate < 0.1 ? '✓' : m.errorRate < 0.5 ? '⚠' : '✗';
        const confidenceDisplay = formatConfidenceDisplay(m.confidence);
        // Guard against 0 calls edge case - show N/A for latency metrics
        const p50Display = m.callCount > 0 ? `${m.p50Ms}ms` : 'N/A';
        const p95Display = m.callCount > 0 ? `${m.p95Ms}ms` : 'N/A';
        lines.push(`| \`${escapeTableCell(m.toolName)}\` | ${m.callCount} | ${p50Display} | ${p95Display} | ${successEmoji} ${successRate}% | ${confidenceDisplay} |`);
    }
    lines.push('');
    // Show low confidence warning if any tools have low confidence
    const lowConfidenceTools = metrics.filter(m => m.confidence?.confidenceLevel === 'low');
    if (lowConfidenceTools.length > 0) {
        // Categorize low confidence by reason
        const lowSampleTools = lowConfidenceTools.filter(m => (m.confidence?.successfulSamples ?? 0) < PERFORMANCE_CONFIDENCE.HIGH.MIN_SAMPLES);
        const highVariabilityTools = lowConfidenceTools.filter(m => (m.confidence?.successfulSamples ?? 0) >= PERFORMANCE_CONFIDENCE.HIGH.MIN_SAMPLES &&
            (m.confidence?.coefficientOfVariation ?? 0) > PERFORMANCE_CONFIDENCE.MEDIUM.MAX_CV);
        lines.push(`> **⚠️ Low Confidence**: ${lowConfidenceTools.length} tool(s) have low statistical confidence.`);
        if (lowSampleTools.length > 0) {
            lines.push(`> - ${lowSampleTools.length} tool(s) have insufficient happy path samples (need ${PERFORMANCE_CONFIDENCE.HIGH.MIN_SAMPLES}+)`);
        }
        if (highVariabilityTools.length > 0) {
            lines.push(`> - ${highVariabilityTools.length} tool(s) have high response time variability (CV > ${formatThresholdPercent(PERFORMANCE_CONFIDENCE.MEDIUM.MAX_CV)}%)`);
        }
        lines.push('> Run with `--warmup-runs 3` and `--max-questions 5` for more reliable baselines.');
        lines.push('');
    }
    // Add confidence summary section (collapsed)
    const hasConfidenceData = metrics.some(m => m.confidence);
    if (hasConfidenceData) {
        lines.push('<details>');
        lines.push('<summary>Confidence Metrics Details</summary>');
        lines.push('');
        lines.push('| Tool | Happy Path | Validation | Total | Std Dev | CV | Level |');
        lines.push('|------|------------|------------|-------|---------|-----|-------|');
        for (const m of metrics) {
            if (m.confidence) {
                // Guard against impossible metrics: 0 samples shouldn't have stdDev/CV
                const successfulSamples = m.confidence.successfulSamples ?? m.confidence.sampleCount;
                const validationSamples = m.confidence.validationSamples ?? 0;
                const totalTests = m.confidence.totalTests ?? m.confidence.sampleCount;
                // Use confidence.standardDeviation (from successful samples) for consistency with CV
                const roundedStdDev = Math.round(m.confidence.standardDeviation);
                const stdDevDisplay = successfulSamples > 0 ? `${roundedStdDev}ms` : 'N/A';
                // When stdDev rounds to 0ms, showing high CV is misleading (sub-millisecond noise)
                // In this case, display ~0% to indicate the variability is below measurement threshold
                const rawCV = m.confidence.coefficientOfVariation * 100;
                const cvDisplay = successfulSamples > 0
                    ? (roundedStdDev === 0 && rawCV > 1 ? '~0%' : `${rawCV.toFixed(1)}%`)
                    : 'N/A';
                const levelLabel = PERFORMANCE_CONFIDENCE.LABELS[m.confidence.confidenceLevel];
                lines.push(`| \`${escapeTableCell(m.toolName)}\` | ${successfulSamples} | ${validationSamples} | ${totalTests} | ${stdDevDisplay} | ${cvDisplay} | ${levelLabel} |`);
            }
        }
        lines.push('');
        lines.push('**Legend:**');
        lines.push(`- **Happy Path**: Successful tests with expected outcome "success" (used for confidence)`);
        lines.push(`- **Validation**: Tests with expected outcome "error" (not used for performance confidence)`);
        lines.push(`- HIGH: ${PERFORMANCE_CONFIDENCE.HIGH.MIN_SAMPLES}+ happy path samples, CV ≤ ${formatThresholdPercent(PERFORMANCE_CONFIDENCE.HIGH.MAX_CV)}%`);
        lines.push(`- MEDIUM: ${PERFORMANCE_CONFIDENCE.MEDIUM.MIN_SAMPLES}+ happy path samples, CV ≤ ${formatThresholdPercent(PERFORMANCE_CONFIDENCE.MEDIUM.MAX_CV)}%`);
        lines.push('- LOW: Insufficient happy path samples or high variability');
        lines.push('');
        lines.push('</details>');
        lines.push('');
    }
    return lines;
}
|
|
594
|
+
/**
 * Render a confidence record for a table cell as "<indicator> <label>".
 *
 * @param confidence - Object with a `confidenceLevel` key; may be nullish.
 * @returns The indicator and label looked up in PERFORMANCE_CONFIDENCE, or '-'
 *          when no confidence data is available.
 */
function formatConfidenceDisplay(confidence) {
    if (!confidence) {
        return '-';
    }
    const { confidenceLevel } = confidence;
    return [
        PERFORMANCE_CONFIDENCE.INDICATORS[confidenceLevel],
        PERFORMANCE_CONFIDENCE.LABELS[confidenceLevel],
    ].map((part) => `${part}`).join(' ');
}
|
|
605
|
+
/**
 * Build the "Security Baseline" section of CONTRACT.md.
 *
 * Only fingerprints flagged as `tested` contribute. The section contains a
 * summary table, a critical/high findings table with collapsed details, a
 * collapsed medium/low/info table, and a per-tool risk score table sorted by
 * descending score.
 *
 * @param fingerprints - Iterable of [toolName, fingerprint] pairs that also exposes `entries()`.
 * @returns Markdown lines, or an empty array when no tool was security tested.
 */
function generateContractSecuritySection(fingerprints) {
    const findings = [];
    let testedCount = 0;
    let riskSum = 0;
    for (const [toolName, fingerprint] of fingerprints) {
        if (!fingerprint.tested) {
            continue;
        }
        testedCount += 1;
        riskSum += fingerprint.riskScore;
        for (const finding of fingerprint.findings) {
            findings.push({ ...finding, toolName });
        }
    }
    if (testedCount === 0) {
        return [];
    }
    const withLevel = (...levels) => findings.filter((finding) => levels.includes(finding.riskLevel));
    const out = [
        '## Security Baseline',
        '',
        `Security testing performed on ${testedCount} tools.`,
        '',
        // Summary table
        '| Metric | Value |',
        '|--------|-------|',
        `| Tools Tested | ${testedCount} |`,
        `| Total Findings | ${findings.length} |`,
        `| Average Risk Score | ${Math.round(riskSum / testedCount)}/100 |`,
    ];
    // Severity rows appear only for levels that actually occurred.
    for (const [label, level] of [['Critical', 'critical'], ['High', 'high'], ['Medium', 'medium']]) {
        const count = withLevel(level).length;
        if (count > 0) {
            out.push(`| ${label} Findings | ${count} |`);
        }
    }
    out.push('');
    if (findings.length === 0) {
        out.push('✅ No security vulnerabilities detected during testing.', '');
        return out;
    }
    // Shared renderer for both findings tables; `pickEmoji` maps a risk level to its marker.
    const findingsTable = (rows, pickEmoji) => [
        '| Risk | Tool | Finding | CWE |',
        '|------|------|---------|-----|',
        ...rows.map((finding) => `| ${pickEmoji(finding.riskLevel)} ${finding.riskLevel} | \`${escapeTableCell(finding.tool)}\` | ${escapeTableCell(finding.title)} | ${finding.cweId} |`),
        '',
    ];
    const severe = withLevel('critical', 'high');
    if (severe.length > 0) {
        out.push('### Critical and High Severity Findings', '');
        out.push(...findingsTable(severe, (level) => (level === 'critical' ? '🔴' : '🟠')));
        out.push('<details>', '<summary>Finding Details</summary>', '');
        for (const finding of severe) {
            out.push(
                `#### ${finding.title}`,
                '',
                `**Tool:** \`${finding.tool}\``,
                `**Parameter:** \`${finding.parameter}\``,
                `**Risk Level:** ${finding.riskLevel.toUpperCase()}`,
                `**CWE:** ${finding.cweId}`,
                '',
                finding.description,
                '',
                '**Remediation:**',
                finding.remediation,
                '',
            );
        }
        out.push('</details>', '');
    }
    const minor = withLevel('medium', 'low', 'info');
    if (minor.length > 0) {
        out.push('<details>', `<summary>Medium/Low Severity Findings (${minor.length})</summary>`, '');
        out.push(...findingsTable(minor, (level) => (level === 'medium' ? '🟡' : '🔵')));
        out.push('</details>', '');
    }
    // Per-tool risk scores, highest risk first
    out.push('### Tool Risk Scores', '', '| Tool | Risk Score | Findings |', '|------|------------|----------|');
    const scoreMarker = (score) => {
        if (score >= 70) {
            return '🔴';
        }
        if (score >= 40) {
            return '🟠';
        }
        return score >= 20 ? '🟡' : '🟢';
    };
    const ranked = Array.from(fingerprints.entries())
        .filter(([, fingerprint]) => fingerprint.tested)
        .map(([name, fingerprint]) => ({ name, riskScore: fingerprint.riskScore, findingCount: fingerprint.findings.length }))
        .sort((left, right) => right.riskScore - left.riskScore);
    for (const entry of ranked) {
        out.push(`| \`${escapeTableCell(entry.name)}\` | ${scoreMarker(entry.riskScore)} ${entry.riskScore}/100 | ${entry.findingCount} |`);
    }
    out.push('');
    return out;
}
|
|
726
|
+
/**
 * Build the "Workflow Testing" section of CONTRACT.md.
 *
 * Produces an aggregate summary table, a one-row-per-workflow results table,
 * and for each workflow a step table plus optional failure reason, data-flow
 * diagram (mermaid) and state-change table.
 *
 * @param results - Workflow results; each carries `workflow`, `steps`, `success`
 *                  and `durationMs`, and optionally `failureReason`, `dataFlow`
 *                  and `stateTracking`.
 * @returns Markdown lines, or an empty array when there are no results.
 */
function generateWorkflowTestingSection(results) {
    if (results.length === 0) {
        return [];
    }
    const countPassedSteps = (result) => result.steps.filter((step) => step.success).length;
    let workflowsPassed = 0;
    let stepTotal = 0;
    let stepsPassed = 0;
    let durationTotal = 0;
    for (const result of results) {
        if (result.success) {
            workflowsPassed += 1;
        }
        stepTotal += result.workflow.steps.length;
        stepsPassed += countPassedSteps(result);
        durationTotal += result.durationMs;
    }
    const out = [
        '## Workflow Testing',
        '',
        'Multi-step workflow tests validate tool chains and state transitions.',
        '',
        // Summary table
        '| Metric | Value |',
        '|--------|-------|',
        `| Workflows | ${results.length} |`,
        `| Passed | ${workflowsPassed} |`,
        `| Failed | ${results.length - workflowsPassed} |`,
        `| Total Steps | ${stepTotal} |`,
        `| Steps Passed | ${stepsPassed} |`,
        `| Total Duration | ${formatDuration(durationTotal)} |`,
        '',
        // Results table
        '### Results',
        '',
        '| Workflow | Status | Steps | Duration |',
        '|----------|--------|-------|----------|',
    ];
    for (const result of results) {
        const verdict = result.success ? '✓ Passed' : '✗ Failed';
        const progress = `${countPassedSteps(result)}/${result.workflow.steps.length}`;
        out.push(`| ${escapeTableCell(result.workflow.name)} | ${verdict} | ${progress} | ${formatDuration(result.durationMs)} |`);
    }
    out.push('');
    // Note shown in the last column of a step row.
    const stepNote = (stepResult, stepDefinition) => {
        if (stepResult.success) {
            return stepDefinition.optional ? '(optional)' : '';
        }
        if (stepResult.error) {
            return escapeTableCell(truncateString(stepResult.error, 40));
        }
        if (stepResult.assertionResults?.some((assertion) => !assertion.passed)) {
            const failedCount = stepResult.assertionResults.filter((assertion) => !assertion.passed).length;
            return `${failedCount} assertion(s) failed`;
        }
        return '';
    };
    // Details for each workflow
    for (const result of results) {
        const { workflow } = result;
        out.push(`### ${result.success ? '✓' : '✗'} ${workflow.name}`, '', `**ID:** \`${workflow.id}\``);
        if (workflow.description) {
            out.push(`**Description:** ${workflow.description}`);
        }
        out.push(`**Expected Outcome:** ${workflow.expectedOutcome}`, '');
        // Step details table; executed steps are paired with their definition by index.
        out.push('| Step | Tool | Status | Duration | Notes |', '|------|------|--------|----------|-------|');
        for (const [index, stepResult] of result.steps.entries()) {
            const stepDefinition = workflow.steps[index];
            const verdict = stepResult.success ? '✓ Pass' : '✗ Fail';
            const elapsed = formatDuration(stepResult.durationMs);
            const note = stepNote(stepResult, stepDefinition);
            out.push(`| ${index + 1} | \`${escapeTableCell(stepDefinition.tool)}\` | ${verdict} | ${elapsed} | ${note} |`);
        }
        out.push('');
        // Show failure details if any
        if (!result.success && result.failureReason) {
            out.push('**Failure:**', `> ${result.failureReason}`, '');
        }
        // Show data flow if present
        if (result.dataFlow && result.dataFlow.length > 0) {
            out.push('<details>', '<summary>Data Flow</summary>', '', '```mermaid', 'graph LR');
            for (const edge of result.dataFlow) {
                const source = mermaidLabel(`Step ${edge.fromStep + 1}`);
                const target = mermaidLabel(`Step ${edge.toStep + 1}`);
                const via = mermaidLabel(edge.targetParam);
                out.push(` ${source} -->|${via}| ${target}`);
            }
            out.push('```', '', '</details>', '');
        }
        // Show state changes if present
        const changes = result.stateTracking?.changes;
        if (changes && changes.length > 0) {
            out.push('<details>', '<summary>State Changes</summary>', '', '| Step | Type | Path |', '|------|------|------|');
            for (const change of changes) {
                out.push(`| ${change.causedByStep + 1} | ${change.type} | \`${escapeTableCell(change.path)}\` |`);
            }
            out.push('', '</details>', '');
        }
    }
    return out;
}
|
|
843
|
+
/**
 * Truncate a string to a maximum length with ellipsis.
 *
 * The result never exceeds `maxLength` characters. When `maxLength` is too
 * small to hold any text plus the full "..." marker, the marker itself is
 * shortened (e.g. maxLength 2 yields ".."), and a non-positive `maxLength`
 * yields an empty string.
 *
 * @param str - String to shorten.
 * @param maxLength - Maximum number of UTF-16 code units allowed in the result.
 * @returns `str` unchanged when it already fits, otherwise the shortened text.
 */
function truncateString(str, maxLength) {
    if (str.length <= maxLength) {
        return str;
    }
    const ellipsis = '...';
    // Clamp both budgets at zero: a negative end index passed to slice() counts
    // from the end of the string and would return more text than allowed.
    const budget = Math.max(0, maxLength);
    const keep = Math.max(0, budget - ellipsis.length);
    return `${str.slice(0, keep)}${ellipsis.slice(0, budget)}`;
}
|
|
851
|
+
/**
 * Generate Semantic Types section for CONTRACT.md.
 * Documents inferred semantic types for parameters across all tools.
 *
 * Only inferences with confidence >= 0.5 and a type other than 'unknown' are
 * reported. The section has a summary table grouped by semantic type (most
 * common first, at most three parameters listed per type) followed by one
 * table per tool; the per-tool tables are wrapped in a collapsed <details>
 * block when more than five inferences are reported.
 *
 * @param inferences - Iterable of [toolName, inferenceList] pairs.
 * @returns Markdown lines, or an empty array when nothing qualifies.
 */
function generateSemanticTypesSection(inferences) {
    const lines = [];
    // Collect all inferences with high confidence
    const allInferences = [];
    for (const [toolName, toolInferences] of inferences) {
        for (const inference of toolInferences) {
            if (inference.confidence >= 0.5 && inference.inferredType !== 'unknown') {
                allInferences.push({ ...inference, toolName });
            }
        }
    }
    if (allInferences.length === 0) {
        return [];
    }
    lines.push('## Semantic Types');
    lines.push('');
    lines.push('Parameters with inferred semantic types for enhanced validation:');
    lines.push('');
    // Group by semantic type
    const byType = new Map();
    for (const inf of allInferences) {
        const existing = byType.get(inf.inferredType) ?? [];
        existing.push({
            toolName: inf.toolName,
            paramName: inf.paramName,
            confidence: inf.confidence,
        });
        byType.set(inf.inferredType, existing);
    }
    // Sort by number of parameters (most common types first)
    const sortedTypes = Array.from(byType.entries())
        .sort((a, b) => b[1].length - a[1].length);
    lines.push('| Type | Parameters | Expected Format |');
    lines.push('|------|------------|-----------------|');
    for (const [type, params] of sortedTypes) {
        const displayName = SEMANTIC_VALIDATION.TYPE_DISPLAY_NAMES[type] ?? type;
        const exampleValue = SEMANTIC_VALIDATION.EXAMPLE_VALUES[type] ?? '';
        // Format parameters as tool.param; escape them like every other table cell
        // so a '|' in a tool or parameter name cannot break the row.
        const paramList = params
            .slice(0, 3)
            .map(p => `\`${escapeTableCell(`${p.toolName}.${p.paramName}`)}\``)
            .join(', ');
        const moreCount = params.length > 3 ? ` +${params.length - 3} more` : '';
        // An empty code span (two adjacent backticks) is not valid inline code,
        // so show a dash when no example value is known for the type.
        const exampleDisplay = exampleValue === '' ? '-' : `\`${exampleValue}\``;
        lines.push(`| ${displayName} | ${paramList}${moreCount} | ${exampleDisplay} |`);
    }
    lines.push('');
    // Detailed list (collapsed)
    const collapseDetails = allInferences.length > 5;
    if (collapseDetails) {
        lines.push('<details>');
        lines.push('<summary>All Inferred Semantic Types</summary>');
        lines.push('');
    }
    // Group by tool
    const byTool = new Map();
    for (const inf of allInferences) {
        const existing = byTool.get(inf.toolName) ?? [];
        existing.push(inf);
        byTool.set(inf.toolName, existing);
    }
    for (const [toolName, toolInferences] of byTool) {
        lines.push(`### ${toolName}`);
        lines.push('');
        lines.push('| Parameter | Type | Confidence |');
        lines.push('|-----------|------|------------|');
        for (const inf of toolInferences) {
            const displayName = SEMANTIC_VALIDATION.TYPE_DISPLAY_NAMES[inf.inferredType] ?? inf.inferredType;
            const confidenceDisplay = `${Math.round(inf.confidence * 100)}%`;
            lines.push(`| \`${escapeTableCell(inf.paramName)}\` | ${displayName} | ${confidenceDisplay} |`);
        }
        lines.push('');
    }
    if (collapseDetails) {
        lines.push('</details>');
        lines.push('');
    }
    return lines;
}
|
|
932
|
+
/**
 * Generate Schema Stability section for CONTRACT.md.
 * Documents response schema consistency and stability across tools.
 *
 * @param schemaEvolution - Iterable of [toolName, evolution] pairs; only entries
 *   whose `sampleCount` is greater than zero are reported
 * @returns Markdown lines for the section, or an empty array when no entry qualifies
 */
function generateSchemaStabilitySection(schemaEvolution) {
    const lines = [];
    // Collect tools with meaningful schema data
    const toolsWithSchemas = [];
    for (const [toolName, evolution] of schemaEvolution) {
        if (evolution.sampleCount > 0) {
            const grade = getSchemaStabilityGrade(evolution);
            toolsWithSchemas.push({ name: toolName, evolution, grade });
        }
    }
    if (toolsWithSchemas.length === 0) {
        return [];
    }
    // Whole-number percentage. Rounding keeps binary floating-point artifacts
    // (e.g. 0.57 * 100 === 56.99999999999999) out of the rendered document.
    const toPercent = (fraction) => `${Math.round(fraction * 100)}%`;
    lines.push('## Schema Stability');
    lines.push('');
    lines.push('Response schema consistency metrics for tools with sufficient test samples:');
    lines.push('');
    // Summary stats
    const stableCount = toolsWithSchemas.filter(t => t.evolution.isStable).length;
    const unstableCount = toolsWithSchemas.length - stableCount;
    const avgConfidence = toolsWithSchemas.reduce((sum, t) => sum + t.evolution.stabilityConfidence, 0) / toolsWithSchemas.length;
    lines.push('| Metric | Value |');
    lines.push('|--------|-------|');
    lines.push(`| Tools Analyzed | ${toolsWithSchemas.length} |`);
    lines.push(`| Stable Schemas | ${stableCount} |`);
    lines.push(`| Unstable Schemas | ${unstableCount} |`);
    lines.push(`| Avg Confidence | ${toPercent(avgConfidence)} |`);
    lines.push('');
    // Overall status
    if (stableCount === toolsWithSchemas.length) {
        lines.push('✅ All tested tools have consistent response schemas.');
        lines.push('');
    }
    else if (unstableCount > 0) {
        lines.push(`⚠️ ${unstableCount} tool(s) have inconsistent response schemas.`);
        lines.push('');
    }
    // Per-tool table
    lines.push('| Tool | Grade | Stability | Confidence | Samples | Issues |');
    lines.push('|------|-------|-----------|------------|---------|--------|');
    // Sort by grade (worst first, then by name)
    const gradeOrder = new Map([['F', 0], ['D', 1], ['C', 2], ['B', 3], ['A', 4], ['N/A', 5]]);
    // Unrecognized grades sort after every known grade instead of yielding NaN
    // in the comparator, which would make the ordering unspecified.
    const rankOf = (grade) => gradeOrder.get(grade) ?? gradeOrder.size;
    const sortedTools = [...toolsWithSchemas].sort((a, b) => {
        const gradeCompare = rankOf(a.grade) - rankOf(b.grade);
        if (gradeCompare !== 0)
            return gradeCompare;
        return a.name.localeCompare(b.name);
    });
    for (const { name, evolution, grade } of sortedTools) {
        const gradeEmoji = getGradeEmoji(grade);
        const stabilityStatus = evolution.isStable
            ? SCHEMA_EVOLUTION.STABILITY_LABELS.STABLE
            : SCHEMA_EVOLUTION.STABILITY_LABELS.UNSTABLE;
        const confidenceDisplay = toPercent(evolution.stabilityConfidence);
        // Show at most two field names in the table; the rest are summarized as "+N"
        const issues = evolution.inconsistentFields.length > 0
            ? evolution.inconsistentFields.slice(0, 2).join(', ') +
                (evolution.inconsistentFields.length > 2 ? ` +${evolution.inconsistentFields.length - 2}` : '')
            : '-';
        lines.push(`| \`${escapeTableCell(name)}\` | ${gradeEmoji} ${grade} | ${stabilityStatus} | ${confidenceDisplay} | ${evolution.sampleCount} | ${escapeTableCell(issues)} |`);
    }
    lines.push('');
    // Detailed breakdown for unstable tools
    const unstableTools = sortedTools.filter(t => !t.evolution.isStable && t.evolution.inconsistentFields.length > 0);
    if (unstableTools.length > 0) {
        lines.push('<details>');
        lines.push('<summary>Unstable Schema Details</summary>');
        lines.push('');
        for (const { name, evolution } of unstableTools) {
            lines.push(`### ${name}`);
            lines.push('');
            lines.push(`**Inconsistent Fields:** ${evolution.inconsistentFields.join(', ')}`);
            lines.push('');
            lines.push('These fields appear inconsistently across responses, indicating the tool may return');
            lines.push('different structures depending on input or state.');
            lines.push('');
        }
        lines.push('</details>');
        lines.push('');
    }
    // Grade legend
    lines.push('<details>');
    lines.push('<summary>Grade Legend</summary>');
    lines.push('');
    lines.push(`- **A**: ${toPercent(SCHEMA_EVOLUTION.GRADE_THRESHOLDS.A)}+ stability confidence`);
    lines.push(`- **B**: ${toPercent(SCHEMA_EVOLUTION.GRADE_THRESHOLDS.B)}+ stability confidence`);
    lines.push(`- **C**: ${toPercent(SCHEMA_EVOLUTION.GRADE_THRESHOLDS.C)}+ stability confidence`);
    lines.push(`- **D**: ${toPercent(SCHEMA_EVOLUTION.GRADE_THRESHOLDS.D)}+ stability confidence`);
    lines.push('- **F**: Below minimum threshold');
    lines.push(`- **N/A**: Insufficient samples (< ${SCHEMA_EVOLUTION.MIN_SAMPLES_FOR_STABILITY})`);
    lines.push('');
    lines.push('</details>');
    lines.push('');
    return lines;
}
|
|
1030
|
+
/**
 * Get emoji for stability grade.
 *
 * @param grade - Stability grade: 'A', 'B', 'C', 'D', 'F' or 'N/A'
 * @returns Colored-circle emoji for the grade. Unrecognized grades get the
 *   neutral '⚪' so callers never interpolate `undefined` into a table cell.
 */
function getGradeEmoji(grade) {
    switch (grade) {
        case 'A':
        case 'B':
            return '🟢';
        case 'C':
            return '🟡';
        case 'D':
            return '🟠';
        case 'F':
            return '🔴';
        case 'N/A':
        default:
            return '⚪';
    }
}
|
|
1043
|
+
/**
 * Generate Error Analysis section for CONTRACT.md.
 * Documents enhanced error analysis with root causes and remediations.
 *
 * @param summaries - Iterable of [toolName, summary] pairs
 * @returns Markdown lines for the section, or an empty array when no tool
 *   has `totalErrors > 0`
 */
function generateErrorAnalysisSection(summaries) {
    // Keep only the tools that recorded at least one error
    const erroredTools = [];
    for (const [toolName, summary] of summaries) {
        if (summary.totalErrors > 0) {
            erroredTools.push({ name: toolName, summary });
        }
    }
    if (erroredTools.length === 0) {
        return [];
    }
    // Single pass: overall totals plus per-category counts across all tools
    let errorTotal = 0;
    let transientTotal = 0;
    const categoryTotals = new Map();
    for (const { summary } of erroredTools) {
        errorTotal += summary.totalErrors;
        transientTotal += summary.transientErrors;
        for (const [cat, count] of summary.categoryCounts) {
            categoryTotals.set(cat, (categoryTotals.get(cat) ?? 0) + count);
        }
    }
    const lines = [
        '## Error Analysis',
        '',
        'Enhanced error analysis with root causes and remediation suggestions:',
        '',
        '| Metric | Value |',
        '|--------|-------|',
        `| Tools with Errors | ${erroredTools.length} |`,
        `| Total Errors | ${errorTotal} |`,
        `| Error Categories | ${categoryTotals.size} |`,
        `| Transient Errors | ${transientTotal} |`,
        '',
    ];
    // Category breakdown, most frequent first
    if (categoryTotals.size > 0) {
        lines.push('### Error Categories', '', '| Category | Count | Description |', '|----------|-------|-------------|');
        const rankedCategories = Array.from(categoryTotals.entries());
        rankedCategories.sort((left, right) => right[1] - left[1]);
        for (const [category, count] of rankedCategories) {
            const label = ERROR_ANALYSIS.CATEGORY_LABELS[category] ?? category;
            const emoji = getCategoryEmoji(category);
            lines.push(`| ${emoji} ${label} | ${count} | ${escapeTableCell(formatCategoryDescription(category))} |`);
        }
        lines.push('');
    }
    // Per-tool table, tools with the most errors first
    lines.push('### By Tool', '', '| Tool | Total | Transient | Top Category | Remediation |', '|------|-------|-----------|--------------|-------------|');
    const rankedTools = erroredTools.slice();
    rankedTools.sort((left, right) => right.summary.totalErrors - left.summary.totalErrors);
    for (const { name, summary } of rankedTools) {
        const topCategory = getTopCategory(summary.categoryCounts);
        let topCategoryLabel = '-';
        if (topCategory) {
            topCategoryLabel = ERROR_ANALYSIS.CATEGORY_LABELS[topCategory] ?? topCategory;
        }
        // Keep the table cell short: cap the first remediation at 50 characters
        let remediationCell = summary.topRemediations[0] ?? '-';
        if (remediationCell.length > 50) {
            remediationCell = remediationCell.slice(0, 47) + '...';
        }
        lines.push(`| \`${escapeTableCell(name)}\` | ${summary.totalErrors} | ${summary.transientErrors} | ${topCategoryLabel} | ${escapeTableCell(remediationCell)} |`);
    }
    lines.push('');
    // Collapsed per-tool remediation details
    const remediable = rankedTools.filter(({ summary }) => summary.topRemediations.length > 0);
    if (remediable.length > 0) {
        lines.push('<details>', '<summary>Remediation Suggestions</summary>', '');
        for (const { name, summary } of remediable.slice(0, ERROR_ANALYSIS.MAX_REMEDIATIONS_DISPLAY)) {
            lines.push(`### ${name}`, '');
            if (summary.topRootCauses.length > 0) {
                lines.push('**Root Causes:**');
                for (const cause of summary.topRootCauses) {
                    lines.push(`- ${cause}`);
                }
                lines.push('');
            }
            if (summary.topRemediations.length > 0) {
                lines.push('**Suggested Remediations:**');
                for (const remediation of summary.topRemediations) {
                    lines.push(`- ${remediation}`);
                }
                lines.push('');
            }
            if (summary.relatedParameters.length > 0) {
                lines.push(`**Related Parameters:** ${summary.relatedParameters.join(', ')}`, '');
            }
        }
        lines.push('</details>', '');
    }
    // Static legend explaining the category names
    lines.push('<details>', '<summary>Category Legend</summary>', '');
    lines.push('- **Validation Error (400)**: Client sent invalid input that failed validation');
    lines.push('- **Authentication Error (401)**: Missing or invalid authentication credentials');
    lines.push('- **Not Found (404)**: Requested resource does not exist');
    lines.push('- **Conflict (409)**: Request conflicts with current state');
    lines.push('- **Rate Limited (429)**: Too many requests, retry after delay');
    lines.push('- **Server Error (5xx)**: Internal server error, may be transient');
    lines.push('', '</details>', '');
    return lines;
}
|
|
1166
|
+
/**
 * Get emoji for error category.
 *
 * @param category - Error category identifier (e.g. 'server_error')
 * @returns Emoji for the category, or '❓' when the category is not recognized
 */
function getCategoryEmoji(category) {
    const emojiByCategory = new Map([
        ['client_error_validation', '⚠️'],
        ['client_error_auth', '🔐'],
        ['client_error_not_found', '🔍'],
        ['client_error_conflict', '💥'],
        ['client_error_rate_limit', '⏱️'],
        ['server_error', '🔥'],
    ]);
    const emoji = emojiByCategory.get(category);
    return emoji === undefined ? '❓' : emoji;
}
|
|
1180
|
+
/**
 * Get human-readable description for error category.
 *
 * @param category - Error category identifier (e.g. 'client_error_auth')
 * @returns One-line description, or 'Unknown error category' for anything else
 */
function formatCategoryDescription(category) {
    const descriptions = new Map([
        ['client_error_validation', 'Invalid input or missing required parameters'],
        ['client_error_auth', 'Authentication or authorization failure'],
        ['client_error_not_found', 'Resource not found or does not exist'],
        ['client_error_conflict', 'Conflict with current resource state'],
        ['client_error_rate_limit', 'Rate limit exceeded, retry after delay'],
        ['server_error', 'Internal server error, may be transient'],
    ]);
    if (descriptions.has(category)) {
        return descriptions.get(category);
    }
    return 'Unknown error category';
}
|
|
1201
|
+
/**
 * Get the top category from a category counts map.
 *
 * @param counts - Iterable of [category, count] pairs (e.g. a Map)
 * @returns The category with the highest count. The first one wins on ties.
 *   Returns `undefined` when there are no entries or no count is above zero.
 */
function getTopCategory(counts) {
    const leader = Array.from(counts).reduce(
        (best, [category, count]) => (count > best.count ? { category, count } : best),
        { category: undefined, count: 0 },
    );
    return leader.category;
}
|
|
1215
|
+
/**
|
|
1216
|
+
* Generate documentation quality section for CONTRACT.md.
|
|
1217
|
+
*/
|
|
1218
|
+
function generateDocumentationQualitySection(score) {
|
|
1219
|
+
const lines = [];
|
|
1220
|
+
lines.push('## Documentation Quality');
|
|
1221
|
+
lines.push('');
|
|
1222
|
+
// Overall score with grade badge
|
|
1223
|
+
const indicator = getGradeIndicator(score.grade);
|
|
1224
|
+
lines.push(`**Overall Score:** ${indicator} ${score.overallScore}/100 (${score.grade})`);
|
|
1225
|
+
lines.push('');
|
|
1226
|
+
// Component breakdown table
|
|
1227
|
+
lines.push('### Score Components');
|
|
1228
|
+
lines.push('');
|
|
1229
|
+
lines.push('| Component | Score | Weight |');
|
|
1230
|
+
lines.push('|-----------|-------|--------|');
|
|
1231
|
+
const weights = DOCUMENTATION_SCORING.WEIGHTS;
|
|
1232
|
+
lines.push(`| Description Coverage | ${score.components.descriptionCoverage}% | ${(weights.descriptionCoverage * 100).toFixed(0)}% |`);
|
|
1233
|
+
lines.push(`| Description Quality | ${score.components.descriptionQuality}% | ${(weights.descriptionQuality * 100).toFixed(0)}% |`);
|
|
1234
|
+
lines.push(`| Parameter Documentation | ${score.components.parameterDocumentation}% | ${(weights.parameterDocumentation * 100).toFixed(0)}% |`);
|
|
1235
|
+
lines.push(`| Example Coverage | ${score.components.exampleCoverage}% | ${(weights.exampleCoverage * 100).toFixed(0)}% |`);
|
|
1236
|
+
lines.push('');
|
|
1237
|
+
// Issues by type (if any)
|
|
1238
|
+
if (score.issues.length > 0) {
|
|
1239
|
+
lines.push('### Issues');
|
|
1240
|
+
lines.push('');
|
|
1241
|
+
// Group issues by type
|
|
1242
|
+
const issuesByType = new Map();
|
|
1243
|
+
for (const issue of score.issues) {
|
|
1244
|
+
const existing = issuesByType.get(issue.type) ?? [];
|
|
1245
|
+
existing.push(issue);
|
|
1246
|
+
issuesByType.set(issue.type, existing);
|
|
1247
|
+
}
|
|
1248
|
+
// Create issues table
|
|
1249
|
+
lines.push('| Issue Type | Count | Severity |');
|
|
1250
|
+
lines.push('|------------|-------|----------|');
|
|
1251
|
+
for (const [type, issues] of issuesByType) {
|
|
1252
|
+
const severityLabel = issues[0].severity;
|
|
1253
|
+
const severityEmoji = severityLabel === 'error' ? '🔴' : severityLabel === 'warning' ? '🟡' : '🔵';
|
|
1254
|
+
const typeLabel = formatIssueTypeLabel(type);
|
|
1255
|
+
lines.push(`| ${typeLabel} | ${issues.length} | ${severityEmoji} ${severityLabel} |`);
|
|
1256
|
+
}
|
|
1257
|
+
lines.push('');
|
|
1258
|
+
// Show specific issues in collapsible section
|
|
1259
|
+
if (score.issues.length <= 10) {
|
|
1260
|
+
lines.push('<details>');
|
|
1261
|
+
lines.push('<summary>Issue Details</summary>');
|
|
1262
|
+
lines.push('');
|
|
1263
|
+
for (const issue of score.issues) {
|
|
1264
|
+
lines.push(`- **${issue.tool}**: ${issue.message}`);
|
|
1265
|
+
}
|
|
1266
|
+
lines.push('');
|
|
1267
|
+
lines.push('</details>');
|
|
1268
|
+
lines.push('');
|
|
1269
|
+
}
|
|
1270
|
+
}
|
|
1271
|
+
// Suggestions (if any)
|
|
1272
|
+
if (score.suggestions.length > 0) {
|
|
1273
|
+
lines.push('### Improvement Suggestions');
|
|
1274
|
+
lines.push('');
|
|
1275
|
+
for (const suggestion of score.suggestions) {
|
|
1276
|
+
lines.push(`- ${suggestion}`);
|
|
1277
|
+
}
|
|
1278
|
+
lines.push('');
|
|
1279
|
+
}
|
|
1280
|
+
// Grade thresholds reference
|
|
1281
|
+
lines.push('<details>');
|
|
1282
|
+
lines.push('<summary>Grade Thresholds</summary>');
|
|
1283
|
+
lines.push('');
|
|
1284
|
+
const thresholds = DOCUMENTATION_SCORING.GRADE_THRESHOLDS;
|
|
1285
|
+
lines.push(`- **A**: ${thresholds.A}+`);
|
|
1286
|
+
lines.push(`- **B**: ${thresholds.B}-${thresholds.A - 1}`);
|
|
1287
|
+
lines.push(`- **C**: ${thresholds.C}-${thresholds.B - 1}`);
|
|
1288
|
+
lines.push(`- **D**: ${thresholds.D}-${thresholds.C - 1}`);
|
|
1289
|
+
lines.push(`- **F**: Below ${thresholds.D}`);
|
|
1290
|
+
lines.push('');
|
|
1291
|
+
lines.push('</details>');
|
|
1292
|
+
lines.push('');
|
|
1293
|
+
return lines;
|
|
1294
|
+
}
|
|
1295
|
+
/**
 * Format issue type label for display.
 *
 * @param type - Snake-case issue type identifier
 * @returns The fixed label for a known type; otherwise the identifier with
 *   underscores turned into spaces and each word capitalized
 */
function formatIssueTypeLabel(type) {
    const fixedLabels = new Map([
        ['missing_description', 'Missing Description'],
        ['short_description', 'Short Description'],
        ['missing_param_description', 'Missing Parameter Description'],
        ['no_examples', 'No Examples'],
    ]);
    const fixed = fixedLabels.get(type);
    if (fixed !== undefined) {
        return fixed;
    }
    const capitalize = (word) => word.charAt(0).toUpperCase() + word.slice(1);
    return type.split('_').map((word) => capitalize(word)).join(' ');
}
|
|
1312
|
+
/**
 * Generate example usage for a tool from successful interactions.
 *
 * An interaction counts as successful when it has no `error`, its response is
 * not flagged `isError`, and its text content (if any) does not look like an
 * error. Examples are de-duplicated by their serialized arguments.
 *
 * @param profile - Tool profile with interactions
 * @param maxExamples - Maximum number of examples to include
 * @param maxExampleLength - Maximum length for each example response (uses smart truncation)
 * @returns Markdown lines for the examples, or an empty array when none can be shown
 */
function generateToolExamples(profile, maxExamples, maxExampleLength = EXAMPLE_OUTPUT.DEFAULT_LENGTH) {
    const lines = [];
    if (!profile || profile.interactions.length === 0) {
        return [];
    }
    // Find successful interactions
    const successful = profile.interactions.filter(i => {
        if (i.error || i.response?.isError)
            return false;
        const textContent = i.response?.content?.find(c => c.type === 'text');
        if (textContent && 'text' in textContent) {
            if (looksLikeError(String(textContent.text)))
                return false;
        }
        return true;
    });
    if (successful.length === 0) {
        return [];
    }
    // Take up to maxExamples unique examples (by different args)
    const examples = [];
    const seenArgsHashes = new Set();
    for (const interaction of successful) {
        if (examples.length >= maxExamples)
            break;
        const argsHash = JSON.stringify(interaction.question.args);
        if (seenArgsHashes.has(argsHash))
            continue;
        const textContent = interaction.response?.content?.find(c => c.type === 'text');
        if (!textContent || !('text' in textContent))
            continue;
        const responseText = String(textContent.text);
        if (responseText.length === 0)
            continue;
        // Record the args only once the interaction actually yields an example.
        // Marking them earlier would let an interaction without usable text hide
        // a later, usable interaction that was called with the same arguments.
        seenArgsHashes.add(argsHash);
        // Use smart truncation to preserve structure
        const truncated = smartTruncate(responseText, { maxLength: maxExampleLength });
        examples.push({
            args: interaction.question.args,
            response: truncated.content,
            wasTruncated: truncated.wasTruncated,
        });
    }
    if (examples.length === 0) {
        return [];
    }
    lines.push(`**Example${examples.length > 1 ? 's' : ''}:**`);
    lines.push('');
    for (let i = 0; i < examples.length; i++) {
        const example = examples[i];
        // Number the examples only when there is more than one
        if (examples.length > 1) {
            lines.push(`*Example ${i + 1}:*`);
        }
        // Show input
        lines.push('Input:');
        const inputJson = validateJsonForCodeBlock(example.args);
        lines.push('```json');
        lines.push(inputJson.content);
        lines.push('```');
        // Show output (with truncation note if applicable)
        const outputLabel = example.wasTruncated ? 'Output (truncated):' : 'Output:';
        lines.push(outputLabel);
        const outputJson = validateJsonForCodeBlock(example.response);
        lines.push('```');
        lines.push(outputJson.content);
        lines.push('```');
        lines.push('');
    }
    return lines;
}
|
|
1389
|
+
/**
 * Generate error patterns section for a tool.
 *
 * Walks the profile's non-mocked interactions, classifies each failing one
 * with `categorizeError`, and emits one bullet per category showing the first
 * error text seen for it (capped at 100 characters).
 *
 * @param profile - Tool profile with interactions
 * @returns Markdown lines, or an empty array when no errors were observed
 */
function generateToolErrorPatterns(profile) {
    if (!profile || profile.interactions.length === 0) {
        return [];
    }
    // category -> first (truncated) example; only the first is ever rendered
    const firstExampleByCategory = new Map();
    for (const interaction of profile.interactions) {
        if (interaction.mocked) {
            continue;
        }
        const textPart = interaction.response?.content?.find(c => c.type === 'text');
        const responseText = textPart && 'text' in textPart ? String(textPart.text) : '';
        const failed = interaction.error || interaction.response?.isError || looksLikeError(responseText);
        if (!failed) {
            continue;
        }
        // Prefer the explicit error string, falling back to the response text
        const errorContent = interaction.error || responseText;
        if (!errorContent) {
            continue;
        }
        const category = categorizeError(errorContent);
        if (firstExampleByCategory.has(category)) {
            continue;
        }
        const shortened = errorContent.length > 100 ? errorContent.slice(0, 97) + '...' : errorContent;
        firstExampleByCategory.set(category, shortened);
    }
    if (firstExampleByCategory.size === 0) {
        return [];
    }
    const bullets = Array.from(firstExampleByCategory, ([category, example]) => `- **${category}**: ${example}`);
    return ['**Error Patterns:**', '', ...bullets, ''];
}
|
|
1431
|
+
/**
 * Categorize an error message.
 *
 * Rules are tried in order and the first matching pattern decides the
 * category, so a message matching several patterns gets the earliest one.
 *
 * @param errorText - Error message text
 * @returns 'Permission', 'NotFound', 'Validation', 'Timeout', 'Network' or 'Other'
 */
function categorizeError(errorText) {
    const lower = errorText.toLowerCase();
    const rules = [
        ['Permission', /permission|denied|not allowed|forbidden|unauthorized/i],
        ['NotFound', /not found|does not exist|no such|cannot find|missing/i],
        ['Validation', /invalid|validation|required|must be|expected|type error/i],
        ['Timeout', /timeout|timed out|deadline/i],
        ['Network', /connect|network|econnrefused|socket/i],
    ];
    const matched = rules.find(([, pattern]) => pattern.test(lower));
    return matched ? matched[0] : 'Other';
}
|
|
1453
|
+
/**
 * Generate error summary section aggregating errors across all tools.
 *
 * Counts failing, non-mocked interactions per category (as classified by
 * `categorizeError`) and lists up to three affected tools for each category.
 *
 * @param profiles - Tool profiles with interaction data
 * @returns Markdown lines, or an empty array when no errors were observed
 */
function generateErrorSummarySection(profiles) {
    const lines = [];
    // Count errors by category across all tools
    const categoryCounts = new Map();
    for (const profile of profiles) {
        for (const interaction of profile.interactions) {
            if (interaction.mocked) {
                continue;
            }
            const errorText = interaction.error || '';
            const textContent = interaction.response?.content?.find(c => c.type === 'text');
            const responseText = textContent && 'text' in textContent ? String(textContent.text) : '';
            const isError = interaction.error || interaction.response?.isError || looksLikeError(responseText);
            if (!isError)
                continue;
            // Prefer the explicit error string, falling back to the response text
            const errorContent = errorText || responseText;
            if (!errorContent)
                continue;
            const category = categorizeError(errorContent);
            const existing = categoryCounts.get(category) ?? { count: 0, tools: new Set() };
            existing.count++;
            existing.tools.add(profile.name);
            categoryCounts.set(category, existing);
        }
    }
    if (categoryCounts.size === 0) {
        return [];
    }
    lines.push('## Error Patterns Summary');
    lines.push('');
    lines.push('Errors observed during schema validation:');
    lines.push('');
    lines.push('| Category | Count | Affected Tools |');
    lines.push('|----------|-------|----------------|');
    for (const [category, data] of categoryCounts) {
        // Tool names are escaped so a pipe in a name cannot split the cell
        // and corrupt the table row.
        const toolList = Array.from(data.tools)
            .slice(0, 3)
            .map(t => `\`${escapeTableCell(t)}\``)
            .join(', ');
        const more = data.tools.size > 3 ? ` +${data.tools.size - 3} more` : '';
        lines.push(`| ${category} | ${data.count} | ${toolList}${more} |`);
    }
    lines.push('');
    return lines;
}
|
|
1501
|
+
/**
 * Analyze tool profiles for external dependency errors.
 *
 * Groups each tool's failing, non-mocked interactions into error patterns
 * (keyed by category plus the first 50 characters of the error text) and
 * hands the per-tool patterns to `analyzeExternalDependencies`.
 *
 * @param profiles - Tool profiles with interaction data
 * @param tools - MCPTool definitions for description context
 * @returns Result of `analyzeExternalDependencies`, or null when no tool produced an error pattern
 */
function analyzeToolsForExternalDependencies(profiles, tools) {
    const errorInputs = [];
    for (const profile of profiles) {
        // patternHash -> { category, count, example }; example is the first full error text
        const grouped = new Map();
        for (const interaction of profile.interactions) {
            if (interaction.mocked) {
                continue;
            }
            const textPart = interaction.response?.content?.find(c => c.type === 'text');
            const responseText = textPart && 'text' in textPart ? String(textPart.text) : '';
            const failed = interaction.error || interaction.response?.isError || looksLikeError(responseText);
            if (!failed) {
                continue;
            }
            // Prefer the explicit error string, falling back to the response text
            const errorContent = interaction.error || responseText;
            if (!errorContent) {
                continue;
            }
            // Simple categorization for pattern hashing
            const category = categorizeError(errorContent);
            const patternHash = `${category}:${errorContent.slice(0, 50)}`;
            const bucket = grouped.get(patternHash);
            if (bucket) {
                bucket.count += 1;
            }
            else {
                grouped.set(patternHash, { category, count: 1, example: errorContent });
            }
        }
        if (grouped.size === 0) {
            continue;
        }
        // Convert to ErrorPattern format
        const patterns = Array.from(grouped, ([patternHash, { category, example, count }]) => ({
            category: mapCategoryToErrorCategory(category),
            patternHash,
            example,
            count,
        }));
        const matchingTool = tools.find(t => t.name === profile.name);
        errorInputs.push({
            toolName: profile.name,
            toolDescription: matchingTool?.description,
            patterns,
        });
    }
    return errorInputs.length === 0 ? null : analyzeExternalDependencies(errorInputs);
}
|
|
1564
|
+
/**
 * Map simple error category to ErrorPattern category type.
 *
 * Matching is case-insensitive. Only permission, not-found, validation and
 * timeout have a dedicated target; everything else, including 'network' and
 * 'other', maps to 'unknown'.
 *
 * @param category - Category name as produced by the simple categorizer
 * @returns 'permission', 'not_found', 'validation', 'timeout' or 'unknown'
 */
function mapCategoryToErrorCategory(category) {
    const targets = new Map([
        ['permission', 'permission'],
        ['notfound', 'not_found'],
        ['validation', 'validation'],
        ['timeout', 'timeout'],
    ]);
    return targets.get(category.toLowerCase()) ?? 'unknown';
}
|
|
1583
|
+
/**
 * Generate the Stateful Testing section.
 *
 * Lists every profile that carries `dependencyInfo`, ordered by its sequence
 * position, and appends a Mermaid dependency graph when there are between 1
 * and 50 dependency edges.
 *
 * @param toolProfiles - Tool profiles, optionally carrying `dependencyInfo`
 * @param summary - Stateful testing summary; the section is skipped unless `summary.enabled` is truthy
 * @returns Markdown lines, or an empty array when disabled or no profile has dependency info
 */
function generateStatefulTestingSection(toolProfiles, summary) {
    if (!summary?.enabled) {
        return [];
    }
    const positionOf = (profile) => profile.dependencyInfo?.sequencePosition ?? 0;
    // filter() returns a fresh array, so sorting it leaves the caller's array untouched
    const ordered = toolProfiles.filter((profile) => profile.dependencyInfo);
    ordered.sort((left, right) => positionOf(left) - positionOf(right));
    if (ordered.length === 0) {
        return [];
    }
    const lines = [
        '## Stateful Testing',
        '',
        `Stateful testing executed across ${summary.toolCount} tool(s) with ${summary.dependencyCount} dependency edge(s).`,
        '',
        '| Tool | Sequence | Depends On |',
        '|------|----------|------------|',
    ];
    // Build the table rows and collect the graph edges in the same pass
    const edges = [];
    for (const profile of ordered) {
        const dependsOn = profile.dependencyInfo?.dependsOn ?? [];
        const deps = dependsOn.length ? dependsOn.join(', ') : 'None';
        lines.push(`| \`${escapeTableCell(profile.name)}\` | ${positionOf(profile)} | ${escapeTableCell(deps)} |`);
        for (const dep of dependsOn) {
            edges.push({ from: dep, to: profile.name });
        }
    }
    lines.push('');
    // Graphs with more than 50 edges are omitted
    const drawGraph = edges.length > 0 && edges.length <= 50;
    if (drawGraph) {
        lines.push('```mermaid', 'graph TD');
        for (const { from, to } of edges) {
            lines.push(` ${mermaidLabel(from)} --> ${mermaidLabel(to)}`);
        }
        lines.push('```', '');
    }
    return lines;
}
|
|
1618
|
+
/**
 * Build the "External Service Setup" markdown section as an array of lines.
 *
 * Returns an empty array when `summary` is missing or lists no unconfigured
 * services. Otherwise emits the heading, the mode, and one bullet per
 * unconfigured service found in `EXTERNAL_DEPENDENCIES.SERVICES`; names with
 * no entry in that registry are skipped.
 *
 * @param {object} [summary] - Reads `mode` and `unconfiguredServices`.
 * @returns {string[]} Markdown lines, or `[]` when there is nothing to show.
 */
function generateExternalServiceConfigSection(summary) {
    if (!summary) {
        return [];
    }
    if (summary.unconfiguredServices.length === 0) {
        return [];
    }
    const output = [
        '## External Service Setup',
        '',
        `Mode: \`${summary.mode}\``,
        '',
    ];
    for (const serviceName of summary.unconfiguredServices) {
        const entry = EXTERNAL_DEPENDENCIES.SERVICES[serviceName];
        if (entry) {
            output.push(`- **${entry.name}**: ${entry.remediation}`);
        }
    }
    output.push('');
    return output;
}
|
|
1635
|
+
/**
 * Build the "Response Assertions" markdown section as an array of lines.
 *
 * Only profiles with an `assertionSummary` are reported; with none, an empty
 * array is returned. The section holds a table of passed/failed counts per
 * tool and, when at least one tool has a positive `failed` count, an
 * "Assertion Failures" list showing up to three failure messages per tool
 * (followed by ' ...' when more exist).
 *
 * @param {Array<object>} toolProfiles - Profiles; reads `name` and
 *   `assertionSummary` (`passed`, `failed`) and hands failing profiles to
 *   `collectAssertionFailures`.
 * @returns {string[]} Markdown lines, or `[]` when there is nothing to show.
 */
function generateResponseAssertionsSection(toolProfiles) {
    const asserted = toolProfiles.filter((profile) => profile.assertionSummary);
    if (asserted.length === 0) {
        return [];
    }
    const output = [
        '## Response Assertions',
        '',
        '| Tool | Passed | Failed |',
        '|------|--------|--------|',
    ];
    for (const profile of asserted) {
        const counts = profile.assertionSummary;
        output.push(`| \`${escapeTableCell(profile.name)}\` | ${counts.passed} | ${counts.failed} |`);
    }
    output.push('');
    const failing = asserted.filter((profile) => (profile.assertionSummary?.failed ?? 0) > 0);
    if (failing.length === 0) {
        return output;
    }
    output.push('### Assertion Failures', '');
    for (const profile of failing) {
        const messages = collectAssertionFailures(profile);
        const shown = messages.slice(0, 3).join('; ');
        const ellipsis = messages.length > 3 ? ' ...' : '';
        output.push(`- \`${profile.name}\`: ${shown}${ellipsis}`);
    }
    output.push('');
    return output;
}
|
|
1661
|
+
/**
 * Gather the distinct assertion-failure messages recorded on a tool profile.
 *
 * Mocked interactions are ignored. Each failed assertion result yields
 * "<type>: <message>" when it has a message and "<type> failed" otherwise.
 * Duplicates are dropped and first-seen order is kept.
 *
 * @param {object} profile - Reads `interactions`, and on each interaction
 *   `mocked` and the optional `assertionResults` (`passed`, `type`,
 *   `message`).
 * @returns {string[]} Unique failure messages in encounter order.
 */
function collectAssertionFailures(profile) {
    const unique = new Set();
    for (const interaction of profile.interactions) {
        if (!interaction.mocked) {
            const results = interaction.assertionResults ?? [];
            for (const result of results) {
                if (!result.passed) {
                    unique.add(result.message
                        ? `${result.type}: ${result.message}`
                        : `${result.type} failed`);
                }
            }
        }
    }
    return [...unique];
}
|
|
1676
|
+
/**
|
|
1677
|
+
* Generate AGENTS.md documentation from explore results.
|
|
1678
|
+
* Full LLM-powered behavioral documentation with persona findings.
|
|
1679
|
+
* Used by: bellwether explore
|
|
1680
|
+
*/
|
|
1681
|
+
//# sourceMappingURL=contract.js.map
|