@dotsetlabs/bellwether 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +291 -0
- package/LICENSE +21 -0
- package/README.md +739 -0
- package/dist/auth/credentials.d.ts +64 -0
- package/dist/auth/credentials.js +218 -0
- package/dist/auth/index.d.ts +6 -0
- package/dist/auth/index.js +6 -0
- package/dist/auth/keychain.d.ts +64 -0
- package/dist/auth/keychain.js +268 -0
- package/dist/baseline/ab-testing.d.ts +80 -0
- package/dist/baseline/ab-testing.js +236 -0
- package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
- package/dist/baseline/ai-compatibility-scorer.js +606 -0
- package/dist/baseline/calibration.d.ts +77 -0
- package/dist/baseline/calibration.js +136 -0
- package/dist/baseline/category-matching.d.ts +85 -0
- package/dist/baseline/category-matching.js +289 -0
- package/dist/baseline/change-impact-analyzer.d.ts +98 -0
- package/dist/baseline/change-impact-analyzer.js +592 -0
- package/dist/baseline/comparator.d.ts +64 -0
- package/dist/baseline/comparator.js +916 -0
- package/dist/baseline/confidence.d.ts +55 -0
- package/dist/baseline/confidence.js +122 -0
- package/dist/baseline/converter.d.ts +61 -0
- package/dist/baseline/converter.js +585 -0
- package/dist/baseline/dependency-analyzer.d.ts +89 -0
- package/dist/baseline/dependency-analyzer.js +567 -0
- package/dist/baseline/deprecation-tracker.d.ts +133 -0
- package/dist/baseline/deprecation-tracker.js +322 -0
- package/dist/baseline/diff.d.ts +55 -0
- package/dist/baseline/diff.js +1584 -0
- package/dist/baseline/documentation-scorer.d.ts +205 -0
- package/dist/baseline/documentation-scorer.js +466 -0
- package/dist/baseline/embeddings.d.ts +118 -0
- package/dist/baseline/embeddings.js +251 -0
- package/dist/baseline/error-analyzer.d.ts +198 -0
- package/dist/baseline/error-analyzer.js +721 -0
- package/dist/baseline/evaluation/evaluator.d.ts +42 -0
- package/dist/baseline/evaluation/evaluator.js +323 -0
- package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
- package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
- package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
- package/dist/baseline/evaluation/golden-dataset.js +717 -0
- package/dist/baseline/evaluation/index.d.ts +15 -0
- package/dist/baseline/evaluation/index.js +15 -0
- package/dist/baseline/evaluation/types.d.ts +186 -0
- package/dist/baseline/evaluation/types.js +8 -0
- package/dist/baseline/external-dependency-detector.d.ts +181 -0
- package/dist/baseline/external-dependency-detector.js +524 -0
- package/dist/baseline/golden-output.d.ts +162 -0
- package/dist/baseline/golden-output.js +636 -0
- package/dist/baseline/health-scorer.d.ts +174 -0
- package/dist/baseline/health-scorer.js +451 -0
- package/dist/baseline/incremental-checker.d.ts +97 -0
- package/dist/baseline/incremental-checker.js +174 -0
- package/dist/baseline/index.d.ts +31 -0
- package/dist/baseline/index.js +42 -0
- package/dist/baseline/migration-generator.d.ts +137 -0
- package/dist/baseline/migration-generator.js +554 -0
- package/dist/baseline/migrations.d.ts +60 -0
- package/dist/baseline/migrations.js +197 -0
- package/dist/baseline/performance-tracker.d.ts +214 -0
- package/dist/baseline/performance-tracker.js +577 -0
- package/dist/baseline/pr-comment-generator.d.ts +117 -0
- package/dist/baseline/pr-comment-generator.js +546 -0
- package/dist/baseline/response-fingerprint.d.ts +127 -0
- package/dist/baseline/response-fingerprint.js +728 -0
- package/dist/baseline/response-schema-tracker.d.ts +129 -0
- package/dist/baseline/response-schema-tracker.js +420 -0
- package/dist/baseline/risk-scorer.d.ts +54 -0
- package/dist/baseline/risk-scorer.js +434 -0
- package/dist/baseline/saver.d.ts +89 -0
- package/dist/baseline/saver.js +554 -0
- package/dist/baseline/scenario-generator.d.ts +151 -0
- package/dist/baseline/scenario-generator.js +905 -0
- package/dist/baseline/schema-compare.d.ts +86 -0
- package/dist/baseline/schema-compare.js +557 -0
- package/dist/baseline/schema-evolution.d.ts +189 -0
- package/dist/baseline/schema-evolution.js +467 -0
- package/dist/baseline/semantic.d.ts +203 -0
- package/dist/baseline/semantic.js +908 -0
- package/dist/baseline/synonyms.d.ts +60 -0
- package/dist/baseline/synonyms.js +386 -0
- package/dist/baseline/telemetry.d.ts +165 -0
- package/dist/baseline/telemetry.js +294 -0
- package/dist/baseline/test-pruner.d.ts +120 -0
- package/dist/baseline/test-pruner.js +387 -0
- package/dist/baseline/types.d.ts +449 -0
- package/dist/baseline/types.js +5 -0
- package/dist/baseline/version.d.ts +138 -0
- package/dist/baseline/version.js +206 -0
- package/dist/cache/index.d.ts +5 -0
- package/dist/cache/index.js +5 -0
- package/dist/cache/response-cache.d.ts +151 -0
- package/dist/cache/response-cache.js +287 -0
- package/dist/ci/index.d.ts +60 -0
- package/dist/ci/index.js +342 -0
- package/dist/cli/commands/auth.d.ts +12 -0
- package/dist/cli/commands/auth.js +352 -0
- package/dist/cli/commands/badge.d.ts +3 -0
- package/dist/cli/commands/badge.js +74 -0
- package/dist/cli/commands/baseline-accept.d.ts +15 -0
- package/dist/cli/commands/baseline-accept.js +178 -0
- package/dist/cli/commands/baseline-migrate.d.ts +12 -0
- package/dist/cli/commands/baseline-migrate.js +164 -0
- package/dist/cli/commands/baseline.d.ts +14 -0
- package/dist/cli/commands/baseline.js +449 -0
- package/dist/cli/commands/beta.d.ts +10 -0
- package/dist/cli/commands/beta.js +231 -0
- package/dist/cli/commands/check.d.ts +11 -0
- package/dist/cli/commands/check.js +820 -0
- package/dist/cli/commands/cloud/badge.d.ts +3 -0
- package/dist/cli/commands/cloud/badge.js +74 -0
- package/dist/cli/commands/cloud/diff.d.ts +6 -0
- package/dist/cli/commands/cloud/diff.js +79 -0
- package/dist/cli/commands/cloud/history.d.ts +6 -0
- package/dist/cli/commands/cloud/history.js +102 -0
- package/dist/cli/commands/cloud/link.d.ts +9 -0
- package/dist/cli/commands/cloud/link.js +119 -0
- package/dist/cli/commands/cloud/login.d.ts +7 -0
- package/dist/cli/commands/cloud/login.js +499 -0
- package/dist/cli/commands/cloud/projects.d.ts +6 -0
- package/dist/cli/commands/cloud/projects.js +44 -0
- package/dist/cli/commands/cloud/shared.d.ts +7 -0
- package/dist/cli/commands/cloud/shared.js +42 -0
- package/dist/cli/commands/cloud/teams.d.ts +8 -0
- package/dist/cli/commands/cloud/teams.js +169 -0
- package/dist/cli/commands/cloud/upload.d.ts +8 -0
- package/dist/cli/commands/cloud/upload.js +181 -0
- package/dist/cli/commands/contract.d.ts +11 -0
- package/dist/cli/commands/contract.js +280 -0
- package/dist/cli/commands/discover.d.ts +3 -0
- package/dist/cli/commands/discover.js +82 -0
- package/dist/cli/commands/eval.d.ts +9 -0
- package/dist/cli/commands/eval.js +187 -0
- package/dist/cli/commands/explore.d.ts +11 -0
- package/dist/cli/commands/explore.js +437 -0
- package/dist/cli/commands/feedback.d.ts +9 -0
- package/dist/cli/commands/feedback.js +174 -0
- package/dist/cli/commands/golden.d.ts +12 -0
- package/dist/cli/commands/golden.js +407 -0
- package/dist/cli/commands/history.d.ts +10 -0
- package/dist/cli/commands/history.js +202 -0
- package/dist/cli/commands/init.d.ts +9 -0
- package/dist/cli/commands/init.js +219 -0
- package/dist/cli/commands/interview.d.ts +3 -0
- package/dist/cli/commands/interview.js +903 -0
- package/dist/cli/commands/link.d.ts +10 -0
- package/dist/cli/commands/link.js +169 -0
- package/dist/cli/commands/login.d.ts +7 -0
- package/dist/cli/commands/login.js +499 -0
- package/dist/cli/commands/preset.d.ts +33 -0
- package/dist/cli/commands/preset.js +297 -0
- package/dist/cli/commands/profile.d.ts +33 -0
- package/dist/cli/commands/profile.js +286 -0
- package/dist/cli/commands/registry.d.ts +11 -0
- package/dist/cli/commands/registry.js +146 -0
- package/dist/cli/commands/shared.d.ts +79 -0
- package/dist/cli/commands/shared.js +196 -0
- package/dist/cli/commands/teams.d.ts +8 -0
- package/dist/cli/commands/teams.js +169 -0
- package/dist/cli/commands/test.d.ts +9 -0
- package/dist/cli/commands/test.js +500 -0
- package/dist/cli/commands/upload.d.ts +8 -0
- package/dist/cli/commands/upload.js +223 -0
- package/dist/cli/commands/validate-config.d.ts +6 -0
- package/dist/cli/commands/validate-config.js +35 -0
- package/dist/cli/commands/verify.d.ts +11 -0
- package/dist/cli/commands/verify.js +283 -0
- package/dist/cli/commands/watch.d.ts +12 -0
- package/dist/cli/commands/watch.js +253 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.js +178 -0
- package/dist/cli/interactive.d.ts +47 -0
- package/dist/cli/interactive.js +216 -0
- package/dist/cli/output/terminal-reporter.d.ts +19 -0
- package/dist/cli/output/terminal-reporter.js +104 -0
- package/dist/cli/output.d.ts +226 -0
- package/dist/cli/output.js +438 -0
- package/dist/cli/utils/env.d.ts +5 -0
- package/dist/cli/utils/env.js +14 -0
- package/dist/cli/utils/progress.d.ts +59 -0
- package/dist/cli/utils/progress.js +206 -0
- package/dist/cli/utils/server-context.d.ts +10 -0
- package/dist/cli/utils/server-context.js +36 -0
- package/dist/cloud/auth.d.ts +144 -0
- package/dist/cloud/auth.js +374 -0
- package/dist/cloud/client.d.ts +24 -0
- package/dist/cloud/client.js +65 -0
- package/dist/cloud/http-client.d.ts +38 -0
- package/dist/cloud/http-client.js +215 -0
- package/dist/cloud/index.d.ts +23 -0
- package/dist/cloud/index.js +25 -0
- package/dist/cloud/mock-client.d.ts +107 -0
- package/dist/cloud/mock-client.js +545 -0
- package/dist/cloud/types.d.ts +515 -0
- package/dist/cloud/types.js +15 -0
- package/dist/config/defaults.d.ts +160 -0
- package/dist/config/defaults.js +169 -0
- package/dist/config/loader.d.ts +24 -0
- package/dist/config/loader.js +122 -0
- package/dist/config/template.d.ts +42 -0
- package/dist/config/template.js +647 -0
- package/dist/config/validator.d.ts +2112 -0
- package/dist/config/validator.js +658 -0
- package/dist/constants/cloud.d.ts +107 -0
- package/dist/constants/cloud.js +110 -0
- package/dist/constants/core.d.ts +521 -0
- package/dist/constants/core.js +556 -0
- package/dist/constants/testing.d.ts +1283 -0
- package/dist/constants/testing.js +1568 -0
- package/dist/constants.d.ts +10 -0
- package/dist/constants.js +10 -0
- package/dist/contract/index.d.ts +6 -0
- package/dist/contract/index.js +5 -0
- package/dist/contract/validator.d.ts +177 -0
- package/dist/contract/validator.js +574 -0
- package/dist/cost/index.d.ts +6 -0
- package/dist/cost/index.js +5 -0
- package/dist/cost/tracker.d.ts +134 -0
- package/dist/cost/tracker.js +313 -0
- package/dist/discovery/discovery.d.ts +16 -0
- package/dist/discovery/discovery.js +173 -0
- package/dist/discovery/types.d.ts +51 -0
- package/dist/discovery/types.js +2 -0
- package/dist/docs/agents.d.ts +3 -0
- package/dist/docs/agents.js +995 -0
- package/dist/docs/contract.d.ts +51 -0
- package/dist/docs/contract.js +1681 -0
- package/dist/docs/generator.d.ts +4 -0
- package/dist/docs/generator.js +4 -0
- package/dist/docs/html-reporter.d.ts +9 -0
- package/dist/docs/html-reporter.js +757 -0
- package/dist/docs/index.d.ts +10 -0
- package/dist/docs/index.js +11 -0
- package/dist/docs/junit-reporter.d.ts +18 -0
- package/dist/docs/junit-reporter.js +210 -0
- package/dist/docs/report.d.ts +14 -0
- package/dist/docs/report.js +44 -0
- package/dist/docs/sarif-reporter.d.ts +19 -0
- package/dist/docs/sarif-reporter.js +335 -0
- package/dist/docs/shared.d.ts +35 -0
- package/dist/docs/shared.js +162 -0
- package/dist/docs/templates.d.ts +12 -0
- package/dist/docs/templates.js +76 -0
- package/dist/errors/index.d.ts +6 -0
- package/dist/errors/index.js +6 -0
- package/dist/errors/retry.d.ts +92 -0
- package/dist/errors/retry.js +323 -0
- package/dist/errors/types.d.ts +321 -0
- package/dist/errors/types.js +584 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.js +32 -0
- package/dist/interview/dependency-resolver.d.ts +11 -0
- package/dist/interview/dependency-resolver.js +32 -0
- package/dist/interview/interviewer.d.ts +232 -0
- package/dist/interview/interviewer.js +1939 -0
- package/dist/interview/mock-response-generator.d.ts +7 -0
- package/dist/interview/mock-response-generator.js +102 -0
- package/dist/interview/orchestrator.d.ts +237 -0
- package/dist/interview/orchestrator.js +1296 -0
- package/dist/interview/rate-limiter.d.ts +15 -0
- package/dist/interview/rate-limiter.js +55 -0
- package/dist/interview/response-validator.d.ts +10 -0
- package/dist/interview/response-validator.js +132 -0
- package/dist/interview/schema-inferrer.d.ts +8 -0
- package/dist/interview/schema-inferrer.js +71 -0
- package/dist/interview/schema-test-generator.d.ts +71 -0
- package/dist/interview/schema-test-generator.js +834 -0
- package/dist/interview/smart-value-generator.d.ts +155 -0
- package/dist/interview/smart-value-generator.js +554 -0
- package/dist/interview/stateful-test-runner.d.ts +19 -0
- package/dist/interview/stateful-test-runner.js +106 -0
- package/dist/interview/types.d.ts +561 -0
- package/dist/interview/types.js +2 -0
- package/dist/llm/anthropic.d.ts +41 -0
- package/dist/llm/anthropic.js +355 -0
- package/dist/llm/client.d.ts +123 -0
- package/dist/llm/client.js +42 -0
- package/dist/llm/factory.d.ts +38 -0
- package/dist/llm/factory.js +145 -0
- package/dist/llm/fallback.d.ts +140 -0
- package/dist/llm/fallback.js +379 -0
- package/dist/llm/index.d.ts +18 -0
- package/dist/llm/index.js +15 -0
- package/dist/llm/ollama.d.ts +37 -0
- package/dist/llm/ollama.js +330 -0
- package/dist/llm/openai.d.ts +25 -0
- package/dist/llm/openai.js +320 -0
- package/dist/llm/token-budget.d.ts +161 -0
- package/dist/llm/token-budget.js +395 -0
- package/dist/logging/logger.d.ts +70 -0
- package/dist/logging/logger.js +130 -0
- package/dist/metrics/collector.d.ts +106 -0
- package/dist/metrics/collector.js +547 -0
- package/dist/metrics/index.d.ts +7 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/prometheus.d.ts +20 -0
- package/dist/metrics/prometheus.js +241 -0
- package/dist/metrics/types.d.ts +209 -0
- package/dist/metrics/types.js +5 -0
- package/dist/persona/builtins.d.ts +54 -0
- package/dist/persona/builtins.js +219 -0
- package/dist/persona/index.d.ts +8 -0
- package/dist/persona/index.js +8 -0
- package/dist/persona/loader.d.ts +30 -0
- package/dist/persona/loader.js +190 -0
- package/dist/persona/types.d.ts +144 -0
- package/dist/persona/types.js +5 -0
- package/dist/persona/validation.d.ts +94 -0
- package/dist/persona/validation.js +332 -0
- package/dist/prompts/index.d.ts +5 -0
- package/dist/prompts/index.js +5 -0
- package/dist/prompts/templates.d.ts +180 -0
- package/dist/prompts/templates.js +431 -0
- package/dist/registry/client.d.ts +49 -0
- package/dist/registry/client.js +191 -0
- package/dist/registry/index.d.ts +7 -0
- package/dist/registry/index.js +6 -0
- package/dist/registry/types.d.ts +140 -0
- package/dist/registry/types.js +6 -0
- package/dist/scenarios/evaluator.d.ts +43 -0
- package/dist/scenarios/evaluator.js +206 -0
- package/dist/scenarios/index.d.ts +10 -0
- package/dist/scenarios/index.js +9 -0
- package/dist/scenarios/loader.d.ts +20 -0
- package/dist/scenarios/loader.js +285 -0
- package/dist/scenarios/types.d.ts +153 -0
- package/dist/scenarios/types.js +8 -0
- package/dist/security/index.d.ts +17 -0
- package/dist/security/index.js +18 -0
- package/dist/security/payloads.d.ts +61 -0
- package/dist/security/payloads.js +268 -0
- package/dist/security/security-tester.d.ts +42 -0
- package/dist/security/security-tester.js +582 -0
- package/dist/security/types.d.ts +166 -0
- package/dist/security/types.js +8 -0
- package/dist/transport/base-transport.d.ts +59 -0
- package/dist/transport/base-transport.js +38 -0
- package/dist/transport/http-transport.d.ts +67 -0
- package/dist/transport/http-transport.js +238 -0
- package/dist/transport/mcp-client.d.ts +141 -0
- package/dist/transport/mcp-client.js +496 -0
- package/dist/transport/sse-transport.d.ts +88 -0
- package/dist/transport/sse-transport.js +316 -0
- package/dist/transport/stdio-transport.d.ts +43 -0
- package/dist/transport/stdio-transport.js +238 -0
- package/dist/transport/types.d.ts +125 -0
- package/dist/transport/types.js +16 -0
- package/dist/utils/concurrency.d.ts +123 -0
- package/dist/utils/concurrency.js +213 -0
- package/dist/utils/formatters.d.ts +16 -0
- package/dist/utils/formatters.js +37 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/jsonpath.d.ts +87 -0
- package/dist/utils/jsonpath.js +326 -0
- package/dist/utils/markdown.d.ts +113 -0
- package/dist/utils/markdown.js +265 -0
- package/dist/utils/network.d.ts +14 -0
- package/dist/utils/network.js +17 -0
- package/dist/utils/sanitize.d.ts +92 -0
- package/dist/utils/sanitize.js +191 -0
- package/dist/utils/semantic.d.ts +194 -0
- package/dist/utils/semantic.js +1051 -0
- package/dist/utils/smart-truncate.d.ts +94 -0
- package/dist/utils/smart-truncate.js +361 -0
- package/dist/utils/timeout.d.ts +153 -0
- package/dist/utils/timeout.js +205 -0
- package/dist/utils/yaml-parser.d.ts +58 -0
- package/dist/utils/yaml-parser.js +86 -0
- package/dist/validation/index.d.ts +32 -0
- package/dist/validation/index.js +32 -0
- package/dist/validation/semantic-test-generator.d.ts +50 -0
- package/dist/validation/semantic-test-generator.js +176 -0
- package/dist/validation/semantic-types.d.ts +66 -0
- package/dist/validation/semantic-types.js +94 -0
- package/dist/validation/semantic-validator.d.ts +38 -0
- package/dist/validation/semantic-validator.js +340 -0
- package/dist/verification/index.d.ts +6 -0
- package/dist/verification/index.js +5 -0
- package/dist/verification/types.d.ts +133 -0
- package/dist/verification/types.js +5 -0
- package/dist/verification/verifier.d.ts +30 -0
- package/dist/verification/verifier.js +309 -0
- package/dist/version.d.ts +19 -0
- package/dist/version.js +48 -0
- package/dist/workflow/auto-generator.d.ts +27 -0
- package/dist/workflow/auto-generator.js +513 -0
- package/dist/workflow/discovery.d.ts +40 -0
- package/dist/workflow/discovery.js +195 -0
- package/dist/workflow/executor.d.ts +82 -0
- package/dist/workflow/executor.js +611 -0
- package/dist/workflow/index.d.ts +10 -0
- package/dist/workflow/index.js +10 -0
- package/dist/workflow/loader.d.ts +24 -0
- package/dist/workflow/loader.js +194 -0
- package/dist/workflow/state-tracker.d.ts +98 -0
- package/dist/workflow/state-tracker.js +424 -0
- package/dist/workflow/types.d.ts +337 -0
- package/dist/workflow/types.js +5 -0
- package/package.json +94 -0
- package/schemas/bellwether-check.schema.json +651 -0
|
@@ -0,0 +1,995 @@
|
|
|
1
|
+
import { formatDateISO, formatDuration, escapeTableCell, mermaidLabel, validateJsonForCodeBlock } from '../utils/index.js';
|
|
2
|
+
import { DISPLAY_LIMITS, MATH_FACTORS } from '../constants.js';
|
|
3
|
+
import { calculatePerformanceMetrics, extractParameters, looksLikeError } from './shared.js';
|
|
4
|
+
/**
 * Detect configuration issues based on error patterns.
 *
 * The check only runs when at least 70% of tool calls failed. It then looks
 * for two signals, in order:
 *   1. More than half of the failed interactions carry an access-related
 *      message (e.g. "access denied", "outside allowed directories").
 *   2. Fallback: more than half of the recorded limitations are
 *      access-related AND the error rate is above 80%.
 *
 * This is an advisory check, so incomplete input is tolerated: a nullish
 * `profiles` list, or a profile without `interactions` / `limitations`, is
 * treated as empty instead of throwing.
 *
 * @param {Array<object>} profiles - Tool profiles; each may provide
 *   `interactions` (entries with optional `error` and `response`) and
 *   `limitations` (strings).
 * @param {object} metadata - Run metadata; `toolCallCount` and `errorCount`
 *   are read.
 * @returns {string|null} A warning message if issues detected, null otherwise.
 */
function detectConfigurationIssues(profiles, metadata) {
    if (metadata.toolCallCount === 0) {
        return null;
    }
    const errorRate = metadata.errorCount / metadata.toolCallCount;
    if (errorRate < 0.7) {
        return null; // Less than 70% errors is probably fine
    }
    // Patterns are built once per call instead of once per interaction/limitation.
    const accessErrorPattern = /access denied|outside.*(allowed|predefined)|not allowed|permission denied|cannot.*(access|read|write|create|list).*outside|restricted to.*(predefined|allowed)/i;
    const accessLimitationPattern = /outside.*(allowed|predefined)|restricted|access.*(denied|control)|cannot.*(access|read|write)/i;
    const filesystemHint = 'For filesystem servers, try: `bellwether check npx @modelcontextprotocol/server-filesystem /path/to/allowed/dir`';
    const toolProfiles = profiles ?? [];
    // Look for access-related errors in interactions
    let accessErrors = 0;
    let totalErrors = 0;
    for (const profile of toolProfiles) {
        for (const interaction of profile?.interactions ?? []) {
            // Only the first text content item of the response is inspected
            const textContent = interaction.response?.content?.find(c => c.type === 'text');
            const responseText = textContent && 'text' in textContent ? String(textContent.text) : '';
            const errorText = interaction.error || '';
            // Count as error if flagged as error OR if content looks like an error
            const isError = interaction.error || interaction.response?.isError || looksLikeError(responseText);
            if (!isError) {
                continue;
            }
            totalErrors++;
            if (accessErrorPattern.test(errorText + responseText)) {
                accessErrors++;
            }
        }
    }
    // If most of the errors are access-related, show warning
    if (totalErrors > 0 && accessErrors / totalErrors > 0.5) {
        return 'Most tool calls failed with access-related errors. The server may not have been configured with allowed directories. ' +
            filesystemHint;
    }
    // Also check synthesized limitations for access patterns (fallback)
    let accessRelatedLimitations = 0;
    let totalLimitations = 0;
    for (const profile of toolProfiles) {
        for (const limitation of profile?.limitations ?? []) {
            totalLimitations++;
            if (accessLimitationPattern.test(limitation)) {
                accessRelatedLimitations++;
            }
        }
    }
    // If most limitations are access-related and the error rate is very high, show warning
    if (totalLimitations > 0 && accessRelatedLimitations / totalLimitations > 0.5 && errorRate > 0.8) {
        return 'Most tool calls failed, likely due to missing allowed directories configuration. ' +
            filesystemHint;
    }
    return null;
}
|
|
57
|
+
export function generateAgentsMd(result) {
|
|
58
|
+
const lines = [];
|
|
59
|
+
const { discovery, toolProfiles, summary, limitations, recommendations, metadata } = result;
|
|
60
|
+
// Header
|
|
61
|
+
lines.push(`# ${discovery.serverInfo.name}`);
|
|
62
|
+
lines.push('');
|
|
63
|
+
lines.push(`> Generated by [Bellwether](https://github.com/dotsetlabs/bellwether) on ${formatDateISO(metadata.startTime)}`);
|
|
64
|
+
lines.push('');
|
|
65
|
+
// Check for configuration issues (high error rate with access-related errors)
|
|
66
|
+
const configWarning = detectConfigurationIssues(toolProfiles, metadata);
|
|
67
|
+
if (configWarning) {
|
|
68
|
+
lines.push('> ⚠️ **Configuration Issue Detected**');
|
|
69
|
+
lines.push('>');
|
|
70
|
+
lines.push(`> ${configWarning}`);
|
|
71
|
+
lines.push('');
|
|
72
|
+
}
|
|
73
|
+
// Overview
|
|
74
|
+
lines.push('## Overview');
|
|
75
|
+
lines.push('');
|
|
76
|
+
lines.push(summary);
|
|
77
|
+
lines.push('');
|
|
78
|
+
lines.push(`**Server Version:** ${discovery.serverInfo.version}`);
|
|
79
|
+
lines.push(`**Protocol Version:** ${discovery.protocolVersion}`);
|
|
80
|
+
// Show personas used
|
|
81
|
+
if (metadata.personas && metadata.personas.length > 0) {
|
|
82
|
+
const personaNames = metadata.personas.map(p => p.name).join(', ');
|
|
83
|
+
lines.push(`**Interview Personas:** ${personaNames}`);
|
|
84
|
+
}
|
|
85
|
+
lines.push('');
|
|
86
|
+
// Capabilities summary
|
|
87
|
+
lines.push('## Capabilities');
|
|
88
|
+
lines.push('');
|
|
89
|
+
if (discovery.capabilities.tools) {
|
|
90
|
+
lines.push(`- **Tools:** ${discovery.tools.length} available`);
|
|
91
|
+
}
|
|
92
|
+
if (discovery.capabilities.prompts) {
|
|
93
|
+
lines.push(`- **Prompts:** ${discovery.prompts.length} available`);
|
|
94
|
+
}
|
|
95
|
+
if (discovery.capabilities.resources) {
|
|
96
|
+
lines.push(`- **Resources:** ${(discovery.resources ?? []).length} available`);
|
|
97
|
+
}
|
|
98
|
+
if (discovery.capabilities.logging) {
|
|
99
|
+
lines.push('- **Logging:** Supported');
|
|
100
|
+
}
|
|
101
|
+
lines.push('');
|
|
102
|
+
// Extract common constraints across all tools
|
|
103
|
+
const { common: commonConstraints, byTool: toolSpecificConstraints } = extractCommonConstraints(toolProfiles);
|
|
104
|
+
// Quick Reference section
|
|
105
|
+
if (toolProfiles.length > 0) {
|
|
106
|
+
lines.push(...generateQuickReference(discovery.tools, toolProfiles));
|
|
107
|
+
}
|
|
108
|
+
// Global Constraints section (common restrictions)
|
|
109
|
+
if (commonConstraints.length > 0) {
|
|
110
|
+
lines.push(...generateGlobalConstraintsSection(commonConstraints));
|
|
111
|
+
}
|
|
112
|
+
// Security Considerations section (aggregated)
|
|
113
|
+
const securitySection = generateSecuritySection(toolProfiles);
|
|
114
|
+
if (securitySection.length > 0) {
|
|
115
|
+
lines.push(...securitySection);
|
|
116
|
+
}
|
|
117
|
+
// Performance section
|
|
118
|
+
const performanceSection = generatePerformanceSection(toolProfiles);
|
|
119
|
+
if (performanceSection.length > 0) {
|
|
120
|
+
lines.push(...performanceSection);
|
|
121
|
+
}
|
|
122
|
+
// Tools section
|
|
123
|
+
if (toolProfiles.length > 0) {
|
|
124
|
+
lines.push('## Tools');
|
|
125
|
+
lines.push('');
|
|
126
|
+
for (const profile of toolProfiles) {
|
|
127
|
+
lines.push(`### ${profile.name}`);
|
|
128
|
+
lines.push('');
|
|
129
|
+
lines.push(profile.description);
|
|
130
|
+
lines.push('');
|
|
131
|
+
// Find the original tool to get schema
|
|
132
|
+
const tool = discovery.tools.find(t => t.name === profile.name);
|
|
133
|
+
if (tool?.inputSchema) {
|
|
134
|
+
lines.push('**Input Schema:**');
|
|
135
|
+
// Validate JSON and escape for code block
|
|
136
|
+
const schemaJson = validateJsonForCodeBlock(tool.inputSchema);
|
|
137
|
+
lines.push('```json');
|
|
138
|
+
lines.push(schemaJson.content);
|
|
139
|
+
lines.push('```');
|
|
140
|
+
lines.push('');
|
|
141
|
+
}
|
|
142
|
+
// Add sample response if we have successful calls
|
|
143
|
+
const sampleResponse = generateSampleResponse(profile);
|
|
144
|
+
if (sampleResponse.length > 0) {
|
|
145
|
+
lines.push(...sampleResponse);
|
|
146
|
+
}
|
|
147
|
+
if (profile.behavioralNotes.length > 0) {
|
|
148
|
+
lines.push('**Observed Behavior:**');
|
|
149
|
+
for (const note of profile.behavioralNotes) {
|
|
150
|
+
lines.push(`- ${note}`);
|
|
151
|
+
}
|
|
152
|
+
lines.push('');
|
|
153
|
+
}
|
|
154
|
+
// Only show tool-specific limitations (common ones are in Global Constraints)
|
|
155
|
+
const specificLimitations = toolSpecificConstraints.get(profile.name) ?? [];
|
|
156
|
+
if (specificLimitations.length > 0) {
|
|
157
|
+
lines.push('**Limitations:**');
|
|
158
|
+
for (const limitation of specificLimitations) {
|
|
159
|
+
lines.push(`- ${limitation}`);
|
|
160
|
+
}
|
|
161
|
+
lines.push('');
|
|
162
|
+
}
|
|
163
|
+
if (profile.securityNotes.length > 0) {
|
|
164
|
+
lines.push('**Security Considerations:**');
|
|
165
|
+
for (const note of profile.securityNotes) {
|
|
166
|
+
lines.push(`- ${note}`);
|
|
167
|
+
}
|
|
168
|
+
lines.push('');
|
|
169
|
+
}
|
|
170
|
+
// Findings by persona (if multiple personas used)
|
|
171
|
+
if (profile.findingsByPersona && profile.findingsByPersona.length > 1) {
|
|
172
|
+
lines.push('<details>');
|
|
173
|
+
lines.push('<summary><strong>Findings by Persona</strong></summary>');
|
|
174
|
+
lines.push('');
|
|
175
|
+
for (const personaFindings of profile.findingsByPersona) {
|
|
176
|
+
lines.push(`#### ${personaFindings.personaName}`);
|
|
177
|
+
lines.push('');
|
|
178
|
+
if (personaFindings.behavioralNotes.length > 0) {
|
|
179
|
+
for (const note of personaFindings.behavioralNotes) {
|
|
180
|
+
lines.push(`- ${note}`);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
if (personaFindings.limitations.length > 0) {
|
|
184
|
+
lines.push('');
|
|
185
|
+
lines.push('*Limitations:*');
|
|
186
|
+
for (const limitation of personaFindings.limitations) {
|
|
187
|
+
lines.push(`- ${limitation}`);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
if (personaFindings.securityNotes.length > 0) {
|
|
191
|
+
lines.push('');
|
|
192
|
+
lines.push('*Security:*');
|
|
193
|
+
for (const note of personaFindings.securityNotes) {
|
|
194
|
+
lines.push(`- ${note}`);
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
lines.push('');
|
|
198
|
+
}
|
|
199
|
+
lines.push('</details>');
|
|
200
|
+
lines.push('');
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
// Common Workflows section (summarized view of successful workflows)
|
|
205
|
+
if (result.workflowResults && result.workflowResults.length > 0) {
|
|
206
|
+
const successfulWorkflows = result.workflowResults.filter(wr => wr.success);
|
|
207
|
+
if (successfulWorkflows.length > 0) {
|
|
208
|
+
lines.push('## Common Workflows');
|
|
209
|
+
lines.push('');
|
|
210
|
+
lines.push('These workflows demonstrate recommended patterns for using tools together:');
|
|
211
|
+
lines.push('');
|
|
212
|
+
for (const wr of successfulWorkflows) {
|
|
213
|
+
const toolSequence = wr.workflow.steps.map(s => `\`${s.tool}\``).join(' → ');
|
|
214
|
+
lines.push(`### ${wr.workflow.name}`);
|
|
215
|
+
lines.push('');
|
|
216
|
+
lines.push(wr.workflow.description);
|
|
217
|
+
lines.push('');
|
|
218
|
+
lines.push(`**Tool Sequence:** ${toolSequence}`);
|
|
219
|
+
lines.push('');
|
|
220
|
+
// Generate mermaid workflow diagram
|
|
221
|
+
lines.push('```mermaid');
|
|
222
|
+
lines.push('flowchart LR');
|
|
223
|
+
for (let i = 0; i < wr.workflow.steps.length; i++) {
|
|
224
|
+
const step = wr.workflow.steps[i];
|
|
225
|
+
const nodeId = `S${i}`;
|
|
226
|
+
const nextNodeId = `S${i + 1}`;
|
|
227
|
+
// Use mermaidLabel to safely escape tool names
|
|
228
|
+
if (i === 0) {
|
|
229
|
+
lines.push(` ${nodeId}[${mermaidLabel(step.tool)}]`);
|
|
230
|
+
}
|
|
231
|
+
if (i < wr.workflow.steps.length - 1) {
|
|
232
|
+
const nextStep = wr.workflow.steps[i + 1];
|
|
233
|
+
lines.push(` ${nodeId} --> ${nextNodeId}[${mermaidLabel(nextStep.tool)}]`);
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
lines.push('```');
|
|
237
|
+
lines.push('');
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
// Detailed Workflows section (with full test results)
|
|
242
|
+
if (result.workflowResults && result.workflowResults.length > 0) {
|
|
243
|
+
lines.push('## Workflow Test Results');
|
|
244
|
+
lines.push('');
|
|
245
|
+
lines.push('The following workflows were tested to verify tool chaining behavior:');
|
|
246
|
+
lines.push('');
|
|
247
|
+
for (const workflowResult of result.workflowResults) {
|
|
248
|
+
const statusIcon = workflowResult.success ? '✅' : '❌';
|
|
249
|
+
lines.push(`### ${statusIcon} ${workflowResult.workflow.name}`);
|
|
250
|
+
lines.push('');
|
|
251
|
+
lines.push(workflowResult.workflow.description);
|
|
252
|
+
lines.push('');
|
|
253
|
+
if (workflowResult.summary) {
|
|
254
|
+
lines.push(`> ${workflowResult.summary}`);
|
|
255
|
+
lines.push('');
|
|
256
|
+
}
|
|
257
|
+
// Show steps
|
|
258
|
+
lines.push('**Steps:**');
|
|
259
|
+
lines.push('');
|
|
260
|
+
for (let i = 0; i < workflowResult.steps.length; i++) {
|
|
261
|
+
const stepResult = workflowResult.steps[i];
|
|
262
|
+
const stepIcon = stepResult.success ? '✓' : '✗';
|
|
263
|
+
lines.push(`${i + 1}. ${stepIcon} **${stepResult.step.tool}**: ${stepResult.step.description}`);
|
|
264
|
+
if (stepResult.analysis) {
|
|
265
|
+
lines.push(` - ${stepResult.analysis}`);
|
|
266
|
+
}
|
|
267
|
+
if (stepResult.error) {
|
|
268
|
+
lines.push(` - ⚠️ Error: ${stepResult.error}`);
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
lines.push('');
|
|
272
|
+
// Show data flow diagram if present
|
|
273
|
+
if (workflowResult.dataFlow && workflowResult.dataFlow.length > 0) {
|
|
274
|
+
lines.push('**Data Flow:**');
|
|
275
|
+
lines.push('');
|
|
276
|
+
lines.push('```mermaid');
|
|
277
|
+
lines.push('flowchart LR');
|
|
278
|
+
// Add nodes first - escape tool names for Mermaid
|
|
279
|
+
for (let i = 0; i < workflowResult.steps.length; i++) {
|
|
280
|
+
const stepResult = workflowResult.steps[i];
|
|
281
|
+
const status = stepResult.success ? ':::success' : ':::failure';
|
|
282
|
+
lines.push(` Step${i}[${mermaidLabel(stepResult.step.tool)}]${status}`);
|
|
283
|
+
}
|
|
284
|
+
// Add edges - escape parameter names
|
|
285
|
+
for (const edge of workflowResult.dataFlow) {
|
|
286
|
+
const param = mermaidLabel(edge.targetParam).replace(/"/g, '');
|
|
287
|
+
lines.push(` Step${edge.fromStep} -->|${param}| Step${edge.toStep}`);
|
|
288
|
+
}
|
|
289
|
+
// Add styling
|
|
290
|
+
lines.push(' classDef success fill:#90EE90');
|
|
291
|
+
lines.push(' classDef failure fill:#FFB6C1');
|
|
292
|
+
lines.push('```');
|
|
293
|
+
lines.push('');
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
// Prompts section - use profiles if available, otherwise basic listing
|
|
298
|
+
if (result.promptProfiles && result.promptProfiles.length > 0) {
|
|
299
|
+
lines.push('## Prompts');
|
|
300
|
+
lines.push('');
|
|
301
|
+
lines.push('Prompts are reusable templates that generate structured messages for LLM interactions.');
|
|
302
|
+
lines.push('');
|
|
303
|
+
for (const profile of result.promptProfiles) {
|
|
304
|
+
lines.push(`### ${profile.name}`);
|
|
305
|
+
lines.push('');
|
|
306
|
+
lines.push(profile.description);
|
|
307
|
+
lines.push('');
|
|
308
|
+
if (profile.arguments.length > 0) {
|
|
309
|
+
lines.push('**Arguments:**');
|
|
310
|
+
for (const arg of profile.arguments) {
|
|
311
|
+
const required = arg.required ? ' (required)' : '';
|
|
312
|
+
lines.push(`- \`${arg.name}\`${required}: ${arg.description ?? 'No description'}`);
|
|
313
|
+
}
|
|
314
|
+
lines.push('');
|
|
315
|
+
}
|
|
316
|
+
if (profile.exampleOutput) {
|
|
317
|
+
lines.push('**Example Output:**');
|
|
318
|
+
lines.push('```');
|
|
319
|
+
lines.push(profile.exampleOutput.length > DISPLAY_LIMITS.DOCS_EXAMPLE_LENGTH
|
|
320
|
+
? profile.exampleOutput.substring(0, DISPLAY_LIMITS.DOCS_EXAMPLE_LENGTH) + '...'
|
|
321
|
+
: profile.exampleOutput);
|
|
322
|
+
lines.push('```');
|
|
323
|
+
lines.push('');
|
|
324
|
+
}
|
|
325
|
+
if (profile.behavioralNotes.length > 0) {
|
|
326
|
+
lines.push('**Observed Behavior:**');
|
|
327
|
+
for (const note of profile.behavioralNotes) {
|
|
328
|
+
lines.push(`- ${note}`);
|
|
329
|
+
}
|
|
330
|
+
lines.push('');
|
|
331
|
+
}
|
|
332
|
+
if (profile.limitations.length > 0) {
|
|
333
|
+
lines.push('**Limitations:**');
|
|
334
|
+
for (const limitation of profile.limitations) {
|
|
335
|
+
lines.push(`- ${limitation}`);
|
|
336
|
+
}
|
|
337
|
+
lines.push('');
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
else if (discovery.prompts.length > 0) {
|
|
342
|
+
// Fallback to basic listing if no profiles
|
|
343
|
+
lines.push('## Prompts');
|
|
344
|
+
lines.push('');
|
|
345
|
+
for (const prompt of discovery.prompts) {
|
|
346
|
+
lines.push(`### ${prompt.name}`);
|
|
347
|
+
lines.push('');
|
|
348
|
+
if (prompt.description) {
|
|
349
|
+
lines.push(prompt.description);
|
|
350
|
+
lines.push('');
|
|
351
|
+
}
|
|
352
|
+
if (prompt.arguments && prompt.arguments.length > 0) {
|
|
353
|
+
lines.push('**Arguments:**');
|
|
354
|
+
for (const arg of prompt.arguments) {
|
|
355
|
+
const required = arg.required ? ' (required)' : '';
|
|
356
|
+
lines.push(`- \`${arg.name}\`${required}: ${arg.description ?? 'No description'}`);
|
|
357
|
+
}
|
|
358
|
+
lines.push('');
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
// Resources section - use profiles if available, otherwise basic listing
|
|
363
|
+
if (result.resourceProfiles && result.resourceProfiles.length > 0) {
|
|
364
|
+
lines.push('## Resources');
|
|
365
|
+
lines.push('');
|
|
366
|
+
lines.push('Resources are data sources exposed by the server that can be read by clients.');
|
|
367
|
+
lines.push('');
|
|
368
|
+
for (const profile of result.resourceProfiles) {
|
|
369
|
+
lines.push(`### ${profile.name}`);
|
|
370
|
+
lines.push('');
|
|
371
|
+
lines.push(`**URI:** \`${profile.uri}\``);
|
|
372
|
+
if (profile.mimeType) {
|
|
373
|
+
lines.push(`**MIME Type:** ${profile.mimeType}`);
|
|
374
|
+
}
|
|
375
|
+
lines.push('');
|
|
376
|
+
lines.push(profile.description);
|
|
377
|
+
lines.push('');
|
|
378
|
+
if (profile.contentPreview) {
|
|
379
|
+
lines.push('**Content Preview:**');
|
|
380
|
+
lines.push('```');
|
|
381
|
+
lines.push(profile.contentPreview);
|
|
382
|
+
lines.push('```');
|
|
383
|
+
lines.push('');
|
|
384
|
+
}
|
|
385
|
+
if (profile.behavioralNotes.length > 0) {
|
|
386
|
+
lines.push('**Observed Behavior:**');
|
|
387
|
+
for (const note of profile.behavioralNotes) {
|
|
388
|
+
lines.push(`- ${note}`);
|
|
389
|
+
}
|
|
390
|
+
lines.push('');
|
|
391
|
+
}
|
|
392
|
+
if (profile.limitations.length > 0) {
|
|
393
|
+
lines.push('**Limitations:**');
|
|
394
|
+
for (const limitation of profile.limitations) {
|
|
395
|
+
lines.push(`- ${limitation}`);
|
|
396
|
+
}
|
|
397
|
+
lines.push('');
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
else if ((discovery.resources ?? []).length > 0) {
|
|
402
|
+
// Fallback to basic listing if no profiles
|
|
403
|
+
lines.push('## Resources');
|
|
404
|
+
lines.push('');
|
|
405
|
+
lines.push('Resources are data sources exposed by the server that can be read by clients.');
|
|
406
|
+
lines.push('');
|
|
407
|
+
for (const resource of discovery.resources ?? []) {
|
|
408
|
+
lines.push(`### ${resource.name}`);
|
|
409
|
+
lines.push('');
|
|
410
|
+
lines.push(`**URI:** \`${resource.uri}\``);
|
|
411
|
+
if (resource.mimeType) {
|
|
412
|
+
lines.push(`**MIME Type:** ${resource.mimeType}`);
|
|
413
|
+
}
|
|
414
|
+
lines.push('');
|
|
415
|
+
if (resource.description) {
|
|
416
|
+
lines.push(resource.description);
|
|
417
|
+
lines.push('');
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
// Overall limitations
|
|
422
|
+
if (limitations.length > 0) {
|
|
423
|
+
lines.push('## Known Limitations');
|
|
424
|
+
lines.push('');
|
|
425
|
+
for (const limitation of limitations) {
|
|
426
|
+
lines.push(`- ${limitation}`);
|
|
427
|
+
}
|
|
428
|
+
lines.push('');
|
|
429
|
+
}
|
|
430
|
+
// Recommendations
|
|
431
|
+
if (recommendations.length > 0) {
|
|
432
|
+
lines.push('## Recommendations');
|
|
433
|
+
lines.push('');
|
|
434
|
+
for (const rec of recommendations) {
|
|
435
|
+
lines.push(`- ${rec}`);
|
|
436
|
+
}
|
|
437
|
+
lines.push('');
|
|
438
|
+
}
|
|
439
|
+
// Behavioral Matrix (tool × persona findings)
|
|
440
|
+
if (metadata.personas && metadata.personas.length > 1 && toolProfiles.length > 0) {
|
|
441
|
+
const matrixSection = generateBehavioralMatrix(toolProfiles, metadata.personas);
|
|
442
|
+
if (matrixSection.length > 0) {
|
|
443
|
+
lines.push(...matrixSection);
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
// Metadata footer
|
|
447
|
+
lines.push('---');
|
|
448
|
+
lines.push('');
|
|
449
|
+
// Check if check mode (no LLM, deterministic)
|
|
450
|
+
const isCheckMode = metadata.model === 'check';
|
|
451
|
+
// Basic stats - simpler for check mode
|
|
452
|
+
let statsLine;
|
|
453
|
+
if (isCheckMode) {
|
|
454
|
+
statsLine = `*Check analysis completed in ${formatDuration(metadata.durationMs)}.*`;
|
|
455
|
+
lines.push(statsLine);
|
|
456
|
+
}
|
|
457
|
+
else {
|
|
458
|
+
// Explore mode: show more details
|
|
459
|
+
statsLine = `*Interview completed in ${formatDuration(metadata.durationMs)} with ${metadata.toolCallCount} tool interactions`;
|
|
460
|
+
// Add persona breakdown if multiple personas
|
|
461
|
+
if (metadata.personas && metadata.personas.length > 1) {
|
|
462
|
+
statsLine += '.*';
|
|
463
|
+
lines.push(statsLine);
|
|
464
|
+
lines.push('');
|
|
465
|
+
lines.push('**Persona Breakdown:**');
|
|
466
|
+
for (const persona of metadata.personas) {
|
|
467
|
+
lines.push(`- ${persona.name}: ${persona.questionsAsked} questions, ${persona.toolCallCount} calls`);
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
else {
|
|
471
|
+
statsLine += '.*';
|
|
472
|
+
lines.push(statsLine);
|
|
473
|
+
}
|
|
474
|
+
}
|
|
475
|
+
return lines.join('\n');
|
|
476
|
+
}
|
|
477
|
+
/**
 * Generate the Quick Reference section: a signature table with one row per
 * tool, followed by an "Example Usage" subsection for every tool that has an
 * example snippet.
 *
 * @param {Array<object>} tools - Tools to list; `name` and `inputSchema` are read.
 * @param {Array<object>} profiles - Tool profiles, matched to tools by `name`.
 * @returns {string[]} Markdown lines for the section.
 */
function generateQuickReference(tools, profiles) {
    // Index profiles by name once instead of scanning the array twice per tool.
    // The first profile with a given name wins, which is what the previous
    // Array.prototype.find lookup returned.
    const profileByName = new Map();
    for (const profile of profiles) {
        if (!profileByName.has(profile.name)) {
            profileByName.set(profile.name, profile);
        }
    }
    const lines = [
        '## Quick Reference',
        '',
        '| Tool | Parameters | Returns |',
        '|------|------------|---------|',
    ];
    for (const tool of tools) {
        const params = extractParameters(tool.inputSchema);
        const returnType = inferReturnTypeDetailed(profileByName.get(tool.name));
        // Escape table cell content to prevent broken tables
        lines.push(`| \`${escapeTableCell(tool.name)}\` | ${escapeTableCell(params)} | ${escapeTableCell(returnType)} |`);
    }
    lines.push('');
    // Only add the example subsection if at least one tool produced a snippet
    const successfulExamples = [];
    for (const tool of tools) {
        const example = generateExampleSnippet(tool, profileByName.get(tool.name));
        if (example) {
            successfulExamples.push({ tool, example });
        }
    }
    if (successfulExamples.length === 0) {
        return lines;
    }
    lines.push('### Example Usage');
    lines.push('');
    lines.push('*Examples shown are from successful tool calls during the interview.*');
    lines.push('');
    for (const { tool, example } of successfulExamples) {
        // Validate and escape JSON for code block
        const jsonResult = validateJsonForCodeBlock(example);
        lines.push(`#### ${tool.name}`, '', '```json', jsonResult.content, '```', '');
    }
    return lines;
}
|
|
521
|
+
/**
 * Guess a tool's return type from the wording of its description, for use
 * when no actual response is available to inspect.
 *
 * Rules are tried in order and the first matching phrase decides the result.
 *
 * @param {string} description - Tool description text.
 * @returns {string|null} A human-readable "(expected)" label, or null when no rule matches.
 */
function inferReturnTypeFromDescription(description) {
    const rules = [
        { phrase: /returns?\s+(the\s+)?base64[\s-]?encoded/i, label: 'base64 data (expected)' },
        { phrase: /returns?\s+(a\s+)?json/i, label: 'JSON (expected)' },
        { phrase: /returns?\s+(the\s+)?contents?\s+of/i, label: 'file content (expected)' },
        { phrase: /returns?\s+(a\s+)?list(ing)?\s+of/i, label: 'list (expected)' },
        { phrase: /returns?\s+(the\s+)?tree\s+(view|structure)/i, label: 'tree structure (expected)' },
        { phrase: /returns?\s+(detailed\s+)?metadata/i, label: 'metadata (expected)' },
        { phrase: /returns?\s+full\s+paths?/i, label: 'file paths (expected)' },
        { phrase: /returns?\s+(a\s+)?git-style\s+diff/i, label: 'diff output (expected)' },
        { phrase: /returns?\s+(a\s+)?recursive\s+tree/i, label: 'tree structure (expected)' },
        { phrase: /read\s+(the\s+)?(complete\s+)?contents/i, label: 'file content (expected)' },
        { phrase: /get\s+(a\s+)?(detailed\s+)?listing/i, label: 'directory listing (expected)' },
        { phrase: /retrieve\s+(detailed\s+)?metadata/i, label: 'metadata (expected)' },
        { phrase: /search\s+(for\s+)?files/i, label: 'file paths (expected)' },
        { phrase: /create\s+(a\s+)?new\s+(file|directory)/i, label: 'success/error status (expected)' },
        { phrase: /move\s+or\s+rename/i, label: 'success/error status (expected)' },
    ];
    const firstHit = rules.find((rule) => rule.phrase.test(description));
    return firstHit ? firstHit.label : null;
}
|
|
549
|
+
/**
 * Describe what a tool returns, in a short form suitable for the Quick
 * Reference table.
 *
 * The description is derived from the first interaction that succeeded and
 * whose text content does not look like an error. When there is no such
 * interaction, the tool description is used as a fallback hint.
 *
 * @param {object|undefined} profile - Tool profile; `interactions` and `description` are read.
 * @returns {string} A label such as 'JSON object {a, b}', 'text', 'base64 image' or 'unknown'.
 */
function inferReturnTypeDetailed(profile) {
    if (!profile || profile.interactions.length === 0) {
        return 'unknown';
    }
    // Look at successful interactions that don't have error-like content
    const successful = profile.interactions.find((interaction) => {
        if (interaction.error || !interaction.response || interaction.response.isError) {
            return false;
        }
        const textContent = interaction.response.content?.find((c) => c.type === 'text');
        if (textContent && 'text' in textContent && looksLikeError(String(textContent.text))) {
            return false;
        }
        return true;
    });
    if (!successful || !successful.response) {
        // No successful calls - try to infer from description
        return inferReturnTypeFromDescription(profile.description) || 'unknown';
    }
    const content = successful.response.content;
    if (!content || content.length === 0) {
        return 'empty response';
    }
    const types = new Set(content.map((c) => c.type));
    if (types.size !== 1) {
        return `mixed (${Array.from(types).join(', ')})`;
    }
    const type = content[0].type;
    if (type === 'image') {
        return 'base64 image';
    }
    if (type !== 'text') {
        return type;
    }
    // Coerce to a string so a non-string payload cannot throw on the string
    // methods below; the success filter above coerces the same way.
    const text = String(content[0].text ?? '');
    // Detect JSON on the trimmed text so leading whitespace does not hide it.
    const candidate = text.trim();
    if (candidate.startsWith('{')) {
        try {
            const allKeys = Object.keys(JSON.parse(candidate));
            if (allKeys.length === 0) {
                return 'JSON object';
            }
            const shown = allKeys.slice(0, 3);
            return `JSON object {${shown.join(', ')}${allKeys.length > 3 ? ', ...' : ''}}`;
        }
        catch {
            return 'JSON-like text';
        }
    }
    if (candidate.startsWith('[')) {
        try {
            const parsed = JSON.parse(candidate);
            if (Array.isArray(parsed)) {
                return `JSON array (${parsed.length} items)`;
            }
        }
        catch {
            return 'JSON-like text';
        }
    }
    // Plain text: report its rough size
    if (text.includes('\n')) {
        return `multi-line text (${text.split('\n').length} lines)`;
    }
    if (text.length > 100) {
        return `text (${text.length} chars)`;
    }
    return 'text';
}
|
|
627
|
+
/**
 * Build a JSON example invocation for a tool from a real recorded call.
 *
 * Only an interaction that completed without error, and whose text content
 * does not look like an error message, is used; nothing synthetic is produced.
 *
 * @param {object} tool - Tool definition; `name` is read.
 * @param {object|undefined} profile - Tool profile; `interactions` is read.
 * @returns {string|null} Pretty-printed JSON with `tool` and `arguments`, or
 *   null when there is no usable interaction.
 */
function generateExampleSnippet(tool, profile) {
    if (!profile || profile.interactions.length === 0) {
        return null;
    }
    // True for a call that succeeded and did not return error-like text.
    const isUsable = ({ error, response }) => {
        if (error || !response || response.isError) {
            return false;
        }
        const textPart = response.content?.find((part) => part.type === 'text');
        const carriesText = Boolean(textPart) && 'text' in textPart;
        return !(carriesText && looksLikeError(String(textPart.text)));
    };
    const chosen = profile.interactions.find(isUsable);
    if (!chosen) {
        // No successful calls - don't generate fake examples
        return null;
    }
    const payload = {
        tool: tool.name,
        arguments: chosen.question.args,
    };
    return JSON.stringify(payload, null, 2);
}
|
|
656
|
+
/**
 * Build the "Sample Response" block for a tool from the first recorded call
 * that succeeded and did not return error-like text.
 *
 * The response text is cut to 500 characters before it is handed to
 * validateJsonForCodeBlock for formatting.
 *
 * @param {object} profile - Tool profile; `interactions` is read.
 * @returns {string[]} Markdown lines, or an empty array when no usable text response exists.
 */
function generateSampleResponse(profile) {
    const SAMPLE_LIMIT = 500;
    const firstTextPart = (response) => response.content?.find((part) => part.type === 'text');
    // Skip failed calls and calls whose text reads like an error
    const usable = profile.interactions.find(({ error, response }) => {
        if (error || !response || response.isError) {
            return false;
        }
        const part = firstTextPart(response);
        const carriesText = Boolean(part) && 'text' in part;
        return !(carriesText && looksLikeError(String(part.text)));
    });
    if (!usable || !usable.response?.content) {
        return [];
    }
    const textPart = firstTextPart(usable.response);
    if (!textPart || !('text' in textPart)) {
        return [];
    }
    const fullText = String(textPart.text);
    if (fullText === '') {
        return [];
    }
    // Truncate very long responses
    const isTruncated = fullText.length > SAMPLE_LIMIT;
    const shownText = isTruncated ? fullText.substring(0, SAMPLE_LIMIT) : fullText;
    // Try to pretty-print JSON and validate/escape
    const formatted = validateJsonForCodeBlock(shownText, {
        maxLength: isTruncated ? SAMPLE_LIMIT : undefined,
        truncationIndicator: ' ...',
    });
    return ['**Sample Response:**', '```', formatted.content, '```', ''];
}
|
|
704
|
+
/**
 * Split tool limitations into constraints shared by many tools and
 * constraints that stay specific to individual tools.
 *
 * Each limitation is counted under a key: the label of the first grouping
 * pattern it matches, or otherwise its normalized text (lower-cased, quotes
 * removed, whitespace collapsed). A key is "common" when the number of tools
 * carrying it reaches the threshold
 * max(MATH_FACTORS.MIN_COMMON_CONSTRAINT_THRESHOLD, floor(tools / 2)).
 *
 * Every limitation that does not end up common is kept in its tool's list,
 * so nothing is dropped just because it matched a grouping pattern or was
 * first seen on another tool. Each tool counts at most once per key.
 *
 * @param {Array<object>} profiles - Tool profiles; `name` and `limitations` are read.
 * @returns {{ common: string[], byTool: Map<string, string[]> }} `common` holds
 *   the shared constraints (pattern label, or the first wording seen); `byTool`
 *   maps a tool name to its remaining limitations and only contains tools
 *   that have at least one.
 */
function extractCommonConstraints(profiles) {
    const normalizeConstraint = (c) => {
        return c.toLowerCase()
            .replace(/['"`]/g, '')
            .replace(/\s+/g, ' ')
            .trim();
    };
    // Common patterns that should be grouped
    const commonPatterns = [
        { pattern: /directory.*restriction|access.*control|allowed.*director|within allowed/i, label: 'Directory access restrictions apply' },
        { pattern: /path.*restriction|access denied.*path|outside.*allowed/i, label: 'Path access is restricted' },
        { pattern: /requires.*parameter|parameter.*required|missing.*parameter/i, label: 'Validates required parameters' },
    ];
    // Labels contain upper-case letters and normalized text never does, so the
    // two kinds of key cannot collide inside one map.
    const classify = (limitation) => {
        const group = commonPatterns.find(({ pattern }) => pattern.test(limitation));
        return group
            ? { key: group.label, display: group.label }
            : { key: normalizeConstraint(limitation), display: limitation };
    };
    // Pass 1: count how many tools carry each key and remember, per tool,
    // which limitation text belongs to which key.
    const constraintCounts = new Map();
    const perTool = [];
    for (const profile of profiles) {
        const seenKeys = new Set();
        const entries = [];
        for (const limitation of profile.limitations) {
            const { key, display } = classify(limitation);
            if (seenKeys.has(key)) {
                continue; // a tool counts once per constraint
            }
            seenKeys.add(key);
            entries.push({ key, text: limitation });
            const tally = constraintCounts.get(key);
            if (tally) {
                tally.count++;
            }
            else {
                constraintCounts.set(key, { count: 1, original: display });
            }
        }
        perTool.push({ name: profile.name, entries });
    }
    // Constraints carried by enough tools are "common"
    const threshold = Math.max(MATH_FACTORS.MIN_COMMON_CONSTRAINT_THRESHOLD, Math.floor(profiles.length / 2));
    const common = [];
    const commonKeys = new Set();
    for (const [key, { count, original }] of constraintCounts) {
        if (count >= threshold) {
            common.push(original);
            commonKeys.add(key);
        }
    }
    // Pass 2: whatever is not common stays with its tool
    const toolConstraints = new Map();
    for (const { name, entries } of perTool) {
        const remaining = entries
            .filter(({ key }) => !commonKeys.has(key))
            .map(({ text }) => text);
        if (remaining.length > 0) {
            toolConstraints.set(name, remaining);
        }
    }
    return { common, byTool: toolConstraints };
}
|
|
781
|
+
/**
 * Render the "Global Constraints" section as a bullet list.
 *
 * @param {string[]} common - Constraints to list, one bullet each.
 * @returns {string[]} Markdown lines, or an empty array when there is nothing to list.
 */
function generateGlobalConstraintsSection(common) {
    if (common.length === 0) {
        return [];
    }
    const bullets = common.map((constraint) => `- ${constraint}`);
    return [
        '## Global Constraints',
        '',
        'The following restrictions apply to all tools:',
        '',
        ...bullets,
        '',
    ];
}
|
|
799
|
+
/**
 * Render the "Security Considerations" section.
 *
 * Every security note of every profile is classified with
 * classifySecuritySeverity and listed under its severity heading, in the
 * order critical, warning, info. Headings with no findings are omitted.
 *
 * @param {Array<object>} profiles - Tool profiles; `name` and `securityNotes` are read.
 * @returns {string[]} Markdown lines, or an empty array when no profile has security notes.
 */
function generateSecuritySection(profiles) {
    // Collect all security notes, tagged with tool name and severity
    const findings = profiles.flatMap((profile) => profile.securityNotes.map((note) => ({
        tool: profile.name,
        note,
        severity: classifySecuritySeverity(note),
    })));
    if (findings.length === 0) {
        return [];
    }
    const sections = [
        { severity: 'critical', heading: '### Critical Issues' },
        { severity: 'warning', heading: '### Warnings' },
        { severity: 'info', heading: '### Informational' },
    ];
    const out = ['## Security Considerations', ''];
    for (const { severity, heading } of sections) {
        const matching = findings.filter((finding) => finding.severity === severity);
        if (matching.length === 0) {
            continue;
        }
        out.push(heading, '');
        out.push(...matching.map((finding) => `- **${finding.tool}**: ${finding.note}`));
        out.push('');
    }
    return out;
}
|
|
847
|
+
/**
 * Classify a security note by keyword.
 *
 * Critical keywords are checked first, then warning keywords; anything else
 * is informational. Matching is case-insensitive.
 *
 * @param {string} note - Security note text.
 * @returns {'critical'|'warning'|'info'} Severity bucket for the note.
 */
function classifySecuritySeverity(note) {
    const lowerNote = note.toLowerCase();
    const criticalPhrases = ['injection', 'remote code', 'arbitrary code', 'command execution', 'sql injection'];
    // Short acronyms must match as whole words: as a plain substring, "rce"
    // also occurs inside "resource", "source", "enforce" and "force".
    const criticalAcronyms = /\b(?:rce|xss)\b/;
    const warningKeywords = ['risk', 'vulnerab', 'dangerous', 'unsafe', 'leak', 'exposure', 'sensitive'];
    if (criticalAcronyms.test(lowerNote) || criticalPhrases.some((kw) => lowerNote.includes(kw))) {
        return 'critical';
    }
    if (warningKeywords.some((kw) => lowerNote.includes(kw))) {
        return 'warning';
    }
    return 'info';
}
|
|
862
|
+
/**
 * Render the "Performance" section: a per-tool metrics table, an optional
 * timing breakdown (tool execution vs. LLM analysis) for metrics that carry
 * both figures, and an insights list for slow, analysis-dominated and
 * unreliable tools.
 *
 * @param {Array<object>} profiles - Tool profiles, passed to calculatePerformanceMetrics.
 * @returns {string[]} Markdown lines, or an empty array when there are no metrics.
 */
function generatePerformanceSection(profiles) {
    const metrics = calculatePerformanceMetrics(profiles);
    if (metrics.length === 0) {
        return [];
    }
    const lines = [
        '## Performance',
        '',
        'Response time metrics from interview sessions (in milliseconds):',
        '',
        '| Tool | Calls | Avg | P50 | P95 | Max | Error Rate |',
        '|------|-------|-----|-----|-----|-----|------------|',
    ];
    for (const m of metrics) {
        const errorPct = (m.errorRate * 100).toFixed(0);
        // Error rates above 50% are shown in bold
        const errorDisplay = m.errorRate > 0.5 ? `**${errorPct}%**` : `${errorPct}%`;
        // Escape table cell content
        lines.push(`| \`${escapeTableCell(m.toolName)}\` | ${m.callCount} | ${m.avgMs}ms | ${m.p50Ms}ms | ${m.p95Ms}ms | ${m.maxMs}ms | ${errorDisplay} |`);
    }
    lines.push('');
    // Only metrics carrying both timings can be broken down
    const metricsWithBreakdown = metrics.filter((m) => m.avgToolMs !== undefined && m.avgAnalysisMs !== undefined);
    // Fraction of the average total spent executing the tool, or null when
    // the total is zero and no meaningful share exists.
    const toolShare = (m) => (m.avgMs > 0 ? m.avgToolMs / m.avgMs : null);
    if (metricsWithBreakdown.length > 0) {
        lines.push('### Timing Breakdown');
        lines.push('');
        lines.push('Breakdown of total time into tool execution (MCP) and LLM analysis:');
        lines.push('');
        lines.push('| Tool | Total Avg | Tool Exec | LLM Analysis | Tool % |');
        lines.push('|------|-----------|-----------|--------------|--------|');
        for (const m of metricsWithBreakdown) {
            const share = toolShare(m);
            const toolPct = share === null ? 0 : Math.round(share * 100);
            lines.push(`| \`${escapeTableCell(m.toolName)}\` | ${m.avgMs}ms | ${m.avgToolMs}ms | ${m.avgAnalysisMs}ms | ${toolPct}% |`);
        }
        lines.push('');
    }
    // Performance insights
    const slowTools = metrics.filter((m) => m.avgMs > 1000);
    const unreliableTools = metrics.filter((m) => m.errorRate > 0.3);
    // Tool execution below 30% of the total means analysis took over 70%.
    // A zero total has no share at all, so such tools are not flagged.
    const llmDominatedTools = metricsWithBreakdown.filter((m) => {
        const share = toolShare(m);
        return share !== null && share < 0.3;
    });
    if (slowTools.length === 0 && unreliableTools.length === 0 && llmDominatedTools.length === 0) {
        return lines;
    }
    lines.push('### Performance Insights');
    lines.push('');
    if (slowTools.length > 0) {
        lines.push('**Slow Tools** (avg > 1s):');
        for (const tool of slowTools) {
            // Include breakdown if available
            if (tool.avgToolMs !== undefined && tool.avgAnalysisMs !== undefined) {
                lines.push(`- \`${tool.toolName}\`: ${tool.avgMs}ms average (tool: ${tool.avgToolMs}ms, analysis: ${tool.avgAnalysisMs}ms)`);
            }
            else {
                lines.push(`- \`${tool.toolName}\`: ${tool.avgMs}ms average`);
            }
        }
        lines.push('');
    }
    if (llmDominatedTools.length > 0) {
        lines.push('**LLM Analysis Dominated** (tool execution < 30% of total):');
        lines.push('');
        lines.push('These timings are dominated by LLM analysis rather than actual tool execution:');
        for (const tool of llmDominatedTools) {
            lines.push(`- \`${tool.toolName}\`: tool exec ${tool.avgToolMs}ms vs analysis ${tool.avgAnalysisMs}ms`);
        }
        lines.push('');
    }
    if (unreliableTools.length > 0) {
        lines.push('**Unreliable Tools** (error rate > 30%):');
        for (const tool of unreliableTools) {
            const errorPct = (tool.errorRate * 100).toFixed(0);
            lines.push(`- \`${tool.toolName}\`: ${errorPct}% error rate`);
        }
        lines.push('');
    }
    return lines;
}
|
|
945
|
+
/**
 * Render the "Behavioral Matrix": a table with one row per tool and one
 * column per persona, each cell summarizing how many findings that persona
 * recorded for that tool, followed by a collapsible legend.
 *
 * @param {Array<object>} profiles - Tool profiles; `name` and `findingsByPersona` are read.
 * @param {Array<object>} personas - Personas; `id` and `name` are read.
 * @returns {string[]} Markdown lines, or an empty array when no profile has per-persona findings.
 */
function generateBehavioralMatrix(profiles, personas) {
    const anyPersonaFindings = profiles.some((p) => p.findingsByPersona && p.findingsByPersona.length > 0);
    if (!anyPersonaFindings) {
        return [];
    }
    const toTableRow = (cells) => `| ${cells.join(' | ')} |`;
    // Summarize one tool/persona pair; '-' when that persona has no entry.
    const summarize = (profile, persona) => {
        const entry = profile.findingsByPersona?.find((f) => f.personaId === persona.id);
        if (!entry) {
            return '-';
        }
        const limitationCount = entry.limitations.length;
        const securityCount = entry.securityNotes.length;
        const total = entry.behavioralNotes.length + limitationCount + securityCount;
        const marker = securityCount > 0 || limitationCount > 0 ? '⚠️' : '✓';
        return `${marker} ${total} finding${total !== 1 ? 's' : ''}`;
    };
    // Persona names are escaped in case they contain special characters
    const headerCells = ['Tool', ...personas.map((p) => escapeTableCell(p.name))];
    const bodyRows = profiles.map((profile) => toTableRow([
        escapeTableCell(profile.name),
        ...personas.map((persona) => summarize(profile, persona)),
    ]));
    return [
        '## Behavioral Matrix',
        '',
        'Summary of findings by tool and persona:',
        '',
        toTableRow(headerCells),
        toTableRow(headerCells.map(() => '---')),
        ...bodyRows,
        '',
        '<details>',
        '<summary>Matrix Legend</summary>',
        '',
        '- ✓ = No warnings or limitations found',
        '- ⚠️ = Warnings or limitations detected',
        '- Numbers indicate total findings (behavioral notes + limitations + security notes)',
        '',
        '</details>',
        '',
    ];
}
|
|
995
|
+
//# sourceMappingURL=agents.js.map
|