@dotsetlabs/bellwether 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +291 -0
- package/LICENSE +21 -0
- package/README.md +739 -0
- package/dist/auth/credentials.d.ts +64 -0
- package/dist/auth/credentials.js +218 -0
- package/dist/auth/index.d.ts +6 -0
- package/dist/auth/index.js +6 -0
- package/dist/auth/keychain.d.ts +64 -0
- package/dist/auth/keychain.js +268 -0
- package/dist/baseline/ab-testing.d.ts +80 -0
- package/dist/baseline/ab-testing.js +236 -0
- package/dist/baseline/ai-compatibility-scorer.d.ts +95 -0
- package/dist/baseline/ai-compatibility-scorer.js +606 -0
- package/dist/baseline/calibration.d.ts +77 -0
- package/dist/baseline/calibration.js +136 -0
- package/dist/baseline/category-matching.d.ts +85 -0
- package/dist/baseline/category-matching.js +289 -0
- package/dist/baseline/change-impact-analyzer.d.ts +98 -0
- package/dist/baseline/change-impact-analyzer.js +592 -0
- package/dist/baseline/comparator.d.ts +64 -0
- package/dist/baseline/comparator.js +916 -0
- package/dist/baseline/confidence.d.ts +55 -0
- package/dist/baseline/confidence.js +122 -0
- package/dist/baseline/converter.d.ts +61 -0
- package/dist/baseline/converter.js +585 -0
- package/dist/baseline/dependency-analyzer.d.ts +89 -0
- package/dist/baseline/dependency-analyzer.js +567 -0
- package/dist/baseline/deprecation-tracker.d.ts +133 -0
- package/dist/baseline/deprecation-tracker.js +322 -0
- package/dist/baseline/diff.d.ts +55 -0
- package/dist/baseline/diff.js +1584 -0
- package/dist/baseline/documentation-scorer.d.ts +205 -0
- package/dist/baseline/documentation-scorer.js +466 -0
- package/dist/baseline/embeddings.d.ts +118 -0
- package/dist/baseline/embeddings.js +251 -0
- package/dist/baseline/error-analyzer.d.ts +198 -0
- package/dist/baseline/error-analyzer.js +721 -0
- package/dist/baseline/evaluation/evaluator.d.ts +42 -0
- package/dist/baseline/evaluation/evaluator.js +323 -0
- package/dist/baseline/evaluation/expanded-dataset.d.ts +45 -0
- package/dist/baseline/evaluation/expanded-dataset.js +1164 -0
- package/dist/baseline/evaluation/golden-dataset.d.ts +58 -0
- package/dist/baseline/evaluation/golden-dataset.js +717 -0
- package/dist/baseline/evaluation/index.d.ts +15 -0
- package/dist/baseline/evaluation/index.js +15 -0
- package/dist/baseline/evaluation/types.d.ts +186 -0
- package/dist/baseline/evaluation/types.js +8 -0
- package/dist/baseline/external-dependency-detector.d.ts +181 -0
- package/dist/baseline/external-dependency-detector.js +524 -0
- package/dist/baseline/golden-output.d.ts +162 -0
- package/dist/baseline/golden-output.js +636 -0
- package/dist/baseline/health-scorer.d.ts +174 -0
- package/dist/baseline/health-scorer.js +451 -0
- package/dist/baseline/incremental-checker.d.ts +97 -0
- package/dist/baseline/incremental-checker.js +174 -0
- package/dist/baseline/index.d.ts +31 -0
- package/dist/baseline/index.js +42 -0
- package/dist/baseline/migration-generator.d.ts +137 -0
- package/dist/baseline/migration-generator.js +554 -0
- package/dist/baseline/migrations.d.ts +60 -0
- package/dist/baseline/migrations.js +197 -0
- package/dist/baseline/performance-tracker.d.ts +214 -0
- package/dist/baseline/performance-tracker.js +577 -0
- package/dist/baseline/pr-comment-generator.d.ts +117 -0
- package/dist/baseline/pr-comment-generator.js +546 -0
- package/dist/baseline/response-fingerprint.d.ts +127 -0
- package/dist/baseline/response-fingerprint.js +728 -0
- package/dist/baseline/response-schema-tracker.d.ts +129 -0
- package/dist/baseline/response-schema-tracker.js +420 -0
- package/dist/baseline/risk-scorer.d.ts +54 -0
- package/dist/baseline/risk-scorer.js +434 -0
- package/dist/baseline/saver.d.ts +89 -0
- package/dist/baseline/saver.js +554 -0
- package/dist/baseline/scenario-generator.d.ts +151 -0
- package/dist/baseline/scenario-generator.js +905 -0
- package/dist/baseline/schema-compare.d.ts +86 -0
- package/dist/baseline/schema-compare.js +557 -0
- package/dist/baseline/schema-evolution.d.ts +189 -0
- package/dist/baseline/schema-evolution.js +467 -0
- package/dist/baseline/semantic.d.ts +203 -0
- package/dist/baseline/semantic.js +908 -0
- package/dist/baseline/synonyms.d.ts +60 -0
- package/dist/baseline/synonyms.js +386 -0
- package/dist/baseline/telemetry.d.ts +165 -0
- package/dist/baseline/telemetry.js +294 -0
- package/dist/baseline/test-pruner.d.ts +120 -0
- package/dist/baseline/test-pruner.js +387 -0
- package/dist/baseline/types.d.ts +449 -0
- package/dist/baseline/types.js +5 -0
- package/dist/baseline/version.d.ts +138 -0
- package/dist/baseline/version.js +206 -0
- package/dist/cache/index.d.ts +5 -0
- package/dist/cache/index.js +5 -0
- package/dist/cache/response-cache.d.ts +151 -0
- package/dist/cache/response-cache.js +287 -0
- package/dist/ci/index.d.ts +60 -0
- package/dist/ci/index.js +342 -0
- package/dist/cli/commands/auth.d.ts +12 -0
- package/dist/cli/commands/auth.js +352 -0
- package/dist/cli/commands/badge.d.ts +3 -0
- package/dist/cli/commands/badge.js +74 -0
- package/dist/cli/commands/baseline-accept.d.ts +15 -0
- package/dist/cli/commands/baseline-accept.js +178 -0
- package/dist/cli/commands/baseline-migrate.d.ts +12 -0
- package/dist/cli/commands/baseline-migrate.js +164 -0
- package/dist/cli/commands/baseline.d.ts +14 -0
- package/dist/cli/commands/baseline.js +449 -0
- package/dist/cli/commands/beta.d.ts +10 -0
- package/dist/cli/commands/beta.js +231 -0
- package/dist/cli/commands/check.d.ts +11 -0
- package/dist/cli/commands/check.js +820 -0
- package/dist/cli/commands/cloud/badge.d.ts +3 -0
- package/dist/cli/commands/cloud/badge.js +74 -0
- package/dist/cli/commands/cloud/diff.d.ts +6 -0
- package/dist/cli/commands/cloud/diff.js +79 -0
- package/dist/cli/commands/cloud/history.d.ts +6 -0
- package/dist/cli/commands/cloud/history.js +102 -0
- package/dist/cli/commands/cloud/link.d.ts +9 -0
- package/dist/cli/commands/cloud/link.js +119 -0
- package/dist/cli/commands/cloud/login.d.ts +7 -0
- package/dist/cli/commands/cloud/login.js +499 -0
- package/dist/cli/commands/cloud/projects.d.ts +6 -0
- package/dist/cli/commands/cloud/projects.js +44 -0
- package/dist/cli/commands/cloud/shared.d.ts +7 -0
- package/dist/cli/commands/cloud/shared.js +42 -0
- package/dist/cli/commands/cloud/teams.d.ts +8 -0
- package/dist/cli/commands/cloud/teams.js +169 -0
- package/dist/cli/commands/cloud/upload.d.ts +8 -0
- package/dist/cli/commands/cloud/upload.js +181 -0
- package/dist/cli/commands/contract.d.ts +11 -0
- package/dist/cli/commands/contract.js +280 -0
- package/dist/cli/commands/discover.d.ts +3 -0
- package/dist/cli/commands/discover.js +82 -0
- package/dist/cli/commands/eval.d.ts +9 -0
- package/dist/cli/commands/eval.js +187 -0
- package/dist/cli/commands/explore.d.ts +11 -0
- package/dist/cli/commands/explore.js +437 -0
- package/dist/cli/commands/feedback.d.ts +9 -0
- package/dist/cli/commands/feedback.js +174 -0
- package/dist/cli/commands/golden.d.ts +12 -0
- package/dist/cli/commands/golden.js +407 -0
- package/dist/cli/commands/history.d.ts +10 -0
- package/dist/cli/commands/history.js +202 -0
- package/dist/cli/commands/init.d.ts +9 -0
- package/dist/cli/commands/init.js +219 -0
- package/dist/cli/commands/interview.d.ts +3 -0
- package/dist/cli/commands/interview.js +903 -0
- package/dist/cli/commands/link.d.ts +10 -0
- package/dist/cli/commands/link.js +169 -0
- package/dist/cli/commands/login.d.ts +7 -0
- package/dist/cli/commands/login.js +499 -0
- package/dist/cli/commands/preset.d.ts +33 -0
- package/dist/cli/commands/preset.js +297 -0
- package/dist/cli/commands/profile.d.ts +33 -0
- package/dist/cli/commands/profile.js +286 -0
- package/dist/cli/commands/registry.d.ts +11 -0
- package/dist/cli/commands/registry.js +146 -0
- package/dist/cli/commands/shared.d.ts +79 -0
- package/dist/cli/commands/shared.js +196 -0
- package/dist/cli/commands/teams.d.ts +8 -0
- package/dist/cli/commands/teams.js +169 -0
- package/dist/cli/commands/test.d.ts +9 -0
- package/dist/cli/commands/test.js +500 -0
- package/dist/cli/commands/upload.d.ts +8 -0
- package/dist/cli/commands/upload.js +223 -0
- package/dist/cli/commands/validate-config.d.ts +6 -0
- package/dist/cli/commands/validate-config.js +35 -0
- package/dist/cli/commands/verify.d.ts +11 -0
- package/dist/cli/commands/verify.js +283 -0
- package/dist/cli/commands/watch.d.ts +12 -0
- package/dist/cli/commands/watch.js +253 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.js +178 -0
- package/dist/cli/interactive.d.ts +47 -0
- package/dist/cli/interactive.js +216 -0
- package/dist/cli/output/terminal-reporter.d.ts +19 -0
- package/dist/cli/output/terminal-reporter.js +104 -0
- package/dist/cli/output.d.ts +226 -0
- package/dist/cli/output.js +438 -0
- package/dist/cli/utils/env.d.ts +5 -0
- package/dist/cli/utils/env.js +14 -0
- package/dist/cli/utils/progress.d.ts +59 -0
- package/dist/cli/utils/progress.js +206 -0
- package/dist/cli/utils/server-context.d.ts +10 -0
- package/dist/cli/utils/server-context.js +36 -0
- package/dist/cloud/auth.d.ts +144 -0
- package/dist/cloud/auth.js +374 -0
- package/dist/cloud/client.d.ts +24 -0
- package/dist/cloud/client.js +65 -0
- package/dist/cloud/http-client.d.ts +38 -0
- package/dist/cloud/http-client.js +215 -0
- package/dist/cloud/index.d.ts +23 -0
- package/dist/cloud/index.js +25 -0
- package/dist/cloud/mock-client.d.ts +107 -0
- package/dist/cloud/mock-client.js +545 -0
- package/dist/cloud/types.d.ts +515 -0
- package/dist/cloud/types.js +15 -0
- package/dist/config/defaults.d.ts +160 -0
- package/dist/config/defaults.js +169 -0
- package/dist/config/loader.d.ts +24 -0
- package/dist/config/loader.js +122 -0
- package/dist/config/template.d.ts +42 -0
- package/dist/config/template.js +647 -0
- package/dist/config/validator.d.ts +2112 -0
- package/dist/config/validator.js +658 -0
- package/dist/constants/cloud.d.ts +107 -0
- package/dist/constants/cloud.js +110 -0
- package/dist/constants/core.d.ts +521 -0
- package/dist/constants/core.js +556 -0
- package/dist/constants/testing.d.ts +1283 -0
- package/dist/constants/testing.js +1568 -0
- package/dist/constants.d.ts +10 -0
- package/dist/constants.js +10 -0
- package/dist/contract/index.d.ts +6 -0
- package/dist/contract/index.js +5 -0
- package/dist/contract/validator.d.ts +177 -0
- package/dist/contract/validator.js +574 -0
- package/dist/cost/index.d.ts +6 -0
- package/dist/cost/index.js +5 -0
- package/dist/cost/tracker.d.ts +134 -0
- package/dist/cost/tracker.js +313 -0
- package/dist/discovery/discovery.d.ts +16 -0
- package/dist/discovery/discovery.js +173 -0
- package/dist/discovery/types.d.ts +51 -0
- package/dist/discovery/types.js +2 -0
- package/dist/docs/agents.d.ts +3 -0
- package/dist/docs/agents.js +995 -0
- package/dist/docs/contract.d.ts +51 -0
- package/dist/docs/contract.js +1681 -0
- package/dist/docs/generator.d.ts +4 -0
- package/dist/docs/generator.js +4 -0
- package/dist/docs/html-reporter.d.ts +9 -0
- package/dist/docs/html-reporter.js +757 -0
- package/dist/docs/index.d.ts +10 -0
- package/dist/docs/index.js +11 -0
- package/dist/docs/junit-reporter.d.ts +18 -0
- package/dist/docs/junit-reporter.js +210 -0
- package/dist/docs/report.d.ts +14 -0
- package/dist/docs/report.js +44 -0
- package/dist/docs/sarif-reporter.d.ts +19 -0
- package/dist/docs/sarif-reporter.js +335 -0
- package/dist/docs/shared.d.ts +35 -0
- package/dist/docs/shared.js +162 -0
- package/dist/docs/templates.d.ts +12 -0
- package/dist/docs/templates.js +76 -0
- package/dist/errors/index.d.ts +6 -0
- package/dist/errors/index.js +6 -0
- package/dist/errors/retry.d.ts +92 -0
- package/dist/errors/retry.js +323 -0
- package/dist/errors/types.d.ts +321 -0
- package/dist/errors/types.js +584 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.js +32 -0
- package/dist/interview/dependency-resolver.d.ts +11 -0
- package/dist/interview/dependency-resolver.js +32 -0
- package/dist/interview/interviewer.d.ts +232 -0
- package/dist/interview/interviewer.js +1939 -0
- package/dist/interview/mock-response-generator.d.ts +7 -0
- package/dist/interview/mock-response-generator.js +102 -0
- package/dist/interview/orchestrator.d.ts +237 -0
- package/dist/interview/orchestrator.js +1296 -0
- package/dist/interview/rate-limiter.d.ts +15 -0
- package/dist/interview/rate-limiter.js +55 -0
- package/dist/interview/response-validator.d.ts +10 -0
- package/dist/interview/response-validator.js +132 -0
- package/dist/interview/schema-inferrer.d.ts +8 -0
- package/dist/interview/schema-inferrer.js +71 -0
- package/dist/interview/schema-test-generator.d.ts +71 -0
- package/dist/interview/schema-test-generator.js +834 -0
- package/dist/interview/smart-value-generator.d.ts +155 -0
- package/dist/interview/smart-value-generator.js +554 -0
- package/dist/interview/stateful-test-runner.d.ts +19 -0
- package/dist/interview/stateful-test-runner.js +106 -0
- package/dist/interview/types.d.ts +561 -0
- package/dist/interview/types.js +2 -0
- package/dist/llm/anthropic.d.ts +41 -0
- package/dist/llm/anthropic.js +355 -0
- package/dist/llm/client.d.ts +123 -0
- package/dist/llm/client.js +42 -0
- package/dist/llm/factory.d.ts +38 -0
- package/dist/llm/factory.js +145 -0
- package/dist/llm/fallback.d.ts +140 -0
- package/dist/llm/fallback.js +379 -0
- package/dist/llm/index.d.ts +18 -0
- package/dist/llm/index.js +15 -0
- package/dist/llm/ollama.d.ts +37 -0
- package/dist/llm/ollama.js +330 -0
- package/dist/llm/openai.d.ts +25 -0
- package/dist/llm/openai.js +320 -0
- package/dist/llm/token-budget.d.ts +161 -0
- package/dist/llm/token-budget.js +395 -0
- package/dist/logging/logger.d.ts +70 -0
- package/dist/logging/logger.js +130 -0
- package/dist/metrics/collector.d.ts +106 -0
- package/dist/metrics/collector.js +547 -0
- package/dist/metrics/index.d.ts +7 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/prometheus.d.ts +20 -0
- package/dist/metrics/prometheus.js +241 -0
- package/dist/metrics/types.d.ts +209 -0
- package/dist/metrics/types.js +5 -0
- package/dist/persona/builtins.d.ts +54 -0
- package/dist/persona/builtins.js +219 -0
- package/dist/persona/index.d.ts +8 -0
- package/dist/persona/index.js +8 -0
- package/dist/persona/loader.d.ts +30 -0
- package/dist/persona/loader.js +190 -0
- package/dist/persona/types.d.ts +144 -0
- package/dist/persona/types.js +5 -0
- package/dist/persona/validation.d.ts +94 -0
- package/dist/persona/validation.js +332 -0
- package/dist/prompts/index.d.ts +5 -0
- package/dist/prompts/index.js +5 -0
- package/dist/prompts/templates.d.ts +180 -0
- package/dist/prompts/templates.js +431 -0
- package/dist/registry/client.d.ts +49 -0
- package/dist/registry/client.js +191 -0
- package/dist/registry/index.d.ts +7 -0
- package/dist/registry/index.js +6 -0
- package/dist/registry/types.d.ts +140 -0
- package/dist/registry/types.js +6 -0
- package/dist/scenarios/evaluator.d.ts +43 -0
- package/dist/scenarios/evaluator.js +206 -0
- package/dist/scenarios/index.d.ts +10 -0
- package/dist/scenarios/index.js +9 -0
- package/dist/scenarios/loader.d.ts +20 -0
- package/dist/scenarios/loader.js +285 -0
- package/dist/scenarios/types.d.ts +153 -0
- package/dist/scenarios/types.js +8 -0
- package/dist/security/index.d.ts +17 -0
- package/dist/security/index.js +18 -0
- package/dist/security/payloads.d.ts +61 -0
- package/dist/security/payloads.js +268 -0
- package/dist/security/security-tester.d.ts +42 -0
- package/dist/security/security-tester.js +582 -0
- package/dist/security/types.d.ts +166 -0
- package/dist/security/types.js +8 -0
- package/dist/transport/base-transport.d.ts +59 -0
- package/dist/transport/base-transport.js +38 -0
- package/dist/transport/http-transport.d.ts +67 -0
- package/dist/transport/http-transport.js +238 -0
- package/dist/transport/mcp-client.d.ts +141 -0
- package/dist/transport/mcp-client.js +496 -0
- package/dist/transport/sse-transport.d.ts +88 -0
- package/dist/transport/sse-transport.js +316 -0
- package/dist/transport/stdio-transport.d.ts +43 -0
- package/dist/transport/stdio-transport.js +238 -0
- package/dist/transport/types.d.ts +125 -0
- package/dist/transport/types.js +16 -0
- package/dist/utils/concurrency.d.ts +123 -0
- package/dist/utils/concurrency.js +213 -0
- package/dist/utils/formatters.d.ts +16 -0
- package/dist/utils/formatters.js +37 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/jsonpath.d.ts +87 -0
- package/dist/utils/jsonpath.js +326 -0
- package/dist/utils/markdown.d.ts +113 -0
- package/dist/utils/markdown.js +265 -0
- package/dist/utils/network.d.ts +14 -0
- package/dist/utils/network.js +17 -0
- package/dist/utils/sanitize.d.ts +92 -0
- package/dist/utils/sanitize.js +191 -0
- package/dist/utils/semantic.d.ts +194 -0
- package/dist/utils/semantic.js +1051 -0
- package/dist/utils/smart-truncate.d.ts +94 -0
- package/dist/utils/smart-truncate.js +361 -0
- package/dist/utils/timeout.d.ts +153 -0
- package/dist/utils/timeout.js +205 -0
- package/dist/utils/yaml-parser.d.ts +58 -0
- package/dist/utils/yaml-parser.js +86 -0
- package/dist/validation/index.d.ts +32 -0
- package/dist/validation/index.js +32 -0
- package/dist/validation/semantic-test-generator.d.ts +50 -0
- package/dist/validation/semantic-test-generator.js +176 -0
- package/dist/validation/semantic-types.d.ts +66 -0
- package/dist/validation/semantic-types.js +94 -0
- package/dist/validation/semantic-validator.d.ts +38 -0
- package/dist/validation/semantic-validator.js +340 -0
- package/dist/verification/index.d.ts +6 -0
- package/dist/verification/index.js +5 -0
- package/dist/verification/types.d.ts +133 -0
- package/dist/verification/types.js +5 -0
- package/dist/verification/verifier.d.ts +30 -0
- package/dist/verification/verifier.js +309 -0
- package/dist/version.d.ts +19 -0
- package/dist/version.js +48 -0
- package/dist/workflow/auto-generator.d.ts +27 -0
- package/dist/workflow/auto-generator.js +513 -0
- package/dist/workflow/discovery.d.ts +40 -0
- package/dist/workflow/discovery.js +195 -0
- package/dist/workflow/executor.d.ts +82 -0
- package/dist/workflow/executor.js +611 -0
- package/dist/workflow/index.d.ts +10 -0
- package/dist/workflow/index.js +10 -0
- package/dist/workflow/loader.d.ts +24 -0
- package/dist/workflow/loader.js +194 -0
- package/dist/workflow/state-tracker.d.ts +98 -0
- package/dist/workflow/state-tracker.js +424 -0
- package/dist/workflow/types.d.ts +337 -0
- package/dist/workflow/types.js +5 -0
- package/package.json +94 -0
- package/schemas/bellwether-check.schema.json +651 -0
|
@@ -0,0 +1,554 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Baseline save/load functionality.
|
|
3
|
+
*/
|
|
4
|
+
import { createHash } from 'crypto';
|
|
5
|
+
import { readFileSync, writeFileSync, existsSync, statSync } from 'fs';
|
|
6
|
+
import { z } from 'zod';
|
|
7
|
+
import { computeConsensusSchemaHash } from './schema-compare.js';
|
|
8
|
+
import { getBaselineVersion, parseVersion, formatVersion, } from './version.js';
|
|
9
|
+
import { migrateBaseline, needsMigration } from './migrations.js';
|
|
10
|
+
import { analyzeResponses } from './response-fingerprint.js';
|
|
11
|
+
import { calculateMetrics, calculatePerformanceConfidence } from './performance-tracker.js';
|
|
12
|
+
import { PATTERNS, PAYLOAD_LIMITS } from '../constants.js';
|
|
13
|
+
import { getLogger } from '../logging/logger.js';
|
|
14
|
+
/**
 * Validation schema for one behavioral assertion.
 *
 * Required: `tool`, `aspect` (one of the enumerated aspect names),
 * `assertion`, and `isPositive`. `evidence` is an optional string.
 */
const behavioralAssertionSchema = z.object({
    tool: z.string(),
    aspect: z.enum(['response_format', 'response_structure', 'error_handling', 'error_pattern', 'security', 'performance', 'schema', 'description']),
    assertion: z.string(),
    evidence: z.string().optional(),
    isPositive: z.boolean(),
});
|
|
33
|
+
/**
 * Validation schema for a response fingerprint.
 *
 * `fields` and `arrayItemStructure` are optional; every other property
 * is required.
 */
const responseFingerprintSchema = z.object({
    structureHash: z.string(),
    contentType: z.enum([
        'text',
        'object',
        'array',
        'primitive',
        'empty',
        'error',
        'mixed',
        'binary',
    ]),
    fields: z.array(z.string()).optional(),
    arrayItemStructure: z.string().optional(),
    size: z.enum([
        'tiny',
        'small',
        'medium',
        'large',
    ]),
    isEmpty: z.boolean(),
    sampleCount: z.number(),
    confidence: z.number(),
});
|
|
46
|
+
/**
 * Validation schema for an inferred output schema.
 *
 * The schema is recursive: `properties` maps names to nested inferred
 * schemas and `items` is itself an inferred schema, so the definition is
 * wrapped in `z.lazy` to allow the self-reference.
 */
const inferredSchemaSchema = z.lazy(() => {
    return z.object({
        type: z.string(),
        properties: z.record(inferredSchemaSchema).optional(),
        items: inferredSchemaSchema.optional(),
        required: z.array(z.string()).optional(),
        nullable: z.boolean().optional(),
        enum: z.array(z.unknown()).optional(),
    });
});
|
|
57
|
+
/**
 * Validation schema for an observed error pattern: its category, a hash
 * of the pattern, one example string, and an occurrence count.
 */
const errorPatternSchema = z.object({
    category: z.enum([
        'validation',
        'not_found',
        'permission',
        'timeout',
        'internal',
        'unknown',
    ]),
    patternHash: z.string(),
    example: z.string(),
    count: z.number(),
});
|
|
66
|
+
/**
 * Validation schema for performance-confidence data.
 *
 * All numeric fields and `confidenceLevel` are required;
 * `recommendation` is an optional string.
 */
const performanceConfidenceSchema = z.object({
    // Sample counters
    sampleCount: z.number(),
    successfulSamples: z.number(),
    validationSamples: z.number(),
    totalTests: z.number(),
    // Spread statistics
    standardDeviation: z.number(),
    coefficientOfVariation: z.number(),
    // Resulting classification
    confidenceLevel: z.enum([
        'low',
        'medium',
        'high',
    ]),
    recommendation: z.string().optional(),
});
|
|
79
|
+
/**
 * Validation schema for a single tool's fingerprint.
 *
 * The identity fields, `assertions`, `securityNotes` and `limitations`
 * are required. `inputSchema`, the response-fingerprinting fields and the
 * performance fields are all optional.
 */
const toolFingerprintSchema = z.object({
    name: z.string(),
    description: z.string(),
    schemaHash: z.string(),
    inputSchema: z.record(z.unknown()).optional(),
    assertions: z.array(behavioralAssertionSchema),
    securityNotes: z.array(z.string()),
    limitations: z.array(z.string()),

    // Response fingerprinting fields (check mode enhancement)
    responseFingerprint: responseFingerprintSchema.optional(),
    inferredOutputSchema: inferredSchemaSchema.optional(),
    errorPatterns: z.array(errorPatternSchema).optional(),

    // Performance baseline fields
    baselineP50Ms: z.number().optional(),
    baselineP95Ms: z.number().optional(),
    baselineSuccessRate: z.number().optional(),
    performanceConfidence: performanceConfidenceSchema.optional(),
});
|
|
100
|
+
/**
 * Validation schema for the server fingerprint: server name, server
 * version, protocol version, and a list of capability names.
 * Every property is required.
 */
const serverFingerprintSchema = z.object({
    name: z.string(),
    version: z.string(),
    protocolVersion: z.string(),
    capabilities: z.array(
        z.string()
    ),
});
|
|
109
|
+
/**
 * Validation schema for a workflow signature: an id, a name, the ordered
 * list of tool names, and whether the workflow succeeded.
 * `summary` is an optional string.
 */
const workflowSignatureSchema = z.object({
    id: z.string(),
    name: z.string(),
    toolSequence: z.array(
        z.string()
    ),
    succeeded: z.boolean(),
    summary: z.string().optional(),
});
|
|
119
|
+
/**
 * Validation schema for the diff snapshot stored with a drift acceptance:
 * the added / removed / modified tool names, the overall severity, and the
 * per-severity counts. Every property is required.
 */
const acceptedDiffSchema = z.object({
    // Tool-name lists
    toolsAdded: z.array(z.string()),
    toolsRemoved: z.array(z.string()),
    toolsModified: z.array(z.string()),
    // Overall severity
    severity: z.enum([
        'none',
        'info',
        'warning',
        'breaking',
    ]),
    // Per-severity counts
    breakingCount: z.number(),
    warningCount: z.number(),
    infoCount: z.number(),
});
|
|
131
|
+
/**
 * Validation schema for a drift acceptance record.
 *
 * `acceptedAt` may be a string or a Date. `acceptedBy` and `reason` are
 * optional strings. `acceptedDiff` is required.
 */
const driftAcceptanceSchema = z.object({
    acceptedAt: z
        .string()
        .or(z.date()),
    acceptedBy: z.string().optional(),
    reason: z.string().optional(),
    acceptedDiff: acceptedDiffSchema,
});
|
|
140
|
+
/**
 * Top-level validation schema for a baseline file.
 * Applied to untrusted JSON before any of it is used.
 *
 * `version` accepts either:
 *   - a string matching PATTERNS.SEMVER, e.g. "1.0.0" (current format), or
 *   - a positive integer, e.g. 1 (legacy format, will be migrated).
 *
 * `mode`, `workflowSignatures` and `acceptance` are optional; everything
 * else is required.
 */
const baselineSchema = z.object({
    version: z.union([
        z
            .string()
            .regex(PATTERNS.SEMVER, 'Version must be semver format (e.g., "1.0.0")'),
        // Legacy format support
        z
            .number()
            .int()
            .positive(),
    ]),
    createdAt: z
        .string()
        .or(z.date()),
    mode: z.enum(['check']).optional(),
    serverCommand: z.string(),
    server: serverFingerprintSchema,
    tools: z.array(toolFingerprintSchema),
    summary: z.string(),
    assertions: z.array(behavioralAssertionSchema),
    workflowSignatures: z.array(workflowSignatureSchema).optional(),
    integrityHash: z.string(),
    acceptance: driftAcceptanceSchema.optional(),
});
|
|
164
|
+
/**
 * Build a behavioral baseline from an interview result.
 *
 * The baseline is always tagged with mode 'check'. Each entry of
 * `result.toolProfiles` becomes a tool fingerprint, paired with the input
 * schema that discovery reported for the tool of the same name (if any).
 * The integrity hash is computed over the assembled data before the hash
 * itself is attached.
 *
 * @param result - Interview result (reads `discovery`, `toolProfiles`, `summary`)
 * @param serverCommand - Command stored verbatim on the baseline
 * @returns The baseline data plus its `integrityHash`
 */
export function createBaseline(result, serverCommand) {
    const serverFingerprint = createServerFingerprint(result);

    // Index discovered input schemas by tool name; tools without one are skipped.
    const inputSchemaByTool = new Map();
    for (const discovered of result.discovery.tools) {
        if (!discovered.inputSchema) {
            continue;
        }
        inputSchemaByTool.set(discovered.name, discovered.inputSchema);
    }

    const toolFingerprints = result.toolProfiles.map((profile) => {
        const discoveredSchema = inputSchemaByTool.get(profile.name);
        return createToolFingerprint(profile, discoveredSchema);
    });
    const allAssertions = extractAssertions(result);
    const signatures = extractWorkflowSignatures(result);

    // Everything except the hash.
    const unsigned = {
        version: getBaselineVersion(),
        createdAt: new Date(),
        // Baselines are always check mode
        mode: 'check',
        serverCommand,
        server: serverFingerprint,
        tools: toolFingerprints,
        summary: result.summary,
        assertions: allAssertions,
        workflowSignatures: signatures,
    };

    return {
        ...unsigned,
        integrityHash: calculateIntegrityHash(unsigned),
    };
}
|
|
202
|
+
/**
 * Write a baseline to disk as pretty-printed (2-space) UTF-8 JSON.
 *
 * @param baseline - Baseline object to serialize
 * @param path - Destination file path
 */
export function saveBaseline(baseline, path) {
    writeFileSync(path, JSON.stringify(baseline, null, 2), 'utf-8');
}
|
|
209
|
+
/**
 * Load a baseline from a file.
 * Validates against the Zod schema before any of the content is used.
 *
 * Steps, in order:
 *  1. Reject a missing file, and reject a file larger than
 *     PAYLOAD_LIMITS.MAX_BASELINE_SIZE. The size is checked from file
 *     metadata before the file is read, and again on the decoded content.
 *  2. Parse the JSON and validate it against `baselineSchema`.
 *  3. If `needsMigration` reports an older format, either migrate it or,
 *     when `migrate` is false, log a warning and keep the old format.
 *  4. Convert `createdAt` and `acceptance.acceptedAt` to Date objects.
 *  5. Verify the integrity hash, unless `skipIntegrityCheck` is set or the
 *     file was stored in an older format.
 *
 * @param path - Path to the baseline file
 * @param options - Load options
 * @param options.migrate - Migrate older formats automatically (default: true)
 * @param options.skipIntegrityCheck - Skip hash verification (default: false)
 * @returns Loaded baseline (migrated to current version if needed)
 * @throws Error if the file is missing, too large, not valid JSON, fails
 *   schema validation, contains an unparseable date, or fails the
 *   integrity check
 */
export function loadBaseline(path, options = {}) {
    const { migrate = true, skipIntegrityCheck = false } = options;
    if (!existsSync(path)) {
        throw new Error(`Baseline file not found: ${path}`);
    }
    const assertWithinSizeLimit = (sizeBytes) => {
        if (sizeBytes > PAYLOAD_LIMITS.MAX_BASELINE_SIZE) {
            const sizeMB = (sizeBytes / (1024 * 1024)).toFixed(2);
            const limitMB = (PAYLOAD_LIMITS.MAX_BASELINE_SIZE / (1024 * 1024)).toFixed(0);
            throw new Error(`Baseline file too large: ${sizeMB}MB exceeds limit of ${limitMB}MB. ` +
                `File may be corrupted or contain excessive data.`);
        }
    };
    // Check the on-disk size BEFORE reading, so an oversized file is never
    // pulled into memory.
    assertWithinSizeLimit(statSync(path).size);
    const content = readFileSync(path, 'utf-8');
    // Re-check the decoded content: the stat size can under-report (e.g. for
    // non-regular files) or the file may have grown between stat and read.
    assertWithinSizeLimit(Buffer.byteLength(content, 'utf-8'));
    let parsed;
    try {
        parsed = JSON.parse(content);
    }
    catch (error) {
        throw new Error(`Invalid JSON in baseline file ${path}: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
    // Validate against schema to prevent malicious JSON
    const result = baselineSchema.safeParse(parsed);
    if (!result.success) {
        const issues = result.error.issues.map((issue) => {
            const fieldPath = issue.path.join('.');
            return `  - ${fieldPath}: ${issue.message}`;
        });
        throw new Error(`Invalid baseline format in ${path}:\n${issues.join('\n')}`);
    }
    let baseline = result.data;
    // Decide once, on the data as stored, so the integrity decision below
    // cannot be affected by whatever migration does to the object.
    const storedInOlderFormat = needsMigration(baseline);
    if (storedInOlderFormat) {
        if (migrate) {
            // Automatically migrate to current version
            baseline = migrateBaseline(baseline);
        }
        else {
            // Log warning but continue with the old format
            const currentVersion = parseVersion(baseline.version);
            getLogger('baseline').warn(`Baseline uses older CLI version ${formatVersion(currentVersion.raw)}. ` +
                `Current CLI version is ${formatVersion(getBaselineVersion())}. ` +
                `Run \`bellwether baseline migrate\` to upgrade.`);
        }
    }
    const typedBaseline = baseline;
    // Restore Date objects. The schema accepts any string for these fields,
    // so reject values that do not parse rather than returning an Invalid Date.
    const toValidDate = (value, fieldName) => {
        const date = new Date(value);
        if (Number.isNaN(date.getTime())) {
            throw new Error(`Invalid baseline format in ${path}:\n  - ${fieldName}: not a valid date`);
        }
        return date;
    };
    typedBaseline.createdAt = toValidDate(typedBaseline.createdAt, 'createdAt');
    if (typedBaseline.acceptance?.acceptedAt) {
        typedBaseline.acceptance.acceptedAt = toValidDate(typedBaseline.acceptance.acceptedAt, 'acceptance.acceptedAt');
    }
    // Verify integrity (unless skipped or the file was in an older format)
    if (!skipIntegrityCheck && !storedInOlderFormat) {
        if (!verifyIntegrity(typedBaseline)) {
            throw new Error('Baseline integrity check failed - file may have been modified');
        }
    }
    return typedBaseline;
}
|
|
276
|
+
/**
 * Check a baseline against its stored integrity hash.
 *
 * The hash is recomputed over every field except `integrityHash` itself
 * and compared with the stored value.
 *
 * @param baseline - Baseline including its `integrityHash`
 * @returns true when the recomputed hash equals the stored one
 */
export function verifyIntegrity(baseline) {
    const { integrityHash: storedHash, ...hashedFields } = baseline;
    return storedHash === calculateIntegrityHash(hashedFields);
}
|
|
284
|
+
/**
 * Recalculate and update the integrity hash for a baseline.
 * Useful after migration or manual modifications.
 *
 * Any `integrityHash` already present on the input is excluded from the
 * hashed data, so the hash covers the same fields that verifyIntegrity()
 * hashes and the returned baseline verifies. The input is not mutated.
 *
 * @param baseline - Baseline data, with or without an existing `integrityHash`
 * @returns A copy of the baseline carrying the freshly computed `integrityHash`
 */
export function recalculateIntegrityHash(baseline) {
    // Drop the stale hash (if any) before hashing; verifyIntegrity() strips
    // it too, so hashing it here would produce a value that never verifies.
    const { integrityHash: _staleHash, ...hashedFields } = baseline;
    return {
        ...baseline,
        integrityHash: calculateIntegrityHash(hashedFields),
    };
}
|
|
295
|
+
/**
 * Derive the server fingerprint from a result's discovery data.
 *
 * The capability list contains, in this fixed order, each of 'tools',
 * 'prompts', 'resources' and 'logging' whose entry in
 * `discovery.capabilities` is truthy.
 *
 * @param result - Object carrying a `discovery` property
 * @returns `{ name, version, protocolVersion, capabilities }`
 */
function createServerFingerprint(result) {
    const info = result.discovery;
    const advertised = ['tools', 'prompts', 'resources', 'logging']
        .filter((capabilityName) => info.capabilities[capabilityName]);
    return {
        name: info.serverInfo.name,
        version: info.serverInfo.version,
        protocolVersion: info.protocolVersion,
        capabilities: advertised,
    };
}
|
|
316
|
+
/**
 * Build the fingerprint for one tool from its interview profile.
 *
 * Combines the tool's assertions, a schema hash derived from the arguments
 * of every interaction, a response analysis of the non-mocked interactions,
 * and latency/success metrics from non-mocked interactions that recorded a
 * `toolExecutionMs` value.
 *
 * @param profile - Tool profile (name, description, interactions, notes).
 * @param inputSchema - Input schema stored verbatim on the fingerprint.
 * @returns The tool fingerprint; performance fields stay `undefined` when no
 *   latency samples exist or no metrics were produced.
 */
function createToolFingerprint(profile, inputSchema) {
    const assertions = extractToolAssertions(profile);
    // Schema hash considers the arguments of all interactions, not just the first.
    const argumentSamples = profile.interactions.map((interaction) => ({ args: interaction.question.args }));
    const schemaHash = computeConsensusSchemaHash(argumentSamples).hash;
    // Response fingerprinting ignores mocked interactions.
    const observedResponses = profile.interactions
        .filter((interaction) => !interaction.mocked)
        .map((interaction) => ({ response: interaction.response, error: interaction.error }));
    const responseAnalysis = analyzeResponses(observedResponses);
    // Performance samples: only real (non-mocked) calls with a recorded duration.
    const latencySamples = profile.interactions
        .filter((interaction) => interaction.toolExecutionMs !== undefined && !interaction.mocked)
        .map((interaction) => ({
            toolName: profile.name,
            durationMs: interaction.toolExecutionMs ?? 0,
            success: !(interaction.error || interaction.response?.isError),
            timestamp: new Date(),
            expectedOutcome: interaction.question.expectedOutcome,
            outcomeCorrect: interaction.outcomeAssessment?.correct,
        }));
    const metrics = latencySamples.length > 0 ? calculateMetrics(latencySamples) : undefined;
    // Confidence is derived from the same latency samples.
    const performanceConfidence = calculatePerformanceConfidence(latencySamples);
    return {
        name: profile.name,
        description: profile.description,
        schemaHash,
        inputSchema,
        assertions,
        securityNotes: [...profile.securityNotes],
        limitations: [...profile.limitations],
        // Response fingerprinting
        responseFingerprint: responseAnalysis.fingerprint,
        inferredOutputSchema: responseAnalysis.inferredSchema,
        errorPatterns: responseAnalysis.errorPatterns.length > 0
            ? responseAnalysis.errorPatterns
            : undefined,
        // Performance baseline
        baselineP50Ms: metrics ? metrics.p50Ms : undefined,
        baselineP95Ms: metrics ? metrics.p95Ms : undefined,
        baselineSuccessRate: metrics ? metrics.successRate : undefined,
        performanceConfidence,
    };
}
|
|
379
|
+
/**
 * Turn a tool profile's notes into behavioral assertions.
 *
 * - Each behavioral note becomes a positive `response_format` assertion.
 * - Each limitation becomes a negative `error_handling` assertion.
 * - Each security note becomes a `security` assertion that is positive
 *   unless the note (case-insensitively) contains "risk", "vulnerab" or
 *   "dangerous".
 *
 * @param profile - Tool profile with `name`, `behavioralNotes`,
 *   `limitations` and `securityNotes`.
 * @returns Assertions in the order: behavioral, limitations, security.
 */
function extractToolAssertions(profile) {
    const negativeMarkers = ['risk', 'vulnerab', 'dangerous'];
    const behavioral = [...profile.behavioralNotes].map((note) => ({
        tool: profile.name,
        aspect: 'response_format',
        assertion: note,
        isPositive: true,
    }));
    const limits = [...profile.limitations].map((limitation) => ({
        tool: profile.name,
        aspect: 'error_handling',
        assertion: limitation,
        isPositive: false,
    }));
    const security = [...profile.securityNotes].map((secNote) => {
        const lowered = secNote.toLowerCase();
        return {
            tool: profile.name,
            aspect: 'security',
            assertion: secNote,
            isPositive: !negativeMarkers.some((marker) => lowered.includes(marker)),
        };
    });
    return [...behavioral, ...limits, ...security];
}
|
|
415
|
+
/**
 * Gather every assertion from an interview result.
 *
 * Per-tool assertions come first (in `toolProfiles` order), followed by one
 * negative `error_handling` assertion attributed to `'server'` for each
 * entry in the result's overall `limitations`.
 *
 * @param result - Interview result with `toolProfiles` and `limitations`.
 * @returns The combined assertion list.
 */
function extractAssertions(result) {
    const perTool = [...result.toolProfiles].flatMap((profile) => extractToolAssertions(profile));
    const serverWide = [...result.limitations].map((limitation) => ({
        tool: 'server',
        aspect: 'error_handling',
        assertion: limitation,
        isPositive: false,
    }));
    return [...perTool, ...serverWide];
}
|
|
435
|
+
/**
 * Summarize the workflow runs of an interview result as signatures.
 *
 * @param result - Interview result; `workflowResults` may be missing or empty.
 * @returns One signature per workflow result (id, name, ordered tool names
 *   of its steps, success flag, summary), or an empty array when there are
 *   no workflow results.
 */
function extractWorkflowSignatures(result) {
    const runs = result.workflowResults;
    if (runs && runs.length !== 0) {
        return runs.map((run) => {
            const { workflow } = run;
            return {
                id: workflow.id,
                name: workflow.name,
                toolSequence: workflow.steps.map((step) => step.tool),
                succeeded: run.success,
                summary: run.summary,
            };
        });
    }
    return [];
}
|
|
450
|
+
/**
 * Compute the integrity hash of baseline data.
 *
 * The data is serialized with `JSON.stringify` and the resulting text is
 * passed to `hashString`.
 *
 * NOTE: `JSON.stringify` calls a value's `toJSON` before the replacer sees
 * it, so `Date` values normally arrive here already as ISO strings; the
 * `instanceof Date` branch only acts as a safety net. Object keys are not
 * sorted, so the hash depends on property insertion order.
 *
 * @param data - Baseline data without its `integrityHash`.
 * @returns The hash string produced by `hashString`.
 */
function calculateIntegrityHash(data) {
    const serialized = JSON.stringify(data, (_key, value) => (value instanceof Date ? value.toISOString() : value));
    return hashString(serialized);
}
|
|
464
|
+
/**
 * Hash a string with SHA-256 and return a shortened hex digest.
 *
 * @param input - Text to hash.
 * @returns The first 16 hexadecimal characters of the SHA-256 digest.
 */
function hashString(input) {
    const hasher = createHash('sha256');
    hasher.update(input);
    const fullHex = hasher.digest('hex');
    return fullHex.substring(0, 16);
}
|
|
470
|
+
/**
 * Tell whether a baseline file is present at the given path.
 *
 * Directories do not count: the path must exist and be a regular file.
 * Any error raised while inspecting the path is treated as "not present".
 *
 * @param path - Filesystem path to check.
 * @returns `true` only when the path exists and is a file.
 */
export function baselineExists(path) {
    let isRegularFile = false;
    if (existsSync(path)) {
        try {
            isRegularFile = statSync(path).isFile();
        }
        catch {
            isRegularFile = false;
        }
    }
    return isRegularFile;
}
|
|
485
|
+
/**
 * Accept detected drift by attaching acceptance metadata to a baseline.
 *
 * The returned baseline records when the drift was accepted, optionally by
 * whom and why, plus a snapshot of the accepted diff (tool names added,
 * removed and modified, severity and per-level counts). The integrity hash
 * is computed over the baseline including that metadata.
 *
 * Only the fields listed below are carried over from `currentBaseline`.
 *
 * @param currentBaseline - The new baseline from the current server state
 * @param diff - The diff that is being accepted
 * @param options - Acceptance options (reason, acceptedBy)
 * @returns The baseline with acceptance metadata attached
 */
export function acceptDrift(currentBaseline, diff, options = {}) {
    // Snapshot of what is being accepted; modified tools are reduced to their names.
    const { toolsAdded, toolsRemoved, toolsModified, severity, breakingCount, warningCount, infoCount } = diff;
    const acceptedDiff = {
        toolsAdded: [...toolsAdded],
        toolsRemoved: [...toolsRemoved],
        toolsModified: toolsModified.map(({ tool }) => tool),
        severity,
        breakingCount,
        warningCount,
        infoCount,
    };
    const { acceptedBy, reason } = options;
    const acceptance = { acceptedAt: new Date(), acceptedBy, reason, acceptedDiff };
    // Property order is kept stable because it feeds the integrity hash.
    const { version, createdAt, mode, serverCommand, server, tools, summary, assertions, workflowSignatures } = currentBaseline;
    const acceptedBaseline = {
        version,
        createdAt,
        mode,
        serverCommand,
        server,
        tools,
        summary,
        assertions,
        workflowSignatures,
        acceptance,
    };
    return {
        ...acceptedBaseline,
        integrityHash: calculateIntegrityHash(acceptedBaseline),
    };
}
|
|
534
|
+
/**
 * Report whether a baseline carries acceptance metadata.
 *
 * @param baseline - Baseline to inspect.
 * @returns `true` when `baseline.acceptance` is anything other than `undefined`.
 */
export function hasAcceptance(baseline) {
    const { acceptance } = baseline;
    return !(acceptance === undefined);
}
|
|
540
|
+
/**
 * Return a copy of a baseline with its acceptance metadata removed.
 *
 * Useful when re-running checks after the accepted changes are no longer
 * relevant. The old integrity hash is dropped as well and a new one is
 * computed over the remaining properties. The input is not modified.
 *
 * @param baseline - Baseline that may carry `acceptance` metadata.
 * @returns A new baseline without `acceptance`, with a recalculated `integrityHash`.
 */
export function clearAcceptance(baseline) {
    // Both fields are pulled out only so they are excluded from the rest object.
    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    const { acceptance: _droppedAcceptance, integrityHash: _staleHash, ...remaining } = baseline;
    const refreshedHash = calculateIntegrityHash(remaining);
    return { ...remaining, integrityHash: refreshedHash };
}
|
|
554
|
+
//# sourceMappingURL=saver.js.map
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-generated test scenario generation from schema analysis.
|
|
3
|
+
*
|
|
4
|
+
* This module analyzes tool schemas and generates comprehensive test scenarios
|
|
5
|
+
* covering happy paths, edge cases, error handling, and security testing.
|
|
6
|
+
*/
|
|
7
|
+
import type { ToolFingerprint, BehavioralBaseline } from './types.js';
|
|
8
|
+
/**
 * Kind of behavior a generated test scenario exercises.
 */
export type ScenarioCategory =
    | 'happy_path'
    | 'edge_cases'
    | 'error_handling'
    | 'security';
|
|
12
|
+
/**
 * Priority assigned to a test scenario, from most to least urgent.
 */
export type ScenarioPriority =
    | 'critical'
    | 'high'
    | 'medium'
    | 'low';
|
|
16
|
+
/**
 * One generated test scenario targeting a single tool.
 */
export interface TestScenario {
    /** Identifier that is unique to this scenario. */
    id: string;
    /** Tool the scenario is run against. */
    toolName: string;
    /** Which kind of behavior the scenario exercises. */
    category: ScenarioCategory;
    /** Plain-language explanation of what the scenario checks. */
    description: string;
    /** Arguments passed to the tool. */
    input: Record<string, unknown>;
    /** Behavior or assertion expected from the tool. */
    expectedBehavior: string;
    /** Priority used to order execution. */
    priority: ScenarioPriority;
    /** Labels for filtering and grouping scenarios. */
    tags: Array<string>;
    /** Reason the generator produced this scenario. */
    rationale: string;
    /** Parameter (or parameters) the scenario focuses on, if any. */
    targetParameter?: string;
}
|
|
41
|
+
/**
 * All scenarios generated for one tool, grouped by category, together with
 * coverage information.
 */
export interface AutoGeneratedScenarios {
    /** Tool the scenarios belong to. */
    toolName: string;
    /** The tool's description, kept for context. */
    toolDescription: string;
    /** Scenarios using valid input that should succeed. */
    happyPath: Array<TestScenario>;
    /** Scenarios probing boundary conditions and unusual inputs. */
    edgeCases: Array<TestScenario>;
    /** Scenarios using invalid input or triggering errors. */
    errorCases: Array<TestScenario>;
    /** Scenarios focused on security. */
    securityTests: Array<TestScenario>;
    /** Estimated overall coverage, as a percentage. */
    coverageEstimate: number;
    /** Parameters for which scenarios were generated. */
    coveredParameters: Array<string>;
    /** Parameters with no scenarios (may need manual tests). */
    uncoveredParameters: Array<string>;
    /** Time at which the scenarios were generated. */
    generatedAt: Date;
}
|
|
66
|
+
/**
 * Aggregate statistics for scenario generation across a whole baseline.
 */
export interface ScenarioGenerationSummary {
    /** Number of tools that were processed. */
    toolsProcessed: number;
    /** Number of tools for which scenarios were generated. */
    toolsWithScenarios: number;
    /** Number of tools skipped (for example, no schema). */
    toolsSkipped: number;
    /** Total number of scenarios generated. */
    totalScenarios: number;
    /** Scenario count per category. */
    scenariosByCategory: Record<ScenarioCategory, number>;
    /** Scenario count per priority. */
    scenariosByPriority: Record<ScenarioPriority, number>;
    /** Mean coverage estimate across tools. */
    averageCoverage: number;
    /** Names of tools whose coverage is low and needs attention. */
    lowCoverageTools: Array<string>;
    /** Time at which the summary was generated. */
    generatedAt: Date;
}
|
|
89
|
+
/**
 * Outcome of generating scenarios for a baseline: the per-tool collections
 * plus an overall summary.
 */
export interface ScenarioGenerationResult {
    /** One scenario collection per tool. */
    scenarios: Array<AutoGeneratedScenarios>;
    /** Statistics covering all generated scenarios. */
    summary: ScenarioGenerationSummary;
}
|
|
98
|
+
/**
 * Optional settings that control scenario generation. Every field may be
 * omitted.
 */
export interface ScenarioGenerationConfig {
    /** Upper bound on happy-path scenarios per tool. */
    maxHappyPath?: number;
    /** Upper bound on edge-case scenarios per tool. */
    maxEdgeCases?: number;
    /** Upper bound on error-case scenarios per tool. */
    maxErrorCases?: number;
    /** Upper bound on security scenarios per tool. */
    maxSecurityScenarios?: number;
    /** Coverage level the generator should aim for at minimum. */
    minCoverage?: number;
    /** Categories to generate; all categories when omitted. */
    categories?: Array<ScenarioCategory>;
    /** Names of tools to generate for; all tools when omitted. */
    tools?: Array<string>;
    /** Whether to include security payloads for injection testing. */
    includeSecurityPayloads?: boolean;
}
|
|
119
|
+
/**
 * Generate test scenarios for a single tool.
 *
 * @param tool - Fingerprint of the tool to generate scenarios for.
 * @param config - Optional generation settings.
 * @returns The scenario collection generated for that tool.
 */
export declare function generateToolScenarios(
    tool: ToolFingerprint,
    config?: ScenarioGenerationConfig
): AutoGeneratedScenarios;
|
|
123
|
+
/**
 * Generate test scenarios for every tool in a baseline.
 *
 * @param baseline - Baseline whose tools are processed.
 * @param config - Optional generation settings.
 * @returns Per-tool scenario collections together with an overall summary.
 */
export declare function generateBaselineScenarios(
    baseline: BehavioralBaseline,
    config?: ScenarioGenerationConfig
): ScenarioGenerationResult;
|
|
127
|
+
/**
 * Render generated scenarios as YAML for test execution.
 *
 * @param result - Scenario generation result to render.
 * @returns The YAML text.
 */
export declare function formatScenariosAsYaml(
    result: ScenarioGenerationResult
): string;
|
|
131
|
+
/**
 * Render generated scenarios as a human-readable report.
 *
 * @param result - Scenario generation result to render.
 * @returns The report text.
 */
export declare function formatScenariosReport(
    result: ScenarioGenerationResult
): string;
|
|
135
|
+
/**
 * Select the scenarios of a generation result that have a given priority.
 *
 * @param result - Scenario generation result to search.
 * @param priority - Priority to filter by.
 * @returns The matching scenarios.
 */
export declare function getScenariosByPriority(
    result: ScenarioGenerationResult,
    priority: ScenarioPriority
): TestScenario[];
|
|
139
|
+
/**
 * Select the scenarios of a generation result that belong to a given category.
 *
 * @param result - Scenario generation result to search.
 * @param category - Category to filter by.
 * @returns The matching scenarios.
 */
export declare function getScenariosByCategory(
    result: ScenarioGenerationResult,
    category: ScenarioCategory
): TestScenario[];
|
|
143
|
+
/**
 * Select the critical scenarios of a generation result, intended for smoke
 * testing.
 *
 * NOTE(review): the selection rule is not visible in this declaration file —
 * presumably scenarios with priority 'critical'; confirm in the implementation.
 *
 * @param result - Scenario generation result to search.
 * @returns The critical scenarios.
 */
export declare function getCriticalScenarios(
    result: ScenarioGenerationResult
): TestScenario[];
|
|
147
|
+
/**
 * Select the security-focused scenarios of a generation result.
 *
 * NOTE(review): the selection rule is not visible in this declaration file —
 * presumably scenarios in the 'security' category; confirm in the implementation.
 *
 * @param result - Scenario generation result to search.
 * @returns The security-focused scenarios.
 */
export declare function getSecurityScenarios(
    result: ScenarioGenerationResult
): TestScenario[];
|
|
151
|
+
//# sourceMappingURL=scenario-generator.d.ts.map
|