@dotsetlabs/bellwether 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +41 -0
- package/README.md +22 -2
- package/dist/cli/commands/check.js +11 -3
- package/dist/cli/commands/discover.js +1 -0
- package/dist/cli/commands/explore.js +11 -3
- package/dist/config/loader.d.ts +14 -0
- package/dist/config/loader.js +59 -0
- package/dist/config/validator.d.ts +18 -18
- package/dist/constants/testing.d.ts +64 -0
- package/dist/constants/testing.js +149 -25
- package/dist/interview/schema-test-generator.js +320 -24
- package/dist/interview/types.d.ts +23 -0
- package/dist/transport/http-transport.d.ts +6 -2
- package/dist/transport/http-transport.js +23 -9
- package/dist/transport/mcp-client.d.ts +11 -0
- package/dist/transport/mcp-client.js +98 -1
- package/dist/transport/types.d.ts +20 -2
- package/dist/validation/semantic-test-generator.d.ts +7 -0
- package/dist/validation/semantic-test-generator.js +13 -4
- package/dist/version.js +1 -1
- package/package.json +1 -1
- package/schemas/bellwether-check.schema.json +3 -2
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,47 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.12.0] - 2026-01-26
|
|
6
|
+
|
|
7
|
+
### Features
|
|
8
|
+
|
|
9
|
+
- **Streamable HTTP transport improvements**: Full compliance with [MCP Streamable HTTP specification](https://modelcontextprotocol.io/specification/2025-03-26/basic/transports)
|
|
10
|
+
- Fixed Accept header to include both `application/json` and `text/event-stream` as required by spec
|
|
11
|
+
- Added automatic session ID capture from `Mcp-Session-Id` response header
|
|
12
|
+
- Session ID is automatically included in all subsequent requests after initialization
|
|
13
|
+
- Changed header name from `X-Session-Id` to `Mcp-Session-Id` per MCP specification
|
|
14
|
+
- **False positive reduction**: Intelligent pattern detection to reduce false positives in automated testing
|
|
15
|
+
- **Operation-based tool detection**: Tools with `operation` enum + `args` object patterns now use flexible `either` outcome
|
|
16
|
+
- **Self-stateful tool detection**: Tools requiring prior state (session/chain/context) are handled appropriately
|
|
17
|
+
- **Complex array schema detection**: Arrays with nested objects containing required properties use flexible validation
|
|
18
|
+
- **Flexible semantic validation**: Semantic type tests now use `either` outcome by default, allowing tools to accept varied formats (e.g., dayjs, date-fns)
|
|
19
|
+
- **Pattern detection metadata**: Test metadata now includes detection flags for transparency
|
|
20
|
+
- `operationBased`, `operationParam`, `argsParam` for operation-based tools
|
|
21
|
+
- `selfStateful`, `selfStatefulReason` for stateful tools
|
|
22
|
+
- `hasComplexArrays`, `complexArrayParams` for complex schema tools
|
|
23
|
+
|
|
24
|
+
### Configuration
|
|
25
|
+
|
|
26
|
+
- **New semantic validation option**: `check.flexibleSemanticTests` (default: `true`)
|
|
27
|
+
- When `true`, semantic validation tests use `either` outcome
|
|
28
|
+
- Set to `false` for strict format enforcement
|
|
29
|
+
|
|
30
|
+
### Documentation
|
|
31
|
+
|
|
32
|
+
- Updated remote-servers guide with correct streamable-http protocol details
|
|
33
|
+
- Added MCP specification link for transport documentation
|
|
34
|
+
- Clarified session ID behavior and Accept header requirements
|
|
35
|
+
|
|
36
|
+
### Fixes
|
|
37
|
+
|
|
38
|
+
- **Streamable HTTP session management**: Fixed session ID header to use MCP-compliant `Mcp-Session-Id`
|
|
39
|
+
- **False positive tests**: Tests for operation-based, self-stateful, and complex array patterns no longer fail incorrectly
|
|
40
|
+
|
|
41
|
+
### Tests
|
|
42
|
+
|
|
43
|
+
- Added 17 HTTP transport tests including session ID capture verification
|
|
44
|
+
- Added 11 new pattern detection tests for false positive reduction
|
|
45
|
+
|
|
5
46
|
## [0.11.0] - 2026-01-26
|
|
6
47
|
|
|
7
48
|
### Breaking Changes
|
package/README.md
CHANGED
|
@@ -245,6 +245,26 @@ bellwether golden save --tool my_tool --args '{"id":"123"}'
|
|
|
245
245
|
bellwether golden compare
|
|
246
246
|
```
|
|
247
247
|
|
|
248
|
+
### Server Command Options
|
|
249
|
+
|
|
250
|
+
Server commands can be specified in several ways:
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
# As separate command and args
|
|
254
|
+
bellwether check npx @mcp/server
|
|
255
|
+
|
|
256
|
+
# With flags for the server command (flags pass through automatically)
|
|
257
|
+
bellwether check npx -y @mcp/server
|
|
258
|
+
|
|
259
|
+
# In config as a command string (auto-parsed)
|
|
260
|
+
# bellwether.yaml:
|
|
261
|
+
# server:
|
|
262
|
+
# command: "npx -y @mcp/server"
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
For complex commands with flags, the CLI automatically parses command strings with spaces
|
|
266
|
+
and handles server flags like `-y` correctly without requiring `--` separators.
|
|
267
|
+
|
|
248
268
|
### Baseline Commands
|
|
249
269
|
|
|
250
270
|
```bash
|
|
@@ -674,14 +694,14 @@ Use with: `bellwether init --preset <name> npx @mcp/server`
|
|
|
674
694
|
|
|
675
695
|
```yaml
|
|
676
696
|
- name: Detect Behavioral Drift
|
|
677
|
-
uses: dotsetlabs/bellwether
|
|
697
|
+
uses: dotsetlabs/bellwether@v1
|
|
678
698
|
with:
|
|
679
699
|
server-command: 'npx @mcp/your-server'
|
|
680
700
|
baseline-path: './bellwether-baseline.json'
|
|
681
701
|
fail-on-severity: 'warning'
|
|
682
702
|
```
|
|
683
703
|
|
|
684
|
-
See [
|
|
704
|
+
See the [CI/CD Integration guide](https://bellwether.sh/guides/ci-cd) for full documentation.
|
|
685
705
|
|
|
686
706
|
## Environment Variables
|
|
687
707
|
|
|
@@ -13,7 +13,7 @@ import { MCPClient } from '../../transport/mcp-client.js';
|
|
|
13
13
|
import { discover } from '../../discovery/discovery.js';
|
|
14
14
|
import { Interviewer } from '../../interview/interviewer.js';
|
|
15
15
|
import { generateContractMd, generateJsonReport } from '../../docs/generator.js';
|
|
16
|
-
import { loadConfig, ConfigNotFoundError } from '../../config/loader.js';
|
|
16
|
+
import { loadConfig, ConfigNotFoundError, parseCommandString } from '../../config/loader.js';
|
|
17
17
|
import { validateConfigForCheck, getConfigWarnings } from '../../config/validator.js';
|
|
18
18
|
import { createBaseline, loadBaseline, saveBaseline, getToolFingerprints, toToolCapability, compareBaselines, acceptDrift, formatDiffText, formatDiffJson, formatDiffCompact, formatDiffGitHubActions, formatDiffMarkdown, formatDiffJUnit, formatDiffSarif, applySeverityConfig, shouldFailOnDiff, analyzeForIncremental, formatIncrementalSummary, runSecurityTests, parseSecurityCategories, getAllSecurityCategories, } from '../../baseline/index.js';
|
|
19
19
|
import { convertAssertions } from '../../baseline/converter.js';
|
|
@@ -29,6 +29,7 @@ import { configureLogger } from '../../logging/logger.js';
|
|
|
29
29
|
import { EXIT_CODES, SEVERITY_TO_EXIT_CODE, PATHS, SECURITY_TESTING, CHECK_SAMPLING, WORKFLOW, REPORT_SCHEMAS, } from '../../constants.js';
|
|
30
30
|
export const checkCommand = new Command('check')
|
|
31
31
|
.description('Check MCP server schema and detect drift (free, fast, deterministic)')
|
|
32
|
+
.allowUnknownOption() // Allow server flags like -y for npx to pass through
|
|
32
33
|
.argument('[server-command]', 'Server command (overrides config)')
|
|
33
34
|
.argument('[args...]', 'Server arguments')
|
|
34
35
|
.option('-c, --config <path>', 'Path to config file', PATHS.DEFAULT_CONFIG_FILENAME)
|
|
@@ -52,8 +53,15 @@ export const checkCommand = new Command('check')
|
|
|
52
53
|
throw error;
|
|
53
54
|
}
|
|
54
55
|
// Determine server command (CLI arg overrides config)
|
|
55
|
-
|
|
56
|
-
|
|
56
|
+
// If command string contains spaces and no separate args, parse it
|
|
57
|
+
let serverCommand = serverCommandArg || config.server.command;
|
|
58
|
+
let args = serverArgs.length > 0 ? serverArgs : config.server.args;
|
|
59
|
+
// Handle command strings like "npx @package" in config when args is empty
|
|
60
|
+
if (!serverCommandArg && args.length === 0 && serverCommand.includes(' ')) {
|
|
61
|
+
const parsed = parseCommandString(serverCommand);
|
|
62
|
+
serverCommand = parsed.command;
|
|
63
|
+
args = parsed.args;
|
|
64
|
+
}
|
|
57
65
|
const transport = config.server.transport ?? 'stdio';
|
|
58
66
|
const remoteUrl = config.server.url?.trim();
|
|
59
67
|
const remoteSessionId = config.server.sessionId?.trim();
|
|
@@ -92,6 +92,7 @@ async function discoverAction(command, args, options) {
|
|
|
92
92
|
}
|
|
93
93
|
export const discoverCommand = new Command('discover')
|
|
94
94
|
.description('Discover MCP server capabilities (tools, prompts, resources)')
|
|
95
|
+
.allowUnknownOption() // Allow server flags like -y for npx to pass through
|
|
95
96
|
.argument('[command]', 'Command to start the MCP server (not required for remote)')
|
|
96
97
|
.argument('[args...]', 'Arguments to pass to the server')
|
|
97
98
|
.option('-c, --config <path>', 'Path to config file')
|
|
@@ -13,7 +13,7 @@ import { MCPClient } from '../../transport/mcp-client.js';
|
|
|
13
13
|
import { discover } from '../../discovery/discovery.js';
|
|
14
14
|
import { Interviewer } from '../../interview/interviewer.js';
|
|
15
15
|
import { generateAgentsMd, generateJsonReport } from '../../docs/generator.js';
|
|
16
|
-
import { loadConfig, ConfigNotFoundError } from '../../config/loader.js';
|
|
16
|
+
import { loadConfig, ConfigNotFoundError, parseCommandString } from '../../config/loader.js';
|
|
17
17
|
import { validateConfigForExplore } from '../../config/validator.js';
|
|
18
18
|
import { CostTracker, estimateInterviewCost, estimateInterviewTime, formatCostAndTimeEstimate, suggestOptimizations, formatOptimizationSuggestions, } from '../../cost/index.js';
|
|
19
19
|
import { getMetricsCollector, resetMetricsCollector } from '../../metrics/collector.js';
|
|
@@ -39,6 +39,7 @@ function parsePersonasWithWarning(personaList) {
|
|
|
39
39
|
}
|
|
40
40
|
export const exploreCommand = new Command('explore')
|
|
41
41
|
.description('Explore MCP server behavior with LLM-powered testing')
|
|
42
|
+
.allowUnknownOption() // Allow server flags like -y for npx to pass through
|
|
42
43
|
.argument('[server-command]', 'Server command (overrides config)')
|
|
43
44
|
.argument('[args...]', 'Server arguments')
|
|
44
45
|
.option('-c, --config <path>', 'Path to config file', PATHS.DEFAULT_CONFIG_FILENAME)
|
|
@@ -56,8 +57,15 @@ export const exploreCommand = new Command('explore')
|
|
|
56
57
|
throw error;
|
|
57
58
|
}
|
|
58
59
|
// Determine server command (CLI arg overrides config)
|
|
59
|
-
|
|
60
|
-
|
|
60
|
+
// If command string contains spaces and no separate args, parse it
|
|
61
|
+
let serverCommand = serverCommandArg || config.server.command;
|
|
62
|
+
let args = serverArgs.length > 0 ? serverArgs : config.server.args;
|
|
63
|
+
// Handle command strings like "npx @package" in config when args is empty
|
|
64
|
+
if (!serverCommandArg && args.length === 0 && serverCommand.includes(' ')) {
|
|
65
|
+
const parsed = parseCommandString(serverCommand);
|
|
66
|
+
serverCommand = parsed.command;
|
|
67
|
+
args = parsed.args;
|
|
68
|
+
}
|
|
61
69
|
const transport = config.server.transport ?? 'stdio';
|
|
62
70
|
const remoteUrl = config.server.url?.trim();
|
|
63
71
|
const remoteSessionId = config.server.sessionId?.trim();
|
package/dist/config/loader.d.ts
CHANGED
|
@@ -6,6 +6,20 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { type BellwetherConfig } from './validator.js';
|
|
8
8
|
export type { BellwetherConfig };
|
|
9
|
+
/**
|
|
10
|
+
* Parse a command string into command and arguments.
|
|
11
|
+
* Handles quoted strings properly for cases like:
|
|
12
|
+
* "npx @gitkraken/gk@latest" -> { command: "npx", args: ["@gitkraken/gk@latest"] }
|
|
13
|
+
* "node ./server.js --port 3000" -> { command: "node", args: ["./server.js", "--port", "3000"] }
|
|
14
|
+
* 'my-cmd "path with spaces"' -> { command: "my-cmd", args: ["path with spaces"] }
|
|
15
|
+
*
|
|
16
|
+
* @param commandString - Full command string that may include arguments
|
|
17
|
+
* @returns Parsed command and arguments
|
|
18
|
+
*/
|
|
19
|
+
export declare function parseCommandString(commandString: string): {
|
|
20
|
+
command: string;
|
|
21
|
+
args: string[];
|
|
22
|
+
};
|
|
9
23
|
/**
|
|
10
24
|
* Error thrown when no config file is found.
|
|
11
25
|
*/
|
package/dist/config/loader.js
CHANGED
|
@@ -58,6 +58,65 @@ function interpolateConfig(obj) {
|
|
|
58
58
|
}
|
|
59
59
|
return obj;
|
|
60
60
|
}
|
|
61
|
+
/**
 * Parse a command string into command and arguments.
 *
 * Tokenizing rules:
 * - Unquoted whitespace (space, tab, CR, LF) separates tokens; runs of
 *   whitespace collapse and never produce empty tokens.
 * - Single or double quotes group text, including whitespace, into one token.
 *   The quote characters themselves are dropped. The other quote character is
 *   literal while inside a quoted section.
 * - A backslash followed by `"`, `'` or `\` yields that following character
 *   literally (inside or outside quotes). A backslash before any other
 *   character is kept as-is.
 * - An explicitly quoted empty string (`""` or `''`) yields an empty argument.
 * - An unterminated quote is tolerated: the remaining text becomes the last token.
 *
 * Examples:
 *   "npx @gitkraken/gk@latest" -> { command: "npx", args: ["@gitkraken/gk@latest"] }
 *   "node ./server.js --port 3000" -> { command: "node", args: ["./server.js", "--port", "3000"] }
 *   'my-cmd "path with spaces"' -> { command: "my-cmd", args: ["path with spaces"] }
 *   'my-cmd "C:\\" next' -> { command: "my-cmd", args: ["C:\", "next"] }
 *
 * @param commandString - Full command string that may include arguments
 * @returns Parsed command and arguments; `command` is '' when the input holds no tokens
 */
export function parseCommandString(commandString) {
    const tokens = [];
    let current = '';
    // Tracks whether a token has begun, so that an empty quoted string ("")
    // still produces an (empty) token instead of being silently dropped.
    let tokenStarted = false;
    // Active quote character, or '' when not inside a quoted section.
    let quoteChar = '';
    const flushToken = () => {
        if (tokenStarted) {
            tokens.push(current);
            current = '';
            tokenStarted = false;
        }
    };
    for (let i = 0; i < commandString.length; i++) {
        const char = commandString[i];
        if (char === '\\') {
            // Escape sequences (\" \' \\) are consumed as a pair here, so any
            // quote character that reaches the checks below is unescaped. No
            // look-behind at the previous character is needed; doing so would
            // misread the second half of an escaped backslash as an escape.
            const nextChar = commandString[i + 1];
            if (nextChar === '"' || nextChar === "'" || nextChar === '\\') {
                current += nextChar;
                tokenStarted = true;
                i++; // Skip the escaped character
                continue;
            }
            // Lone backslash: falls through and is kept literally.
        }
        else if (quoteChar === '') {
            if (char === '"' || char === "'") {
                // Opening quote starts (or continues) a token but adds no text.
                quoteChar = char;
                tokenStarted = true;
                continue;
            }
            if (char === ' ' || char === '\t' || char === '\n' || char === '\r') {
                flushToken();
                continue;
            }
        }
        else if (char === quoteChar) {
            // Matching closing quote ends the quoted section.
            quoteChar = '';
            continue;
        }
        // Regular character (including whitespace inside quotes)
        current += char;
        tokenStarted = true;
    }
    // Push final token (also covers an unterminated quoted section)
    flushToken();
    return {
        command: tokens[0] ?? '',
        args: tokens.slice(1),
    };
}
|
|
61
120
|
/**
|
|
62
121
|
* Error thrown when no config file is found.
|
|
63
122
|
*/
|
|
@@ -24,20 +24,20 @@ export declare const serverConfigSchema: z.ZodDefault<z.ZodObject<{
|
|
|
24
24
|
/** Additional environment variables */
|
|
25
25
|
env: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
26
26
|
}, "strip", z.ZodTypeAny, {
|
|
27
|
+
command: string;
|
|
28
|
+
args: string[];
|
|
27
29
|
transport: "stdio" | "sse" | "streamable-http";
|
|
28
30
|
timeout: number;
|
|
29
31
|
sessionId: string;
|
|
30
32
|
url: string;
|
|
31
|
-
args: string[];
|
|
32
|
-
command: string;
|
|
33
33
|
env?: Record<string, string> | undefined;
|
|
34
34
|
}, {
|
|
35
|
+
command?: string | undefined;
|
|
36
|
+
args?: string[] | undefined;
|
|
35
37
|
transport?: "stdio" | "sse" | "streamable-http" | undefined;
|
|
36
38
|
timeout?: number | undefined;
|
|
37
39
|
sessionId?: string | undefined;
|
|
38
40
|
url?: string | undefined;
|
|
39
|
-
args?: string[] | undefined;
|
|
40
|
-
command?: string | undefined;
|
|
41
41
|
env?: Record<string, string> | undefined;
|
|
42
42
|
}>>;
|
|
43
43
|
/**
|
|
@@ -1101,16 +1101,16 @@ export declare const contractConfigSchema: z.ZodDefault<z.ZodObject<{
|
|
|
1101
1101
|
/** Exit with error when violations are found */
|
|
1102
1102
|
failOnViolation: z.ZodDefault<z.ZodBoolean>;
|
|
1103
1103
|
}, "strip", z.ZodTypeAny, {
|
|
1104
|
+
mode: "strict" | "lenient" | "report";
|
|
1104
1105
|
format: "text" | "json" | "markdown";
|
|
1105
1106
|
timeout: number;
|
|
1106
|
-
mode: "strict" | "lenient" | "report";
|
|
1107
1107
|
failOnViolation: boolean;
|
|
1108
1108
|
path?: string | undefined;
|
|
1109
1109
|
}, {
|
|
1110
|
+
mode?: "strict" | "lenient" | "report" | undefined;
|
|
1110
1111
|
format?: "text" | "json" | "markdown" | undefined;
|
|
1111
1112
|
timeout?: number | undefined;
|
|
1112
1113
|
path?: string | undefined;
|
|
1113
|
-
mode?: "strict" | "lenient" | "report" | undefined;
|
|
1114
1114
|
failOnViolation?: boolean | undefined;
|
|
1115
1115
|
}>>;
|
|
1116
1116
|
/**
|
|
@@ -1137,20 +1137,20 @@ export declare const bellwetherConfigSchema: z.ZodObject<{
|
|
|
1137
1137
|
/** Additional environment variables */
|
|
1138
1138
|
env: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
1139
1139
|
}, "strip", z.ZodTypeAny, {
|
|
1140
|
+
command: string;
|
|
1141
|
+
args: string[];
|
|
1140
1142
|
transport: "stdio" | "sse" | "streamable-http";
|
|
1141
1143
|
timeout: number;
|
|
1142
1144
|
sessionId: string;
|
|
1143
1145
|
url: string;
|
|
1144
|
-
args: string[];
|
|
1145
|
-
command: string;
|
|
1146
1146
|
env?: Record<string, string> | undefined;
|
|
1147
1147
|
}, {
|
|
1148
|
+
command?: string | undefined;
|
|
1149
|
+
args?: string[] | undefined;
|
|
1148
1150
|
transport?: "stdio" | "sse" | "streamable-http" | undefined;
|
|
1149
1151
|
timeout?: number | undefined;
|
|
1150
1152
|
sessionId?: string | undefined;
|
|
1151
1153
|
url?: string | undefined;
|
|
1152
|
-
args?: string[] | undefined;
|
|
1153
|
-
command?: string | undefined;
|
|
1154
1154
|
env?: Record<string, string> | undefined;
|
|
1155
1155
|
}>>;
|
|
1156
1156
|
/** LLM configuration (used by explore command) */
|
|
@@ -1888,16 +1888,16 @@ export declare const bellwetherConfigSchema: z.ZodObject<{
|
|
|
1888
1888
|
/** Exit with error when violations are found */
|
|
1889
1889
|
failOnViolation: z.ZodDefault<z.ZodBoolean>;
|
|
1890
1890
|
}, "strip", z.ZodTypeAny, {
|
|
1891
|
+
mode: "strict" | "lenient" | "report";
|
|
1891
1892
|
format: "text" | "json" | "markdown";
|
|
1892
1893
|
timeout: number;
|
|
1893
|
-
mode: "strict" | "lenient" | "report";
|
|
1894
1894
|
failOnViolation: boolean;
|
|
1895
1895
|
path?: string | undefined;
|
|
1896
1896
|
}, {
|
|
1897
|
+
mode?: "strict" | "lenient" | "report" | undefined;
|
|
1897
1898
|
format?: "text" | "json" | "markdown" | undefined;
|
|
1898
1899
|
timeout?: number | undefined;
|
|
1899
1900
|
path?: string | undefined;
|
|
1900
|
-
mode?: "strict" | "lenient" | "report" | undefined;
|
|
1901
1901
|
failOnViolation?: boolean | undefined;
|
|
1902
1902
|
}>>;
|
|
1903
1903
|
}, "strip", z.ZodTypeAny, {
|
|
@@ -1922,12 +1922,12 @@ export declare const bellwetherConfigSchema: z.ZodObject<{
|
|
|
1922
1922
|
defaultServerCommand: string;
|
|
1923
1923
|
};
|
|
1924
1924
|
server: {
|
|
1925
|
+
command: string;
|
|
1926
|
+
args: string[];
|
|
1925
1927
|
transport: "stdio" | "sse" | "streamable-http";
|
|
1926
1928
|
timeout: number;
|
|
1927
1929
|
sessionId: string;
|
|
1928
1930
|
url: string;
|
|
1929
|
-
args: string[];
|
|
1930
|
-
command: string;
|
|
1931
1931
|
env?: Record<string, string> | undefined;
|
|
1932
1932
|
};
|
|
1933
1933
|
discovery: {
|
|
@@ -2076,9 +2076,9 @@ export declare const bellwetherConfigSchema: z.ZodObject<{
|
|
|
2076
2076
|
badgeOnly: boolean;
|
|
2077
2077
|
};
|
|
2078
2078
|
contract: {
|
|
2079
|
+
mode: "strict" | "lenient" | "report";
|
|
2079
2080
|
format: "text" | "json" | "markdown";
|
|
2080
2081
|
timeout: number;
|
|
2081
|
-
mode: "strict" | "lenient" | "report";
|
|
2082
2082
|
failOnViolation: boolean;
|
|
2083
2083
|
path?: string | undefined;
|
|
2084
2084
|
};
|
|
@@ -2104,12 +2104,12 @@ export declare const bellwetherConfigSchema: z.ZodObject<{
|
|
|
2104
2104
|
defaultServerCommand?: string | undefined;
|
|
2105
2105
|
} | undefined;
|
|
2106
2106
|
server?: {
|
|
2107
|
+
command?: string | undefined;
|
|
2108
|
+
args?: string[] | undefined;
|
|
2107
2109
|
transport?: "stdio" | "sse" | "streamable-http" | undefined;
|
|
2108
2110
|
timeout?: number | undefined;
|
|
2109
2111
|
sessionId?: string | undefined;
|
|
2110
2112
|
url?: string | undefined;
|
|
2111
|
-
args?: string[] | undefined;
|
|
2112
|
-
command?: string | undefined;
|
|
2113
2113
|
env?: Record<string, string> | undefined;
|
|
2114
2114
|
} | undefined;
|
|
2115
2115
|
discovery?: {
|
|
@@ -2258,10 +2258,10 @@ export declare const bellwetherConfigSchema: z.ZodObject<{
|
|
|
2258
2258
|
badgeOnly?: boolean | undefined;
|
|
2259
2259
|
} | undefined;
|
|
2260
2260
|
contract?: {
|
|
2261
|
+
mode?: "strict" | "lenient" | "report" | undefined;
|
|
2261
2262
|
format?: "text" | "json" | "markdown" | undefined;
|
|
2262
2263
|
timeout?: number | undefined;
|
|
2263
2264
|
path?: string | undefined;
|
|
2264
|
-
mode?: "strict" | "lenient" | "report" | undefined;
|
|
2265
2265
|
failOnViolation?: boolean | undefined;
|
|
2266
2266
|
} | undefined;
|
|
2267
2267
|
}>;
|
|
@@ -474,6 +474,55 @@ export declare const STATEFUL_TESTING: {
|
|
|
474
474
|
/** Maximum number of stored values across tool calls */
|
|
475
475
|
readonly MAX_STORED_VALUES: 50;
|
|
476
476
|
};
|
|
477
|
+
/**
|
|
478
|
+
* Configuration for detecting tool patterns that commonly cause false positives.
|
|
479
|
+
* Used by schema-test-generator.ts to adjust test expectations.
|
|
480
|
+
*
|
|
481
|
+
* These patterns help Bellwether distinguish between:
|
|
482
|
+
* - Actual tool bugs (should be flagged)
|
|
483
|
+
* - Expected behavior for specialized tool patterns (should not be flagged)
|
|
484
|
+
*/
|
|
485
|
+
/**
|
|
486
|
+
* Operation-based tool detection patterns.
|
|
487
|
+
* Tools with operation enum + args object pattern dispatch to different handlers
|
|
488
|
+
* where each operation has different required arguments.
|
|
489
|
+
*/
|
|
490
|
+
export declare const OPERATION_BASED_DETECTION: {
|
|
491
|
+
/** Parameter names that indicate an operation/action discriminator */
|
|
492
|
+
readonly OPERATION_PARAM_NAMES: readonly string[];
|
|
493
|
+
/** Parameter names that indicate a dynamic arguments object */
|
|
494
|
+
readonly ARGS_PARAM_NAMES: readonly string[];
|
|
495
|
+
/** Minimum enum values to consider a dispatch pattern (single value is not dispatch) */
|
|
496
|
+
readonly MIN_ENUM_VALUES: 2;
|
|
497
|
+
};
|
|
498
|
+
/**
|
|
499
|
+
* Self-stateful tool detection patterns.
|
|
500
|
+
* Tools that require prior invocation to establish state (sessions, chains, contexts).
|
|
501
|
+
* These tools need an active session/chain before they can be used.
|
|
502
|
+
*/
|
|
503
|
+
export declare const SELF_STATEFUL_DETECTION: {
|
|
504
|
+
/** Description patterns indicating the tool requires prior state */
|
|
505
|
+
readonly DESCRIPTION_PATTERNS: readonly RegExp[];
|
|
506
|
+
/** Parameter names that suggest session/state dependency */
|
|
507
|
+
readonly STATE_PARAM_PATTERNS: readonly RegExp[];
|
|
508
|
+
/** Tool name patterns that suggest stateful behavior */
|
|
509
|
+
readonly STATEFUL_TOOL_NAME_PATTERNS: readonly RegExp[];
|
|
510
|
+
};
|
|
511
|
+
/**
|
|
512
|
+
* Complex array schema detection patterns.
|
|
513
|
+
* Tools with arrays whose items have complex nested structures with required properties.
|
|
514
|
+
* These require properly structured input data that simple test generation can't provide.
|
|
515
|
+
*/
|
|
516
|
+
export declare const COMPLEX_SCHEMA_DETECTION: {
|
|
517
|
+
/** Maximum nesting depth before considering schema "complex" */
|
|
518
|
+
readonly MAX_SIMPLE_DEPTH: 2;
|
|
519
|
+
/** Minimum required properties in array items to consider "complex" */
|
|
520
|
+
readonly MIN_REQUIRED_PROPERTIES: 1;
|
|
521
|
+
/** Property names that typically require structured data (chart/data visualization) */
|
|
522
|
+
readonly STRUCTURED_DATA_PATTERNS: readonly RegExp[];
|
|
523
|
+
/** Minimum array items in schema examples/defaults to use instead of generating */
|
|
524
|
+
readonly MIN_EXAMPLE_ITEMS: 2;
|
|
525
|
+
};
|
|
477
526
|
/**
|
|
478
527
|
* Security testing configuration for check mode.
|
|
479
528
|
* Used by security-tester.ts for deterministic vulnerability detection.
|
|
@@ -550,6 +599,14 @@ export declare const SEMANTIC_VALIDATION: {
|
|
|
550
599
|
readonly MAX_INVALID_VALUES_PER_PARAM: 2;
|
|
551
600
|
/** Maximum semantic tests per tool */
|
|
552
601
|
readonly MAX_SEMANTIC_TESTS_PER_TOOL: 6;
|
|
602
|
+
/**
|
|
603
|
+
* Enable flexible semantic validation by default.
|
|
604
|
+
* When true, semantic tests use 'either' outcome, allowing tools to
|
|
605
|
+
* accept flexible formats (e.g., dayjs accepting various date strings).
|
|
606
|
+
* This reduces false positives for tools with lenient parsing libraries.
|
|
607
|
+
* Set to false for strict semantic validation enforcement.
|
|
608
|
+
*/
|
|
609
|
+
readonly FLEXIBLE_BY_DEFAULT: true;
|
|
553
610
|
/** Confidence scores for different inference sources */
|
|
554
611
|
readonly CONFIDENCE: {
|
|
555
612
|
/** Confidence when schema format explicitly specifies type */
|
|
@@ -719,7 +776,9 @@ export declare const EXTERNAL_DEPENDENCIES: {
|
|
|
719
776
|
};
|
|
720
777
|
readonly stripe: {
|
|
721
778
|
readonly name: "Stripe";
|
|
779
|
+
/** Patterns in tool names/descriptions that indicate Stripe usage (high confidence = has 'stripe' in name) */
|
|
722
780
|
readonly toolPatterns: readonly RegExp[];
|
|
781
|
+
/** Low confidence patterns - may match non-Stripe tools (removed: payment, charge, subscription) */
|
|
723
782
|
readonly errorPatterns: readonly RegExp[];
|
|
724
783
|
readonly statusCodes: readonly number[];
|
|
725
784
|
readonly remediation: "Configure Stripe API keys (STRIPE_SECRET_KEY)";
|
|
@@ -747,6 +806,7 @@ export declare const EXTERNAL_DEPENDENCIES: {
|
|
|
747
806
|
};
|
|
748
807
|
readonly openai: {
|
|
749
808
|
readonly name: "OpenAI";
|
|
809
|
+
/** Patterns in tool names that indicate OpenAI usage (removed generic: completion, embedding) */
|
|
750
810
|
readonly toolPatterns: readonly RegExp[];
|
|
751
811
|
readonly errorPatterns: readonly RegExp[];
|
|
752
812
|
readonly statusCodes: readonly number[];
|
|
@@ -789,6 +849,7 @@ export declare const EXTERNAL_DEPENDENCIES: {
|
|
|
789
849
|
};
|
|
790
850
|
readonly twilio: {
|
|
791
851
|
readonly name: "Twilio";
|
|
852
|
+
/** Patterns in tool names that indicate Twilio usage (removed generic: sms) */
|
|
792
853
|
readonly toolPatterns: readonly RegExp[];
|
|
793
854
|
readonly errorPatterns: readonly RegExp[];
|
|
794
855
|
readonly statusCodes: readonly number[];
|
|
@@ -803,6 +864,7 @@ export declare const EXTERNAL_DEPENDENCIES: {
|
|
|
803
864
|
};
|
|
804
865
|
readonly sendgrid: {
|
|
805
866
|
readonly name: "SendGrid";
|
|
867
|
+
/** Patterns in tool names that indicate SendGrid usage (removed generic: email.*send) */
|
|
806
868
|
readonly toolPatterns: readonly RegExp[];
|
|
807
869
|
readonly errorPatterns: readonly RegExp[];
|
|
808
870
|
readonly statusCodes: readonly number[];
|
|
@@ -817,6 +879,7 @@ export declare const EXTERNAL_DEPENDENCIES: {
|
|
|
817
879
|
};
|
|
818
880
|
readonly github: {
|
|
819
881
|
readonly name: "GitHub";
|
|
882
|
+
/** Patterns in tool names that indicate GitHub usage (removed generic: repository, pull.*request) */
|
|
820
883
|
readonly toolPatterns: readonly RegExp[];
|
|
821
884
|
readonly errorPatterns: readonly RegExp[];
|
|
822
885
|
readonly statusCodes: readonly number[];
|
|
@@ -831,6 +894,7 @@ export declare const EXTERNAL_DEPENDENCIES: {
|
|
|
831
894
|
};
|
|
832
895
|
readonly database: {
|
|
833
896
|
readonly name: "Database";
|
|
897
|
+
/** Patterns in tool names that indicate database usage (removed generic: sql) */
|
|
834
898
|
readonly toolPatterns: readonly RegExp[];
|
|
835
899
|
readonly errorPatterns: readonly RegExp[];
|
|
836
900
|
readonly statusCodes: readonly number[];
|