@dotsetlabs/bellwether 1.0.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +74 -0
- package/README.md +8 -2
- package/dist/baseline/accessors.d.ts +1 -1
- package/dist/baseline/accessors.js +1 -3
- package/dist/baseline/baseline-format.d.ts +287 -0
- package/dist/baseline/baseline-format.js +12 -0
- package/dist/baseline/comparator.js +249 -11
- package/dist/baseline/converter.d.ts +15 -15
- package/dist/baseline/converter.js +46 -34
- package/dist/baseline/diff.d.ts +1 -1
- package/dist/baseline/diff.js +45 -28
- package/dist/baseline/error-analyzer.d.ts +1 -1
- package/dist/baseline/error-analyzer.js +90 -17
- package/dist/baseline/incremental-checker.js +8 -5
- package/dist/baseline/index.d.ts +2 -12
- package/dist/baseline/index.js +3 -23
- package/dist/baseline/performance-tracker.d.ts +0 -1
- package/dist/baseline/performance-tracker.js +13 -20
- package/dist/baseline/response-fingerprint.js +39 -2
- package/dist/baseline/saver.js +41 -10
- package/dist/baseline/schema-compare.d.ts +22 -0
- package/dist/baseline/schema-compare.js +259 -16
- package/dist/baseline/types.d.ts +10 -7
- package/dist/cache/response-cache.d.ts +8 -0
- package/dist/cache/response-cache.js +110 -0
- package/dist/cli/commands/check.js +23 -6
- package/dist/cli/commands/explore.js +34 -14
- package/dist/cli/index.js +8 -0
- package/dist/config/template.js +8 -7
- package/dist/config/validator.d.ts +59 -59
- package/dist/config/validator.js +245 -90
- package/dist/constants/core.d.ts +4 -0
- package/dist/constants/core.js +8 -19
- package/dist/constants/registry.d.ts +17 -0
- package/dist/constants/registry.js +18 -0
- package/dist/constants/testing.d.ts +0 -369
- package/dist/constants/testing.js +18 -456
- package/dist/constants.d.ts +1 -1
- package/dist/constants.js +1 -1
- package/dist/docs/contract.js +131 -83
- package/dist/docs/report.js +8 -5
- package/dist/interview/insights.d.ts +17 -0
- package/dist/interview/insights.js +52 -0
- package/dist/interview/interviewer.js +52 -10
- package/dist/interview/prompt-test-generator.d.ts +12 -0
- package/dist/interview/prompt-test-generator.js +77 -0
- package/dist/interview/resource-test-generator.d.ts +12 -0
- package/dist/interview/resource-test-generator.js +20 -0
- package/dist/interview/schema-inferrer.js +26 -4
- package/dist/interview/schema-test-generator.js +278 -31
- package/dist/interview/stateful-test-runner.d.ts +3 -0
- package/dist/interview/stateful-test-runner.js +80 -0
- package/dist/interview/types.d.ts +12 -0
- package/dist/transport/mcp-client.js +1 -1
- package/dist/transport/sse-transport.d.ts +7 -3
- package/dist/transport/sse-transport.js +157 -67
- package/dist/version.js +1 -1
- package/man/bellwether.1 +1 -1
- package/man/bellwether.1.md +2 -2
- package/package.json +1 -1
- package/schemas/bellwether-check.schema.json +185 -0
- package/schemas/bellwether-explore.schema.json +837 -0
- package/scripts/completions/bellwether.bash +10 -4
- package/scripts/completions/bellwether.zsh +55 -2
|
@@ -103,7 +103,11 @@ function extractResponseContent(response) {
|
|
|
103
103
|
return item.text;
|
|
104
104
|
}
|
|
105
105
|
}
|
|
106
|
-
|
|
106
|
+
const decoded = decodeDataContent(item.data, item.mimeType);
|
|
107
|
+
if (decoded !== null) {
|
|
108
|
+
return decoded;
|
|
109
|
+
}
|
|
110
|
+
return summarizeBinaryItem(item);
|
|
107
111
|
}
|
|
108
112
|
// Multiple content items - return as array
|
|
109
113
|
return response.content.map((item) => {
|
|
@@ -115,9 +119,42 @@ function extractResponseContent(response) {
|
|
|
115
119
|
return item.text;
|
|
116
120
|
}
|
|
117
121
|
}
|
|
118
|
-
|
|
122
|
+
const decoded = decodeDataContent(item.data, item.mimeType);
|
|
123
|
+
if (decoded !== null) {
|
|
124
|
+
return decoded;
|
|
125
|
+
}
|
|
126
|
+
return summarizeBinaryItem(item);
|
|
119
127
|
});
|
|
120
128
|
}
|
|
129
|
+
/**
 * Decode a base64-encoded content payload into text or parsed JSON.
 *
 * Only payloads whose MIME type contains "json" or starts with "text/" are
 * decoded; every other payload is left for the caller to summarize.
 *
 * @param data - Base64 payload. Anything that is not a non-empty string is rejected.
 * @param mimeType - Declared MIME type (matched case-insensitively); may be absent.
 * @returns The parsed JSON value when the decoded text is valid JSON, the
 *   decoded text otherwise, or `null` when the payload was not decoded.
 */
function decodeDataContent(data, mimeType) {
    if (!data || typeof data !== 'string')
        return null;
    const mime = (mimeType ?? '').toLowerCase();
    if (!mime.includes('json') && !mime.startsWith('text/')) {
        return null;
    }
    let decoded;
    try {
        decoded = Buffer.from(data, 'base64').toString('utf8');
    }
    catch {
        // Best effort: a payload that cannot be decoded is reported as "not decoded".
        return null;
    }
    try {
        const parsed = JSON.parse(decoded);
        // `null` is reserved as the "not decoded" result, and callers test
        // `decoded !== null`. A JSON payload that is literally `null` must
        // therefore be surfaced as its decoded text rather than as the sentinel.
        return parsed === null ? decoded : parsed;
    }
    catch {
        // Not JSON - return the decoded text as-is.
        return decoded;
    }
}
|
|
149
|
+
/**
 * Build a compact descriptor for a content item whose payload was not decoded.
 *
 * @param item - Content item; `type`, `mimeType`, `uri` and `data` are read.
 * @returns An object with the item's `type` (or 'unknown'), `mimeType`, `uri`,
 *   and `size`. For string data, `size` is the UTF-8 byte length of the string
 *   as stored (the encoded text, not the decoded bytes); for binary buffers it
 *   is their byte length; for anything else it is 0.
 */
function summarizeBinaryItem(item) {
    const data = item.data;
    // Buffer.byteLength throws a TypeError for values such as numbers or plain
    // objects, so only measure the payload kinds it accepts.
    const measurable = typeof data === 'string' || data instanceof ArrayBuffer || ArrayBuffer.isView(data);
    const size = measurable ? Buffer.byteLength(data, 'utf8') : 0;
    return {
        type: item.type ?? 'unknown',
        mimeType: item.mimeType,
        uri: item.uri,
        size,
    };
}
|
|
121
158
|
/**
|
|
122
159
|
* Compute a structure hash that captures shape but not values.
|
|
123
160
|
*/
|
package/dist/baseline/saver.js
CHANGED
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import { readFileSync, writeFileSync, existsSync, statSync } from 'fs';
|
|
5
5
|
import { z } from 'zod';
|
|
6
|
-
import { getBaselineVersion, parseVersion, formatVersion
|
|
7
|
-
import {
|
|
6
|
+
import { getBaselineVersion, parseVersion, formatVersion } from './version.js';
|
|
7
|
+
import { createBaselineFromInterview } from './converter.js';
|
|
8
8
|
import { calculateBaselineHash } from './baseline-hash.js';
|
|
9
9
|
import { PAYLOAD_LIMITS } from '../constants.js';
|
|
10
10
|
import { getLogger } from '../logging/logger.js';
|
|
11
|
-
const
|
|
11
|
+
const baselineAssertionSchema = z.object({
|
|
12
12
|
type: z.enum(['expects', 'requires', 'warns', 'notes']),
|
|
13
13
|
condition: z.string(),
|
|
14
14
|
tool: z.string().optional(),
|
|
@@ -19,7 +19,16 @@ const cloudAssertionSchema = z.object({
|
|
|
19
19
|
*/
|
|
20
20
|
const responseFingerprintSchema = z.object({
|
|
21
21
|
structureHash: z.string(),
|
|
22
|
-
contentType: z.enum([
|
|
22
|
+
contentType: z.enum([
|
|
23
|
+
'text',
|
|
24
|
+
'object',
|
|
25
|
+
'array',
|
|
26
|
+
'primitive',
|
|
27
|
+
'empty',
|
|
28
|
+
'error',
|
|
29
|
+
'mixed',
|
|
30
|
+
'binary',
|
|
31
|
+
]),
|
|
23
32
|
fields: z.array(z.string()).optional(),
|
|
24
33
|
arrayItemStructure: z.string().optional(),
|
|
25
34
|
size: z.enum(['tiny', 'small', 'medium', 'large']),
|
|
@@ -68,6 +77,9 @@ const toolCapabilitySchema = z.object({
|
|
|
68
77
|
description: z.string(),
|
|
69
78
|
inputSchema: z.record(z.unknown()),
|
|
70
79
|
schemaHash: z.string(),
|
|
80
|
+
observedArgsSchemaHash: z.string().optional(),
|
|
81
|
+
observedArgsSchemaConsistency: z.number().min(0).max(1).optional(),
|
|
82
|
+
observedArgsSchemaVariations: z.number().int().min(0).optional(),
|
|
71
83
|
responseFingerprint: responseFingerprintSchema.optional(),
|
|
72
84
|
inferredOutputSchema: inferredSchemaSchema.optional(),
|
|
73
85
|
errorPatterns: z.array(errorPatternSchema).optional(),
|
|
@@ -137,21 +149,40 @@ const baselineSchema = z.object({
|
|
|
137
149
|
server: serverFingerprintSchema,
|
|
138
150
|
capabilities: z.object({
|
|
139
151
|
tools: z.array(toolCapabilitySchema),
|
|
140
|
-
resources: z
|
|
141
|
-
|
|
152
|
+
resources: z
|
|
153
|
+
.array(z.object({
|
|
154
|
+
uri: z.string(),
|
|
155
|
+
name: z.string(),
|
|
156
|
+
description: z.string().optional(),
|
|
157
|
+
mimeType: z.string().optional(),
|
|
158
|
+
}))
|
|
159
|
+
.optional(),
|
|
160
|
+
prompts: z
|
|
161
|
+
.array(z.object({
|
|
162
|
+
name: z.string(),
|
|
163
|
+
description: z.string().optional(),
|
|
164
|
+
arguments: z
|
|
165
|
+
.array(z.object({
|
|
166
|
+
name: z.string(),
|
|
167
|
+
description: z.string().optional(),
|
|
168
|
+
required: z.boolean().optional(),
|
|
169
|
+
}))
|
|
170
|
+
.optional(),
|
|
171
|
+
}))
|
|
172
|
+
.optional(),
|
|
142
173
|
}),
|
|
143
174
|
interviews: z.array(z.record(z.unknown())),
|
|
144
175
|
toolProfiles: z.array(z.object({
|
|
145
176
|
name: z.string(),
|
|
146
177
|
description: z.string().optional(),
|
|
147
178
|
schemaHash: z.string().optional(),
|
|
148
|
-
assertions: z.array(
|
|
179
|
+
assertions: z.array(baselineAssertionSchema),
|
|
149
180
|
securityNotes: z.array(z.string()).optional(),
|
|
150
181
|
limitations: z.array(z.string()).optional(),
|
|
151
182
|
behavioralNotes: z.array(z.string()).optional(),
|
|
152
183
|
})),
|
|
153
184
|
workflows: z.array(workflowSignatureSchema).optional(),
|
|
154
|
-
assertions: z.array(
|
|
185
|
+
assertions: z.array(baselineAssertionSchema),
|
|
155
186
|
summary: z.string(),
|
|
156
187
|
hash: z.string(),
|
|
157
188
|
acceptance: driftAcceptanceSchema.optional(),
|
|
@@ -164,7 +195,7 @@ const baselineSchema = z.object({
|
|
|
164
195
|
* Explore mode results are for documentation only.
|
|
165
196
|
*/
|
|
166
197
|
export function createBaseline(result, serverCommand) {
|
|
167
|
-
return
|
|
198
|
+
return createBaselineFromInterview(result, serverCommand);
|
|
168
199
|
}
|
|
169
200
|
/**
|
|
170
201
|
* Save baseline to a file.
|
|
@@ -280,7 +311,7 @@ export function acceptDrift(currentBaseline, diff, options = {}) {
|
|
|
280
311
|
const acceptedDiff = {
|
|
281
312
|
toolsAdded: [...diff.toolsAdded],
|
|
282
313
|
toolsRemoved: [...diff.toolsRemoved],
|
|
283
|
-
toolsModified: diff.toolsModified.map(t => t.tool),
|
|
314
|
+
toolsModified: diff.toolsModified.map((t) => t.tool),
|
|
284
315
|
severity: diff.severity,
|
|
285
316
|
breakingCount: diff.breakingCount,
|
|
286
317
|
warningCount: diff.warningCount,
|
|
@@ -25,8 +25,19 @@ interface SchemaProperty {
|
|
|
25
25
|
default?: unknown;
|
|
26
26
|
items?: SchemaProperty;
|
|
27
27
|
properties?: Record<string, SchemaProperty>;
|
|
28
|
+
patternProperties?: Record<string, SchemaProperty>;
|
|
29
|
+
dependentRequired?: Record<string, string[]>;
|
|
30
|
+
if?: SchemaProperty;
|
|
31
|
+
then?: SchemaProperty;
|
|
32
|
+
else?: SchemaProperty;
|
|
33
|
+
oneOf?: SchemaProperty[];
|
|
34
|
+
anyOf?: SchemaProperty[];
|
|
35
|
+
allOf?: SchemaProperty[];
|
|
28
36
|
required?: string[];
|
|
29
37
|
additionalProperties?: boolean | SchemaProperty;
|
|
38
|
+
$ref?: string;
|
|
39
|
+
minProperties?: number;
|
|
40
|
+
maxProperties?: number;
|
|
30
41
|
}
|
|
31
42
|
/**
|
|
32
43
|
* Input schema for a tool.
|
|
@@ -34,8 +45,19 @@ interface SchemaProperty {
|
|
|
34
45
|
interface InputSchema {
|
|
35
46
|
type?: string;
|
|
36
47
|
properties?: Record<string, SchemaProperty>;
|
|
48
|
+
patternProperties?: Record<string, SchemaProperty>;
|
|
49
|
+
dependentRequired?: Record<string, string[]>;
|
|
50
|
+
if?: SchemaProperty;
|
|
51
|
+
then?: SchemaProperty;
|
|
52
|
+
else?: SchemaProperty;
|
|
53
|
+
oneOf?: SchemaProperty[];
|
|
54
|
+
anyOf?: SchemaProperty[];
|
|
55
|
+
allOf?: SchemaProperty[];
|
|
37
56
|
required?: string[];
|
|
38
57
|
additionalProperties?: boolean | SchemaProperty;
|
|
58
|
+
$ref?: string;
|
|
59
|
+
minProperties?: number;
|
|
60
|
+
maxProperties?: number;
|
|
39
61
|
}
|
|
40
62
|
/**
|
|
41
63
|
* Schema change type.
|
|
@@ -25,7 +25,7 @@ export function computeSchemaHash(schema) {
|
|
|
25
25
|
return 'empty';
|
|
26
26
|
// Create normalized representation for hashing with circular reference protection
|
|
27
27
|
const seen = new WeakSet();
|
|
28
|
-
const normalized = normalizeSchema(schema, 0, seen);
|
|
28
|
+
const normalized = normalizeSchema(schema, 0, seen, schema);
|
|
29
29
|
const serialized = JSON.stringify(normalized);
|
|
30
30
|
return createHash('sha256').update(serialized).digest('hex').slice(0, 16);
|
|
31
31
|
}
|
|
@@ -70,7 +70,7 @@ function checkCircularRef(obj, seen) {
|
|
|
70
70
|
* @param depth - Current recursion depth
|
|
71
71
|
* @param seen - WeakSet tracking visited objects for circular reference detection
|
|
72
72
|
*/
|
|
73
|
-
function normalizeSchema(schema, depth = 0, seen = new WeakSet()) {
|
|
73
|
+
function normalizeSchema(schema, depth = 0, seen = new WeakSet(), root = schema) {
|
|
74
74
|
// Check depth limit
|
|
75
75
|
const depthLimit = checkDepthLimit(depth);
|
|
76
76
|
if (depthLimit)
|
|
@@ -80,6 +80,16 @@ function normalizeSchema(schema, depth = 0, seen = new WeakSet()) {
|
|
|
80
80
|
if (circularRef)
|
|
81
81
|
return circularRef;
|
|
82
82
|
const result = {};
|
|
83
|
+
// Handle $ref (resolve local refs to include referenced structure in hash)
|
|
84
|
+
if (schema.$ref) {
|
|
85
|
+
const ref = schema.$ref;
|
|
86
|
+
const resolved = resolveLocalRef(root, ref);
|
|
87
|
+
const refResult = { $ref: ref };
|
|
88
|
+
if (resolved && typeof resolved === 'object') {
|
|
89
|
+
refResult.$ref_resolved = normalizeSchema(resolved, depth + 1, seen, root);
|
|
90
|
+
}
|
|
91
|
+
return refResult;
|
|
92
|
+
}
|
|
83
93
|
// Sort and normalize simple fields
|
|
84
94
|
if (schema.type !== undefined) {
|
|
85
95
|
result.type = Array.isArray(schema.type) ? schema.type.sort() : schema.type;
|
|
@@ -91,7 +101,16 @@ function normalizeSchema(schema, depth = 0, seen = new WeakSet()) {
|
|
|
91
101
|
result.enum = [...schema.enum].sort((a, b) => JSON.stringify(a).localeCompare(JSON.stringify(b)));
|
|
92
102
|
}
|
|
93
103
|
// Constraints - normalize numeric values to handle 1.0 vs 1
|
|
94
|
-
const constraintFields = [
|
|
104
|
+
const constraintFields = [
|
|
105
|
+
'minimum',
|
|
106
|
+
'maximum',
|
|
107
|
+
'minLength',
|
|
108
|
+
'maxLength',
|
|
109
|
+
'pattern',
|
|
110
|
+
'default',
|
|
111
|
+
'minProperties',
|
|
112
|
+
'maxProperties',
|
|
113
|
+
];
|
|
95
114
|
for (const field of constraintFields) {
|
|
96
115
|
const value = schema[field];
|
|
97
116
|
if (value !== undefined) {
|
|
@@ -112,21 +131,61 @@ function normalizeSchema(schema, depth = 0, seen = new WeakSet()) {
|
|
|
112
131
|
if (schema.properties) {
|
|
113
132
|
const props = {};
|
|
114
133
|
// Normalize Unicode in property keys and sort
|
|
115
|
-
const sortedKeys = Object.keys(schema.properties)
|
|
116
|
-
.map(normalizeUnicodeKey)
|
|
117
|
-
.sort();
|
|
134
|
+
const sortedKeys = Object.keys(schema.properties).map(normalizeUnicodeKey).sort();
|
|
118
135
|
for (const key of sortedKeys) {
|
|
119
136
|
// Find the original key (may differ in Unicode representation)
|
|
120
|
-
const originalKey = Object.keys(schema.properties).find(k => normalizeUnicodeKey(k) === key);
|
|
137
|
+
const originalKey = Object.keys(schema.properties).find((k) => normalizeUnicodeKey(k) === key);
|
|
121
138
|
if (originalKey) {
|
|
122
|
-
props[key] = normalizeSchema(schema.properties[originalKey], depth + 1, seen);
|
|
139
|
+
props[key] = normalizeSchema(schema.properties[originalKey], depth + 1, seen, root);
|
|
123
140
|
}
|
|
124
141
|
}
|
|
125
142
|
result.properties = props;
|
|
126
143
|
}
|
|
144
|
+
// Pattern properties - normalize keys and values
|
|
145
|
+
if (schema.patternProperties) {
|
|
146
|
+
const patternProps = schema.patternProperties ?? {};
|
|
147
|
+
const props = {};
|
|
148
|
+
const sortedKeys = Object.keys(patternProps).sort();
|
|
149
|
+
for (const key of sortedKeys) {
|
|
150
|
+
const prop = patternProps[key];
|
|
151
|
+
props[key] = normalizeSchema(prop, depth + 1, seen, root);
|
|
152
|
+
}
|
|
153
|
+
result.patternProperties = props;
|
|
154
|
+
}
|
|
155
|
+
// Dependent required - normalize keys and sort arrays
|
|
156
|
+
if (schema.dependentRequired) {
|
|
157
|
+
const deps = schema.dependentRequired ?? {};
|
|
158
|
+
const normalizedDeps = {};
|
|
159
|
+
const sortedKeys = Object.keys(deps).map(normalizeUnicodeKey).sort();
|
|
160
|
+
for (const key of sortedKeys) {
|
|
161
|
+
const values = deps[key] ?? [];
|
|
162
|
+
normalizedDeps[key] = [...values].map(normalizeUnicodeKey).sort();
|
|
163
|
+
}
|
|
164
|
+
result.dependentRequired = normalizedDeps;
|
|
165
|
+
}
|
|
166
|
+
// Conditional schemas (if/then/else)
|
|
167
|
+
if (schema.if) {
|
|
168
|
+
result.if = normalizeSchema(schema.if, depth + 1, seen, root);
|
|
169
|
+
}
|
|
170
|
+
if (schema.then) {
|
|
171
|
+
result.then = normalizeSchema(schema.then, depth + 1, seen, root);
|
|
172
|
+
}
|
|
173
|
+
if (schema.else) {
|
|
174
|
+
result.else = normalizeSchema(schema.else, depth + 1, seen, root);
|
|
175
|
+
}
|
|
176
|
+
// oneOf/anyOf/allOf variants
|
|
177
|
+
if (schema.oneOf) {
|
|
178
|
+
result.oneOf = normalizeSchemaArray(schema.oneOf, depth, seen, root);
|
|
179
|
+
}
|
|
180
|
+
if (schema.anyOf) {
|
|
181
|
+
result.anyOf = normalizeSchemaArray(schema.anyOf, depth, seen, root);
|
|
182
|
+
}
|
|
183
|
+
if (schema.allOf) {
|
|
184
|
+
result.allOf = normalizeSchemaArray(schema.allOf, depth, seen, root);
|
|
185
|
+
}
|
|
127
186
|
// Items for arrays
|
|
128
187
|
if (schema.items) {
|
|
129
|
-
result.items = normalizeSchema(schema.items, depth + 1, seen);
|
|
188
|
+
result.items = normalizeSchema(schema.items, depth + 1, seen, root);
|
|
130
189
|
}
|
|
131
190
|
// Additional properties
|
|
132
191
|
if (schema.additionalProperties !== undefined) {
|
|
@@ -134,11 +193,39 @@ function normalizeSchema(schema, depth = 0, seen = new WeakSet()) {
|
|
|
134
193
|
result.additionalProperties = schema.additionalProperties;
|
|
135
194
|
}
|
|
136
195
|
else {
|
|
137
|
-
result.additionalProperties = normalizeSchema(schema.additionalProperties, depth + 1, seen);
|
|
196
|
+
result.additionalProperties = normalizeSchema(schema.additionalProperties, depth + 1, seen, root);
|
|
138
197
|
}
|
|
139
198
|
}
|
|
140
199
|
return result;
|
|
141
200
|
}
|
|
201
|
+
/**
 * Normalize every schema in a oneOf/anyOf/allOf list and return the results
 * in a deterministic order.
 *
 * @param variants - Schemas to normalize
 * @param depth - Recursion depth of the schema that owns the list
 * @param seen - WeakSet tracking visited objects for circular reference detection
 * @param root - Root schema handed through to normalizeSchema
 * @returns A new array of normalized schemas ordered by their JSON text
 */
function normalizeSchemaArray(variants, depth, seen, root) {
    const childDepth = depth + 1;
    const entries = [];
    for (const variant of variants) {
        entries.push(normalizeSchema(variant, childDepth, seen, root));
    }
    // Ordering by serialized form makes the result independent of input order.
    entries.sort((left, right) => JSON.stringify(left).localeCompare(JSON.stringify(right)));
    return entries;
}
|
|
209
|
+
/**
 * Resolve a local JSON pointer reference (e.g., "#/properties/foo").
 *
 * Pointer segments are unescaped ("~1" -> "/", then "~0" -> "~") and followed
 * through own properties only, so a segment such as "__proto__" or
 * "constructor" can never resolve to something inherited from the prototype
 * chain.
 *
 * @param root - Document the pointer is evaluated against
 * @param ref - Reference string; only references starting with "#/" are supported
 * @returns The referenced value, or `null` when the reference is not local,
 *   a segment is missing, or the referenced value is null/undefined
 */
function resolveLocalRef(root, ref) {
    if (!ref.startsWith('#/')) {
        return null;
    }
    const segments = ref
        .slice(2)
        .split('/')
        .map((segment) => segment.replace(/~1/g, '/').replace(/~0/g, '~'));
    let current = root;
    for (const segment of segments) {
        const isContainer = current !== null && typeof current === 'object';
        // Own-property check: `in` would also match inherited members.
        if (!isContainer || !Object.prototype.hasOwnProperty.call(current, segment)) {
            return null;
        }
        current = current[segment];
    }
    return current ?? null;
}
|
|
142
229
|
/**
|
|
143
230
|
* Compare two schemas and return detailed differences.
|
|
144
231
|
*/
|
|
@@ -262,8 +349,8 @@ function compareProperties(prev, curr, path, changes) {
|
|
|
262
349
|
if (!arraysEqual(prev.enum, curr.enum)) {
|
|
263
350
|
const prevSet = new Set((prev.enum ?? []).map(String));
|
|
264
351
|
const currSet = new Set((curr.enum ?? []).map(String));
|
|
265
|
-
const removed = [...prevSet].filter(v => !currSet.has(v));
|
|
266
|
-
const added = [...currSet].filter(v => !prevSet.has(v));
|
|
352
|
+
const removed = [...prevSet].filter((v) => !currSet.has(v));
|
|
353
|
+
const added = [...currSet].filter((v) => !prevSet.has(v));
|
|
267
354
|
changes.push({
|
|
268
355
|
path,
|
|
269
356
|
changeType: 'enum_changed',
|
|
@@ -279,6 +366,8 @@ function compareProperties(prev, curr, path, changes) {
|
|
|
279
366
|
compareConstraint(prev, curr, path, 'minLength', changes);
|
|
280
367
|
compareConstraint(prev, curr, path, 'maxLength', changes);
|
|
281
368
|
compareConstraint(prev, curr, path, 'pattern', changes);
|
|
369
|
+
compareConstraint(prev, curr, path, 'minProperties', changes);
|
|
370
|
+
compareConstraint(prev, curr, path, 'maxProperties', changes);
|
|
282
371
|
// Compare nested properties
|
|
283
372
|
if (prev.properties || curr.properties) {
|
|
284
373
|
const prevNested = prev.properties ?? {};
|
|
@@ -339,6 +428,18 @@ function compareProperties(prev, curr, path, changes) {
|
|
|
339
428
|
});
|
|
340
429
|
}
|
|
341
430
|
}
|
|
431
|
+
// Compare patternProperties
|
|
432
|
+
comparePatternProperties(prev, curr, path, changes);
|
|
433
|
+
// Compare dependentRequired
|
|
434
|
+
compareDependentRequired(prev, curr, path, changes);
|
|
435
|
+
// Compare conditional schemas (if/then/else)
|
|
436
|
+
compareConditionalSchemas(prev, curr, path, changes);
|
|
437
|
+
// Compare compositional schemas (oneOf/anyOf/allOf)
|
|
438
|
+
compareSchemaVariants(prev, curr, path, changes, 'oneOf');
|
|
439
|
+
compareSchemaVariants(prev, curr, path, changes, 'anyOf');
|
|
440
|
+
compareSchemaVariants(prev, curr, path, changes, 'allOf');
|
|
441
|
+
// Compare additionalProperties
|
|
442
|
+
compareAdditionalProperties(prev, curr, path, changes);
|
|
342
443
|
}
|
|
343
444
|
/**
|
|
344
445
|
* Compare a single constraint.
|
|
@@ -349,8 +450,8 @@ function compareConstraint(prev, curr, path, field, changes) {
|
|
|
349
450
|
if (prevValue !== currValue) {
|
|
350
451
|
// Determine if breaking
|
|
351
452
|
let breaking = false;
|
|
352
|
-
const isMinConstraint = field === 'minimum' || field === 'minLength';
|
|
353
|
-
const isMaxConstraint = field === 'maximum' || field === 'maxLength';
|
|
453
|
+
const isMinConstraint = field === 'minimum' || field === 'minLength' || field === 'minProperties';
|
|
454
|
+
const isMaxConstraint = field === 'maximum' || field === 'maxLength' || field === 'maxProperties';
|
|
354
455
|
if (isMinConstraint) {
|
|
355
456
|
// Increasing minimum is breaking (more restrictive)
|
|
356
457
|
breaking = currValue !== undefined && (prevValue === undefined || currValue > prevValue);
|
|
@@ -373,6 +474,148 @@ function compareConstraint(prev, curr, path, field, changes) {
|
|
|
373
474
|
});
|
|
374
475
|
}
|
|
375
476
|
}
|
|
477
|
+
/**
 * Diff the `patternProperties` of two schemas.
 *
 * Appends, in this order: one non-breaking 'property_added' change per pattern
 * only present in `curr`, one breaking 'property_removed' change per pattern
 * only present in `prev`, and then whatever compareProperties reports for each
 * pattern present in both.
 *
 * @param prev - Previous schema
 * @param curr - Current schema
 * @param path - Path of the schema being compared; each pattern is appended as `{pattern}`
 * @param changes - Array that receives the detected changes (mutated)
 */
function comparePatternProperties(prev, curr, path, changes) {
    const before = prev.patternProperties ?? {};
    const after = curr.patternProperties ?? {};
    const beforePatterns = Object.keys(before);
    const afterPatterns = Object.keys(after);
    const inBefore = new Set(beforePatterns);
    const inAfter = new Set(afterPatterns);
    const locate = (pattern) => `${path}{${pattern}}`;
    // Patterns introduced by the current schema.
    afterPatterns
        .filter((pattern) => !inBefore.has(pattern))
        .forEach((pattern) => {
        changes.push({
            path: locate(pattern),
            changeType: 'property_added',
            before: undefined,
            after: summarizeProperty(after[pattern]),
            breaking: false,
            description: `Pattern property "${pattern}" added`,
        });
    });
    // Patterns dropped by the current schema.
    beforePatterns
        .filter((pattern) => !inAfter.has(pattern))
        .forEach((pattern) => {
        changes.push({
            path: locate(pattern),
            changeType: 'property_removed',
            before: summarizeProperty(before[pattern]),
            after: undefined,
            breaking: true,
            description: `Pattern property "${pattern}" removed`,
        });
    });
    // Patterns kept on both sides: compare their schemas recursively.
    beforePatterns
        .filter((pattern) => inAfter.has(pattern))
        .forEach((pattern) => {
        compareProperties(before[pattern], after[pattern], locate(pattern), changes);
    });
}
|
|
515
|
+
/**
 * Compare dependentRequired constraints.
 *
 * For every trigger key present on either side, the lists of dependent fields
 * are compared as sets: a change is recorded only when a field was actually
 * added or removed, so reordering a list is not reported. This matches the
 * hash normalization, which sorts these lists before hashing.
 *
 * @param prev - Previous schema
 * @param curr - Current schema
 * @param path - Path of the schema being compared
 * @param changes - Array that receives the detected changes (mutated)
 */
function compareDependentRequired(prev, curr, path, changes) {
    const prevDeps = prev.dependentRequired ?? {};
    const currDeps = curr.dependentRequired ?? {};
    const keys = new Set([...Object.keys(prevDeps), ...Object.keys(currDeps)]);
    for (const key of keys) {
        const prevReqs = prevDeps[key] ?? [];
        const currReqs = currDeps[key] ?? [];
        const prevSet = new Set(prevReqs);
        const currSet = new Set(currReqs);
        const added = currReqs.filter((req) => !prevSet.has(req));
        const removed = prevReqs.filter((req) => !currSet.has(req));
        if (added.length === 0 && removed.length === 0) {
            continue;
        }
        changes.push({
            path: `${path}.dependentRequired.${key}`,
            changeType: 'constraint_changed',
            before: prevReqs,
            after: currReqs,
            // A newly required dependent field can reject previously valid input.
            breaking: added.length > 0,
            description: `Dependent required fields for "${key}" changed (${added.length} added, ${removed.length} removed)`,
        });
    }
}
|
|
540
|
+
/**
 * Compare conditional schemas (if/then/else).
 *
 * Only the `if`, `then` and `else` sub-schemas take part in the comparison, so
 * unrelated differences between the two schemas are not reported as a
 * conditional change. `$ref`s inside the branches are still resolved against
 * the schema that owns them.
 *
 * @param prev - Previous schema
 * @param curr - Current schema
 * @param path - Path of the schema being compared
 * @param changes - Array that receives the detected change (mutated)
 */
function compareConditionalSchemas(prev, curr, path, changes) {
    const prevIf = prev.if;
    const currIf = curr.if;
    if (!(prevIf || currIf || prev.then || curr.then || prev.else || curr.else)) {
        return;
    }
    // Signature of the conditional part only; the owning schema is passed as
    // the root so local references keep resolving.
    const conditionalSignature = (schema) => JSON.stringify(normalizeSchema({ if: schema.if, then: schema.then, else: schema.else }, 0, new WeakSet(), schema));
    if (conditionalSignature(prev) === conditionalSignature(curr)) {
        return;
    }
    changes.push({
        path: `${path}.ifThenElse`,
        changeType: 'constraint_changed',
        before: prevIf ? 'conditional present' : 'none',
        after: currIf ? 'conditional present' : 'none',
        // Only a newly introduced condition is flagged as breaking.
        breaking: !!currIf && !prevIf,
        description: 'Conditional schema (if/then/else) changed',
    });
}
|
|
566
|
+
/**
 * Compare oneOf/anyOf/allOf variants.
 *
 * Variants are normalized and compared as sets, so reordering is ignored.
 * Whether a difference is breaking depends on the keyword:
 * - oneOf/anyOf list alternatives, so removing one can reject input that
 *   used to be accepted.
 * - allOf lists constraints that must all hold, so adding one can reject
 *   input that used to be accepted, while removing one only loosens the schema.
 *
 * @param prev - Previous schema
 * @param curr - Current schema
 * @param path - Path of the schema being compared
 * @param changes - Array that receives the detected change (mutated)
 * @param field - Keyword to compare: 'oneOf', 'anyOf' or 'allOf'
 */
function compareSchemaVariants(prev, curr, path, changes, field) {
    const prevVariants = prev[field] ?? [];
    const currVariants = curr[field] ?? [];
    if (prevVariants.length === 0 && currVariants.length === 0) {
        return;
    }
    const prevNormalized = normalizeSchemaArray(prevVariants, 0, new WeakSet(), prev);
    const currNormalized = normalizeSchemaArray(currVariants, 0, new WeakSet(), curr);
    const prevSet = new Set(prevNormalized.map((v) => JSON.stringify(v)));
    const currSet = new Set(currNormalized.map((v) => JSON.stringify(v)));
    const removed = [...prevSet].filter((v) => !currSet.has(v));
    const added = [...currSet].filter((v) => !prevSet.has(v));
    if (removed.length === 0 && added.length === 0) {
        return;
    }
    // A modified variant shows up as one removal plus one addition, so it is
    // flagged as breaking for every keyword.
    const breaking = field === 'allOf' ? added.length > 0 : removed.length > 0;
    changes.push({
        path: `${path}.${field}`,
        changeType: 'constraint_changed',
        before: `variants:${prevVariants.length}`,
        after: `variants:${currVariants.length}`,
        breaking,
        description: `${field} variants changed (${removed.length} removed, ${added.length} added)`,
    });
}
|
|
592
|
+
/**
 * Compare additionalProperties between schemas.
 *
 * An absent `additionalProperties` places no restriction on extra properties,
 * exactly like `true`, so switching between the two is not reported. Any other
 * difference is recorded, and it is flagged as breaking only when the current
 * schema forbids additional properties outright (`false`).
 *
 * @param prev - Previous schema
 * @param curr - Current schema
 * @param path - Path of the schema being compared
 * @param changes - Array that receives the detected change (mutated)
 */
function compareAdditionalProperties(prev, curr, path, changes) {
    const prevAdditional = prev.additionalProperties;
    const currAdditional = curr.additionalProperties;
    const isUnrestricted = (value) => value === undefined || value === true;
    if (isUnrestricted(prevAdditional) && isUnrestricted(currAdditional)) {
        return;
    }
    // Booleans compare by value; schema objects (and absence) compare by their
    // normalized form, resolved against the schema that owns them.
    const signatureOf = (value, owner) => typeof value === 'boolean'
        ? String(value)
        : JSON.stringify(normalizeSchema(value ?? {}, 0, new WeakSet(), owner));
    if (signatureOf(prevAdditional, prev) === signatureOf(currAdditional, curr)) {
        return;
    }
    changes.push({
        path: `${path}.additionalProperties`,
        changeType: 'constraint_changed',
        before: prevAdditional ?? 'unspecified',
        after: currAdditional ?? 'unspecified',
        // Dropping the keyword loosens the schema; only `false` closes it.
        breaking: currAdditional === false,
        description: 'additionalProperties constraint changed',
    });
}
|
|
376
619
|
/**
|
|
377
620
|
* Normalize type to string for comparison.
|
|
378
621
|
*/
|
|
@@ -444,7 +687,7 @@ function generateVisualDiff(_previous, _current, changes) {
|
|
|
444
687
|
}
|
|
445
688
|
// Format each path's changes
|
|
446
689
|
for (const [path, pathChanges] of byPath) {
|
|
447
|
-
const marker = pathChanges.some(c => c.breaking) ? '!' : '~';
|
|
690
|
+
const marker = pathChanges.some((c) => c.breaking) ? '!' : '~';
|
|
448
691
|
lines.push(`${marker} ${path}:`);
|
|
449
692
|
for (const change of pathChanges) {
|
|
450
693
|
const prefix = change.breaking ? ' [BREAKING]' : ' [info]';
|
|
@@ -458,7 +701,7 @@ function generateVisualDiff(_previous, _current, changes) {
|
|
|
458
701
|
}
|
|
459
702
|
}
|
|
460
703
|
// Summary
|
|
461
|
-
const breakingCount = changes.filter(c => c.breaking).length;
|
|
704
|
+
const breakingCount = changes.filter((c) => c.breaking).length;
|
|
462
705
|
const nonBreakingCount = changes.length - breakingCount;
|
|
463
706
|
lines.push('');
|
|
464
707
|
lines.push(`Summary: ${breakingCount} breaking, ${nonBreakingCount} non-breaking change(s)`);
|
package/dist/baseline/types.d.ts
CHANGED
|
@@ -3,8 +3,9 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import type { ToolProfile } from '../interview/types.js';
|
|
5
5
|
import type { ResponseFingerprint, InferredSchema, ErrorPattern } from './response-fingerprint.js';
|
|
6
|
-
import type { BellwetherBaseline,
|
|
6
|
+
import type { BellwetherBaseline, DriftAcceptance, AcceptedDiff } from './baseline-format.js';
|
|
7
7
|
export type { DriftAcceptance, AcceptedDiff };
|
|
8
|
+
export type { BaselineMode } from './baseline-format.js';
|
|
8
9
|
/**
|
|
9
10
|
* Re-export ErrorPattern for use by other modules.
|
|
10
11
|
*/
|
|
@@ -30,7 +31,7 @@ export type ChangeSeverity = 'none' | 'info' | 'warning' | 'breaking';
|
|
|
30
31
|
/**
|
|
31
32
|
* Aspect of tool behavior that changed.
|
|
32
33
|
*/
|
|
33
|
-
export type BehaviorAspect = 'response_format' | 'response_structure' | 'response_schema_evolution' | 'error_handling' | 'error_pattern' | 'security' | 'performance' | 'schema' | 'description';
|
|
34
|
+
export type BehaviorAspect = 'response_format' | 'response_structure' | 'response_schema_evolution' | 'error_handling' | 'error_pattern' | 'security' | 'performance' | 'schema' | 'description' | 'prompt' | 'resource' | 'server' | 'capability';
|
|
34
35
|
/**
|
|
35
36
|
* A single behavioral assertion about a tool.
|
|
36
37
|
*/
|
|
@@ -228,8 +229,14 @@ export interface ToolFingerprint {
|
|
|
228
229
|
name: string;
|
|
229
230
|
description: string;
|
|
230
231
|
schemaHash: string;
|
|
231
|
-
/** Full input schema for the tool (preserved for
|
|
232
|
+
/** Full input schema for the tool (preserved for export/analysis) */
|
|
232
233
|
inputSchema?: Record<string, unknown>;
|
|
234
|
+
/** Hash of observed arguments schema (from actual calls) */
|
|
235
|
+
observedArgsSchemaHash?: string;
|
|
236
|
+
/** Consistency of observed argument schemas (0-1) */
|
|
237
|
+
observedArgsSchemaConsistency?: number;
|
|
238
|
+
/** Number of observed schema variations */
|
|
239
|
+
observedArgsSchemaVariations?: number;
|
|
233
240
|
assertions: BehavioralAssertion[];
|
|
234
241
|
securityNotes: string[];
|
|
235
242
|
limitations: string[];
|
|
@@ -280,10 +287,6 @@ export interface SemanticInferenceRecord {
|
|
|
280
287
|
/** Confidence level (0-1) */
|
|
281
288
|
confidence: number;
|
|
282
289
|
}
|
|
283
|
-
/**
|
|
284
|
-
* Server fingerprint for baseline comparison.
|
|
285
|
-
*/
|
|
286
|
-
export type BaselineMode = CloudBaselineMode;
|
|
287
290
|
/**
|
|
288
291
|
* Baseline for an MCP server.
|
|
289
292
|
*/
|
|
@@ -48,6 +48,8 @@ export interface CacheConfig {
|
|
|
48
48
|
maxSizeBytes?: number;
|
|
49
49
|
/** Whether to enable cache (default: true) */
|
|
50
50
|
enabled?: boolean;
|
|
51
|
+
/** Optional cache directory for persistence */
|
|
52
|
+
dir?: string;
|
|
51
53
|
}
|
|
52
54
|
/**
|
|
53
55
|
* In-memory content-addressable cache.
|
|
@@ -57,6 +59,7 @@ export declare class ResponseCache {
|
|
|
57
59
|
private config;
|
|
58
60
|
private stats;
|
|
59
61
|
private totalSizeBytes;
|
|
62
|
+
private cacheDir?;
|
|
60
63
|
constructor(config?: CacheConfig);
|
|
61
64
|
/**
|
|
62
65
|
* Generate a cache key from input data.
|
|
@@ -108,6 +111,11 @@ export declare class ResponseCache {
|
|
|
108
111
|
* Estimate the size of a value in bytes.
|
|
109
112
|
*/
|
|
110
113
|
private estimateSize;
|
|
114
|
+
private ensureCacheDir;
|
|
115
|
+
private getCachePath;
|
|
116
|
+
private saveToDisk;
|
|
117
|
+
private loadFromDisk;
|
|
118
|
+
private deleteFromDisk;
|
|
111
119
|
}
|
|
112
120
|
/**
|
|
113
121
|
* Specialized cache for tool responses.
|