@dotsetlabs/bellwether 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/CHANGELOG.md +44 -0
  2. package/README.md +9 -0
  3. package/dist/auth/credentials.js +2 -0
  4. package/dist/baseline/accessors.js +12 -0
  5. package/dist/baseline/baseline-format.d.ts +48 -0
  6. package/dist/baseline/comparator.js +263 -20
  7. package/dist/baseline/converter.js +52 -4
  8. package/dist/baseline/response-fingerprint.js +1 -1
  9. package/dist/baseline/saver.js +34 -0
  10. package/dist/baseline/types.d.ts +21 -1
  11. package/dist/cache/response-cache.js +9 -2
  12. package/dist/cli/commands/baseline.js +70 -35
  13. package/dist/cli/commands/check.js +48 -9
  14. package/dist/cli/commands/explore.js +36 -3
  15. package/dist/cli/commands/init.js +10 -7
  16. package/dist/cli/commands/watch.js +5 -5
  17. package/dist/config/loader.js +2 -2
  18. package/dist/constants/core.d.ts +1 -1
  19. package/dist/constants/core.js +1 -1
  20. package/dist/discovery/discovery.js +88 -14
  21. package/dist/discovery/types.d.ts +5 -1
  22. package/dist/docs/agents.js +138 -50
  23. package/dist/docs/contract.js +63 -1
  24. package/dist/errors/retry.js +11 -5
  25. package/dist/interview/rate-limiter.js +7 -3
  26. package/dist/llm/anthropic.js +14 -4
  27. package/dist/llm/fallback.d.ts +1 -0
  28. package/dist/llm/fallback.js +7 -1
  29. package/dist/llm/openai.js +15 -4
  30. package/dist/protocol/index.d.ts +2 -0
  31. package/dist/protocol/index.js +2 -0
  32. package/dist/protocol/version-registry.d.ts +66 -0
  33. package/dist/protocol/version-registry.js +159 -0
  34. package/dist/transport/http-transport.d.ts +11 -1
  35. package/dist/transport/http-transport.js +21 -2
  36. package/dist/transport/mcp-client.d.ts +29 -1
  37. package/dist/transport/mcp-client.js +92 -7
  38. package/dist/transport/sse-transport.js +5 -4
  39. package/dist/transport/types.d.ts +134 -1
  40. package/dist/utils/concurrency.d.ts +2 -0
  41. package/dist/utils/concurrency.js +9 -2
  42. package/dist/utils/markdown.js +13 -18
  43. package/dist/utils/timeout.js +2 -1
  44. package/dist/version.js +1 -1
  45. package/man/bellwether.1 +1 -1
  46. package/man/bellwether.1.md +2 -2
  47. package/package.json +1 -1
package/CHANGELOG.md CHANGED
@@ -7,6 +7,50 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.0.1] - 2026-02-07
11
+
12
+ ### Added
13
+
14
+ - **MCP protocol version gating**: New `src/protocol/` module with version-to-feature-flag mapping
15
+ - Supports MCP protocol versions: `2024-11-05`, `2025-03-26`, `2025-06-18`, `2025-11-25`
16
+ - `MCPFeatureFlags` interface with 9 feature flags (`toolAnnotations`, `entityTitles`, `completions`, `resourceAnnotations`, `structuredOutput`, `serverInstructions`, `httpVersionHeader`, `tasks`, `icons`)
17
+ - `getSharedFeatureFlags(v1, v2)` computes AND-intersection for cross-version baseline comparison
18
+ - All version-specific fields in baselines are now gated by protocol version during conversion and comparison
19
+ - **Version-gated drift detection**: Comparator now detects changes in version-specific fields
20
+ - Tool annotations (readOnlyHint, destructiveHint, idempotentHint, openWorldHint)
21
+ - Entity titles (tool, prompt, resource, and resource template titles)
22
+ - Output schema and structured output changes
23
+ - Execution/task support changes
24
+ - Server instructions changes
25
+ - **MCPClient protocol version tracking**: Client stores negotiated protocol version after `initialize()`, exposes via getters
26
+ - **Mock server protocol version support**: Mock MCP server now supports `MOCK_PROTOCOL_VERSION` env var for testing
27
+
28
+ ### Fixed
29
+
30
+ - **20 production-blocking bugs across all layers** (`4717ca1`):
31
+ - Transport: HTTP transport URL construction, SSE error event handling, MCP client error propagation
32
+ - Discovery: ResourceTemplate type handling, discovery error handling
33
+ - Baseline: Converter version-gated field handling, saver hash calculation, comparator severity logic
34
+ - CLI: Check command exit code handling, explore command cleanup, baseline command error paths
35
+ - Config: Environment variable expansion edge cases
36
+ - Docs: Contract and agents generator error handling
37
+ - **Protocol version gating gaps causing false negatives and data loss** (`dce73ed`):
38
+ - Fixed tool title comparison using wrong feature flag (`toolAnnotations` instead of `entityTitles`)
39
+ - Fixed tool title comparison condition (AND → OR) to detect added/removed titles
40
+ - Added missing `execution` and `baselineP99Ms` fields to `ToolFingerprint` type
41
+ - Added missing fields (`title`, `outputSchema`, `outputSchemaHash`, `annotations`, `execution`, `baselineP99Ms`) to `toToolCapability()` accessor — prevents data loss during incremental check merges
42
+ - Added `execution` and `baselineP99Ms` mapping to `getToolFingerprints()` accessor
43
+ - Added prompt title comparison gated by `entityTitles` flag
44
+ - Added resource title comparison gated by `entityTitles` flag
45
+ - Added resource template title comparison gated by `entityTitles` flag
46
+ - Added execution/task support comparison gated by `tasks` flag
47
+ - Added server instructions comparison gated by `serverInstructions` flag
48
+ - Gated resource template `title` in converter by `entityTitles` flag
49
+ - **Clean JSON output from baseline commands** (`7aab450`):
50
+ - `baseline compare --format json` no longer appends summary text after JSON object
51
+ - `baseline diff --format json` no longer prepends header or appends summary text around JSON object
52
+ - JSON output is now machine-parseable without text contamination
53
+
10
54
  ## [2.0.0] - 2026-02-04
11
55
 
12
56
  ### Breaking Changes
package/README.md CHANGED
@@ -75,6 +75,15 @@ jobs:
75
75
  | Parameter renamed | `path` to `file_path` | Breaking |
76
76
  | Description changed | Tool help text updated | Warning |
77
77
  | Performance regression | Latency increased >10% | Warning |
78
+ | Tool annotations changed | `readOnlyHint` flipped to `false` | Breaking |
79
+ | Output schema changed | Return type structure modified | Breaking |
80
+ | Entity title changed | Tool/prompt/resource title updated | Info |
81
+ | Task support changed | Execution mode switched to `async` | Warning |
82
+ | Server instructions changed | Server-level instructions updated | Info |
83
+ | Prompt added/removed | Prompt template appears or disappears | Breaking |
84
+ | Resource changed | Resource URI or MIME type modified | Warning |
85
+
86
+ Comparisons are **protocol-version-aware** — version-specific fields (annotations, titles, output schemas, etc.) are only compared when the MCP protocol versions of both baselines support the relevant feature.
78
87
 
79
88
  ## Commands
80
89
 
package/dist/auth/credentials.js CHANGED
@@ -58,6 +58,8 @@ function readEnvFile(filePath, envVar, options) {
58
58
  if (decrypted) {
59
59
  return decrypted;
60
60
  }
61
+ // Warn about decryption failure so users know their credential exists but can't be decrypted
62
+ console.warn(`[bellwether] Encrypted credential found for ${envVar} but decryption failed. Check your encryption key.`);
61
63
  return undefined;
62
64
  }
63
65
  if (value) {
package/dist/baseline/accessors.js CHANGED
@@ -57,9 +57,15 @@ export function toToolCapability(tool) {
57
57
  errorPatterns: tool.errorPatterns,
58
58
  baselineP50Ms: tool.baselineP50Ms,
59
59
  baselineP95Ms: tool.baselineP95Ms,
60
+ baselineP99Ms: tool.baselineP99Ms,
60
61
  baselineSuccessRate: tool.baselineSuccessRate,
61
62
  performanceConfidence: tool.performanceConfidence,
62
63
  securityFingerprint: tool.securityFingerprint,
64
+ title: tool.title,
65
+ outputSchema: tool.outputSchema,
66
+ outputSchemaHash: tool.outputSchemaHash,
67
+ annotations: tool.annotations,
68
+ execution: tool.execution,
63
69
  };
64
70
  }
65
71
  export function getToolFingerprints(baseline) {
@@ -93,6 +99,12 @@ export function getToolFingerprints(baseline) {
93
99
  baselineSuccessRate: tool.baselineSuccessRate,
94
100
  performanceConfidence: tool.performanceConfidence,
95
101
  securityFingerprint: tool.securityFingerprint,
102
+ title: tool.title,
103
+ outputSchema: tool.outputSchema,
104
+ outputSchemaHash: tool.outputSchemaHash,
105
+ annotations: tool.annotations,
106
+ execution: tool.execution,
107
+ baselineP99Ms: tool.baselineP99Ms,
96
108
  };
97
109
  });
98
110
  if (fingerprints.length > 0) {
package/dist/baseline/baseline-format.d.ts CHANGED
@@ -72,6 +72,8 @@ export interface BaselineServerFingerprint {
72
72
  protocolVersion: string;
73
73
  /** Available capabilities */
74
74
  capabilities: string[];
75
+ /** Server-provided instructions (MCP 2025-11-25) */
76
+ instructions?: string;
75
77
  }
76
78
  /**
77
79
  * Tool capability from discovery.
@@ -85,6 +87,24 @@ export interface ToolCapability {
85
87
  inputSchema: Record<string, unknown>;
86
88
  /** Hash of the schema for change detection */
87
89
  schemaHash: string;
90
+ /** Human-readable title for the tool (MCP 2025-11-25) */
91
+ title?: string;
92
+ /** JSON Schema for the tool's output (MCP 2025-11-25 structured content) */
93
+ outputSchema?: Record<string, unknown>;
94
+ /** Hash of the output schema for drift detection */
95
+ outputSchemaHash?: string;
96
+ /** Behavioral annotations/hints (MCP 2025-11-25) */
97
+ annotations?: {
98
+ title?: string;
99
+ readOnlyHint?: boolean;
100
+ destructiveHint?: boolean;
101
+ idempotentHint?: boolean;
102
+ openWorldHint?: boolean;
103
+ };
104
+ /** Task execution configuration (MCP 2025-11-25) */
105
+ execution?: {
106
+ taskSupport?: string;
107
+ };
88
108
  /** Hash of observed arguments schema (from actual calls) */
89
109
  observedArgsSchemaHash?: string;
90
110
  /** Consistency of observed argument schemas (0-1) */
@@ -137,6 +157,31 @@ export interface ResourceCapability {
137
157
  description?: string;
138
158
  /** MIME type */
139
159
  mimeType?: string;
160
+ /** Human-readable title (MCP 2025-11-25) */
161
+ title?: string;
162
+ /** Resource annotations (MCP 2025-11-25) */
163
+ annotations?: {
164
+ audience?: string[];
165
+ priority?: number;
166
+ lastModified?: string;
167
+ };
168
+ /** Resource size in bytes (MCP 2025-11-25) */
169
+ size?: number;
170
+ }
171
+ /**
172
+ * Resource template capability from discovery (MCP 2025-11-25).
173
+ */
174
+ export interface ResourceTemplateCapability {
175
+ /** URI template (RFC 6570) */
176
+ uriTemplate: string;
177
+ /** Template name */
178
+ name: string;
179
+ /** Human-readable title */
180
+ title?: string;
181
+ /** Template description */
182
+ description?: string;
183
+ /** Expected MIME type */
184
+ mimeType?: string;
140
185
  }
141
186
  /**
142
187
  * Prompt capability from discovery.
@@ -146,6 +191,8 @@ export interface PromptCapability {
146
191
  name: string;
147
192
  /** Prompt description */
148
193
  description?: string;
194
+ /** Human-readable title (MCP 2025-11-25) */
195
+ title?: string;
149
196
  /** Arguments the prompt accepts */
150
197
  arguments?: Array<{
151
198
  name: string;
@@ -265,6 +312,7 @@ export interface BellwetherBaseline {
265
312
  capabilities: {
266
313
  tools: ToolCapability[];
267
314
  resources?: ResourceCapability[];
315
+ resourceTemplates?: ResourceTemplateCapability[];
268
316
  prompts?: PromptCapability[];
269
317
  };
270
318
  /** Interview results by persona */
package/dist/baseline/comparator.js CHANGED
@@ -18,6 +18,7 @@ import { compareSchemaEvolution } from './response-schema-tracker.js';
18
18
  import { checkVersionCompatibility, BaselineVersionError, parseVersion, areVersionsCompatible, getCompatibilityWarning, } from './version.js';
19
19
  import { compareSchemas, computeSchemaHash } from './schema-compare.js';
20
20
  import { PERFORMANCE_TRACKING } from '../constants.js';
21
+ import { getSharedFeatureFlags } from '../protocol/index.js';
21
22
  import { hasReliableConfidence } from './performance-tracker.js';
22
23
  import { compareDocumentationScores, scoreDocumentation } from './documentation-scorer.js';
23
24
  /**
@@ -53,6 +54,8 @@ export function compareBaselines(previous, current, options = {}) {
53
54
  `Recreate the older baseline with the current CLI version, ` +
54
55
  `or use --ignore-version-mismatch to force comparison (results may be incorrect).`, v1.raw, v2.raw);
55
56
  }
57
+ // Compute shared feature flags from both baselines' protocol versions
58
+ const sharedFeatures = getSharedFeatureFlags(previous.server.protocolVersion, current.server.protocolVersion);
56
59
  const previousTools = getToolFingerprints(previous);
57
60
  const currentTools = getToolFingerprints(current);
58
61
  const previousToolMap = new Map(previousTools.map((t) => [t.name, t]));
@@ -79,7 +82,7 @@ export function compareBaselines(previous, current, options = {}) {
79
82
  toolsAdded.push(name);
80
83
  continue;
81
84
  }
82
- const toolDiff = compareTool(previousTool, currentTool, options);
85
+ const toolDiff = compareTool(previousTool, currentTool, options, sharedFeatures);
83
86
  if (toolDiff.changes.length > 0 ||
84
87
  toolDiff.schemaChanged ||
85
88
  toolDiff.descriptionChanged ||
@@ -89,11 +92,12 @@ export function compareBaselines(previous, current, options = {}) {
89
92
  behaviorChanges.push(...toolDiff.changes);
90
93
  }
91
94
  }
92
- // Compare prompts and resources
93
- behaviorChanges.push(...comparePrompts(previous.capabilities.prompts, current.capabilities.prompts));
94
- behaviorChanges.push(...compareResources(previous.capabilities.resources, current.capabilities.resources));
95
+ // Compare prompts, resources, and resource templates
96
+ behaviorChanges.push(...comparePrompts(previous.capabilities.prompts, current.capabilities.prompts, sharedFeatures));
97
+ behaviorChanges.push(...compareResources(previous.capabilities.resources, current.capabilities.resources, sharedFeatures));
98
+ behaviorChanges.push(...compareResourceTemplates(previous.capabilities.resourceTemplates, current.capabilities.resourceTemplates, sharedFeatures));
95
99
  // Compare server metadata and capabilities
96
- behaviorChanges.push(...compareServerInfo(previous.server, current.server));
100
+ behaviorChanges.push(...compareServerInfo(previous.server, current.server, sharedFeatures));
97
101
  // Compare workflows
98
102
  const workflowChanges = compareWorkflows(previous.workflows || [], current.workflows || []);
99
103
  behaviorChanges.push(...workflowChanges);
@@ -127,7 +131,7 @@ export function compareBaselines(previous, current, options = {}) {
127
131
  documentationScoreReport,
128
132
  };
129
133
  }
130
- function compareTool(previous, current, options) {
134
+ function compareTool(previous, current, options, features) {
131
135
  const changes = [];
132
136
  let schemaChanged = false;
133
137
  let descriptionChanged = false;
@@ -309,6 +313,117 @@ function compareTool(previous, current, options) {
309
313
  }
310
314
  }
311
315
  }
316
+ // Compare tool title — only when both versions support entity titles
317
+ if (features.entityTitles) {
318
+ if (previous.title !== current.title &&
319
+ (previous.title !== undefined || current.title !== undefined)) {
320
+ changes.push({
321
+ tool: current.name,
322
+ aspect: 'tool_annotations',
323
+ before: previous.title ?? 'none',
324
+ after: current.title ?? 'none',
325
+ severity: 'info',
326
+ description: `Tool "${current.name}" title changed`,
327
+ });
328
+ }
329
+ }
330
+ // Compare tool annotations — only when both versions support them
331
+ if (features.toolAnnotations) {
332
+ // Compare annotations
333
+ const prevAnno = previous.annotations;
334
+ const currAnno = current.annotations;
335
+ if (prevAnno || currAnno) {
336
+ if (prevAnno?.readOnlyHint !== currAnno?.readOnlyHint) {
337
+ // readOnlyHint changing (e.g., tool becoming non-read-only) is breaking
338
+ changes.push({
339
+ tool: current.name,
340
+ aspect: 'tool_annotations',
341
+ before: String(prevAnno?.readOnlyHint ?? 'unset'),
342
+ after: String(currAnno?.readOnlyHint ?? 'unset'),
343
+ severity: 'breaking',
344
+ description: `Tool "${current.name}" readOnlyHint changed`,
345
+ });
346
+ }
347
+ if (prevAnno?.destructiveHint !== currAnno?.destructiveHint) {
348
+ changes.push({
349
+ tool: current.name,
350
+ aspect: 'tool_annotations',
351
+ before: String(prevAnno?.destructiveHint ?? 'unset'),
352
+ after: String(currAnno?.destructiveHint ?? 'unset'),
353
+ severity: 'warning',
354
+ description: `Tool "${current.name}" destructiveHint changed`,
355
+ });
356
+ }
357
+ if (prevAnno?.idempotentHint !== currAnno?.idempotentHint) {
358
+ changes.push({
359
+ tool: current.name,
360
+ aspect: 'tool_annotations',
361
+ before: String(prevAnno?.idempotentHint ?? 'unset'),
362
+ after: String(currAnno?.idempotentHint ?? 'unset'),
363
+ severity: 'warning',
364
+ description: `Tool "${current.name}" idempotentHint changed`,
365
+ });
366
+ }
367
+ if (prevAnno?.openWorldHint !== currAnno?.openWorldHint) {
368
+ changes.push({
369
+ tool: current.name,
370
+ aspect: 'tool_annotations',
371
+ before: String(prevAnno?.openWorldHint ?? 'unset'),
372
+ after: String(currAnno?.openWorldHint ?? 'unset'),
373
+ severity: 'info',
374
+ description: `Tool "${current.name}" openWorldHint changed`,
375
+ });
376
+ }
377
+ }
378
+ }
379
+ // Compare output schema — only when both versions support structured output
380
+ if (features.structuredOutput && previous.outputSchemaHash !== current.outputSchemaHash) {
381
+ if (!previous.outputSchemaHash && current.outputSchemaHash) {
382
+ changes.push({
383
+ tool: current.name,
384
+ aspect: 'output_schema',
385
+ before: 'none',
386
+ after: `outputSchema: ${current.outputSchemaHash}`,
387
+ severity: 'warning',
388
+ description: `Tool "${current.name}" outputSchema added`,
389
+ });
390
+ }
391
+ else if (previous.outputSchemaHash && !current.outputSchemaHash) {
392
+ changes.push({
393
+ tool: current.name,
394
+ aspect: 'output_schema',
395
+ before: `outputSchema: ${previous.outputSchemaHash}`,
396
+ after: 'none',
397
+ severity: 'warning',
398
+ description: `Tool "${current.name}" outputSchema removed`,
399
+ });
400
+ }
401
+ else {
402
+ changes.push({
403
+ tool: current.name,
404
+ aspect: 'output_schema',
405
+ before: `outputSchema: ${previous.outputSchemaHash}`,
406
+ after: `outputSchema: ${current.outputSchemaHash}`,
407
+ severity: 'breaking',
408
+ description: `Tool "${current.name}" outputSchema changed`,
409
+ });
410
+ }
411
+ }
412
+ // Compare execution/task support — only when both versions support tasks
413
+ if (features.tasks) {
414
+ const prevExec = previous.execution?.taskSupport;
415
+ const currExec = current.execution?.taskSupport;
416
+ if (prevExec !== currExec && (prevExec !== undefined || currExec !== undefined)) {
417
+ changes.push({
418
+ tool: current.name,
419
+ aspect: 'tool_annotations',
420
+ before: prevExec ?? 'none',
421
+ after: currExec ?? 'none',
422
+ severity: 'warning',
423
+ description: `Tool "${current.name}" task support changed`,
424
+ });
425
+ }
426
+ }
312
427
  return {
313
428
  tool: current.name,
314
429
  changes,
@@ -321,7 +436,7 @@ function compareTool(previous, current, options) {
321
436
  schemaEvolutionDiff,
322
437
  };
323
438
  }
324
- function comparePrompts(previous, current) {
439
+ function comparePrompts(previous, current, features) {
325
440
  const changes = [];
326
441
  const prevMap = new Map((previous ?? []).map((p) => [p.name, p]));
327
442
  const currMap = new Map((current ?? []).map((p) => [p.name, p]));
@@ -348,6 +463,18 @@ function comparePrompts(previous, current) {
348
463
  description: `Prompt "${name}" description changed`,
349
464
  });
350
465
  }
466
+ if (features?.entityTitles &&
467
+ prevPrompt.title !== currPrompt.title &&
468
+ (prevPrompt.title !== undefined || currPrompt.title !== undefined)) {
469
+ changes.push({
470
+ tool: `prompt:${name}`,
471
+ aspect: 'prompt',
472
+ before: prevPrompt.title ?? 'none',
473
+ after: currPrompt.title ?? 'none',
474
+ severity: 'info',
475
+ description: `Prompt "${name}" title changed`,
476
+ });
477
+ }
351
478
  const prevArgs = prevPrompt.arguments ?? [];
352
479
  const currArgs = currPrompt.arguments ?? [];
353
480
  const prevArgMap = new Map(prevArgs.map((a) => [a.name, a]));
@@ -413,7 +540,7 @@ function comparePrompts(previous, current) {
413
540
  }
414
541
  return changes;
415
542
  }
416
- function compareResources(previous, current) {
543
+ function compareResources(previous, current, features) {
417
544
  const changes = [];
418
545
  const prevMap = new Map((previous ?? []).map((r) => [r.uri, r]));
419
546
  const currMap = new Map((current ?? []).map((r) => [r.uri, r]));
@@ -460,6 +587,45 @@ function compareResources(previous, current) {
460
587
  description: `Resource "${uri}" mime type changed`,
461
588
  });
462
589
  }
590
+ // Compare resource title — only when both versions support entity titles
591
+ if (features?.entityTitles &&
592
+ prevResource.title !== currResource.title &&
593
+ (prevResource.title !== undefined || currResource.title !== undefined)) {
594
+ changes.push({
595
+ tool: `resource:${currResource.name ?? uri}`,
596
+ aspect: 'resource',
597
+ before: prevResource.title ?? 'none',
598
+ after: currResource.title ?? 'none',
599
+ severity: 'info',
600
+ description: `Resource "${uri}" title changed`,
601
+ });
602
+ }
603
+ // Compare resource annotations — only when both versions support them
604
+ if (features?.resourceAnnotations) {
605
+ const prevAudience = prevResource.annotations?.audience?.join(',');
606
+ const currAudience = currResource.annotations?.audience?.join(',');
607
+ if (prevAudience !== currAudience && (prevAudience || currAudience)) {
608
+ changes.push({
609
+ tool: `resource:${currResource.name ?? uri}`,
610
+ aspect: 'resource_annotations',
611
+ before: prevAudience ?? 'none',
612
+ after: currAudience ?? 'none',
613
+ severity: 'warning',
614
+ description: `Resource "${uri}" audience annotation changed`,
615
+ });
616
+ }
617
+ if (prevResource.size !== currResource.size &&
618
+ (prevResource.size !== undefined || currResource.size !== undefined)) {
619
+ changes.push({
620
+ tool: `resource:${currResource.name ?? uri}`,
621
+ aspect: 'resource_annotations',
622
+ before: prevResource.size !== undefined ? String(prevResource.size) : 'unknown',
623
+ after: currResource.size !== undefined ? String(currResource.size) : 'unknown',
624
+ severity: 'info',
625
+ description: `Resource "${uri}" size changed`,
626
+ });
627
+ }
628
+ }
463
629
  }
464
630
  for (const [uri, prevResource] of prevMap) {
465
631
  if (!currMap.has(uri)) {
@@ -475,7 +641,71 @@ function compareResources(previous, current) {
475
641
  }
476
642
  return changes;
477
643
  }
478
- function compareServerInfo(previous, current) {
644
+ function compareResourceTemplates(previous, current, features) {
645
+ const changes = [];
646
+ const prevMap = new Map((previous ?? []).map((t) => [t.uriTemplate, t]));
647
+ const currMap = new Map((current ?? []).map((t) => [t.uriTemplate, t]));
648
+ for (const [uriTemplate, currTemplate] of currMap) {
649
+ const prevTemplate = prevMap.get(uriTemplate);
650
+ if (!prevTemplate) {
651
+ changes.push({
652
+ tool: `resource_template:${currTemplate.name ?? uriTemplate}`,
653
+ aspect: 'resource_template',
654
+ before: 'absent',
655
+ after: 'present',
656
+ severity: 'info',
657
+ description: `Resource template "${uriTemplate}" added`,
658
+ });
659
+ continue;
660
+ }
661
+ if (prevTemplate.description !== currTemplate.description) {
662
+ changes.push({
663
+ tool: `resource_template:${currTemplate.name ?? uriTemplate}`,
664
+ aspect: 'resource_template',
665
+ before: prevTemplate.description ?? 'none',
666
+ after: currTemplate.description ?? 'none',
667
+ severity: 'info',
668
+ description: `Resource template "${uriTemplate}" description changed`,
669
+ });
670
+ }
671
+ if (prevTemplate.mimeType !== currTemplate.mimeType) {
672
+ changes.push({
673
+ tool: `resource_template:${currTemplate.name ?? uriTemplate}`,
674
+ aspect: 'resource_template',
675
+ before: prevTemplate.mimeType ?? 'none',
676
+ after: currTemplate.mimeType ?? 'none',
677
+ severity: 'info',
678
+ description: `Resource template "${uriTemplate}" mime type changed`,
679
+ });
680
+ }
681
+ if (features?.entityTitles &&
682
+ prevTemplate.title !== currTemplate.title &&
683
+ (prevTemplate.title !== undefined || currTemplate.title !== undefined)) {
684
+ changes.push({
685
+ tool: `resource_template:${currTemplate.name ?? uriTemplate}`,
686
+ aspect: 'resource_template',
687
+ before: prevTemplate.title ?? 'none',
688
+ after: currTemplate.title ?? 'none',
689
+ severity: 'info',
690
+ description: `Resource template "${uriTemplate}" title changed`,
691
+ });
692
+ }
693
+ }
694
+ for (const [uriTemplate, prevTemplate] of prevMap) {
695
+ if (!currMap.has(uriTemplate)) {
696
+ changes.push({
697
+ tool: `resource_template:${prevTemplate.name ?? uriTemplate}`,
698
+ aspect: 'resource_template',
699
+ before: 'present',
700
+ after: 'absent',
701
+ severity: 'breaking',
702
+ description: `Resource template "${uriTemplate}" removed`,
703
+ });
704
+ }
705
+ }
706
+ return changes;
707
+ }
708
+ function compareServerInfo(previous, current, features) {
479
709
  const changes = [];
480
710
  if (previous.name !== current.name) {
481
711
  changes.push({
@@ -498,20 +728,41 @@ function compareServerInfo(previous, current) {
498
728
  });
499
729
  }
500
730
  if (previous.protocolVersion !== current.protocolVersion) {
501
- const breaking = isMajorVersionChange(previous.protocolVersion, current.protocolVersion);
731
+ // Protocol version change is always warning severity.
732
+ // The version registry handles feature gating — the version change itself
733
+ // is informational drift, not a breaking change.
502
734
  changes.push({
503
735
  tool: 'server',
504
736
  aspect: 'server',
505
737
  before: previous.protocolVersion,
506
738
  after: current.protocolVersion,
507
- severity: breaking ? 'breaking' : 'warning',
508
- description: 'Protocol version changed',
739
+ severity: 'warning',
740
+ description: `Protocol version changed from ${previous.protocolVersion} to ${current.protocolVersion}`,
509
741
  });
510
742
  }
743
+ // Compare server instructions — only when both versions support them
744
+ if (features?.serverInstructions) {
745
+ if (previous.instructions !== current.instructions &&
746
+ (previous.instructions !== undefined || current.instructions !== undefined)) {
747
+ changes.push({
748
+ tool: 'server',
749
+ aspect: 'server',
750
+ before: previous.instructions ? `"${previous.instructions.slice(0, 50)}..."` : 'none',
751
+ after: current.instructions ? `"${current.instructions.slice(0, 50)}..."` : 'none',
752
+ severity: 'info',
753
+ description: 'Server instructions changed',
754
+ });
755
+ }
756
+ }
511
757
  const prevCaps = new Set(previous.capabilities);
512
758
  const currCaps = new Set(current.capabilities);
513
759
  for (const cap of prevCaps) {
514
760
  if (!currCaps.has(cap)) {
761
+ // Skip capabilities that are version-gated and not in the shared feature set
762
+ if (cap === 'completions' && !features?.completions)
763
+ continue;
764
+ if (cap === 'tasks' && !features?.tasks)
765
+ continue;
515
766
  changes.push({
516
767
  tool: 'server',
517
768
  aspect: 'capability',
@@ -536,14 +787,6 @@ function compareServerInfo(previous, current) {
536
787
  }
537
788
  return changes;
538
789
  }
539
- function isMajorVersionChange(previous, current) {
540
- const prevMajor = parseInt(previous.split('.')[0] ?? '0', 10);
541
- const currMajor = parseInt(current.split('.')[0] ?? '0', 10);
542
- if (Number.isNaN(prevMajor) || Number.isNaN(currMajor)) {
543
- return previous !== current;
544
- }
545
- return prevMajor !== currMajor;
546
- }
547
790
  function getDeclaredSchemaHash(tool) {
548
791
  if (tool.inputSchema && Object.keys(tool.inputSchema).length > 0) {
549
792
  return computeSchemaHash(tool.inputSchema);