@mcpspec/core 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -88,7 +88,7 @@ Evaluated via `TestExecutor` — schema, equals, contains, exists, matches, type
88
88
 
89
89
  - `SecurityScanner` — Orchestrates security audits
90
90
  - `ScanConfig` — Safety controls and mode filtering
91
- - Rules: `PathTraversalRule`, `InputValidationRule`, `ResourceExhaustionRule`, `AuthBypassRule`, `InjectionRule`, `InformationDisclosureRule`
91
+ - Rules: `PathTraversalRule`, `InputValidationRule`, `ResourceExhaustionRule`, `AuthBypassRule`, `InjectionRule`, `InformationDisclosureRule`, `ToolPoisoningRule`, `ExcessiveAgencyRule`
92
92
  - `getSafePayloads`, `getPlatformPayloads`, `getPayloadsForMode` — Payload management
93
93
 
94
94
  ### Performance
@@ -105,9 +105,15 @@ Evaluated via `TestExecutor` — schema, equals, contains, exists, matches, type
105
105
 
106
106
  ### Scoring
107
107
 
108
- - `MCPScoreCalculator` — 0–100 quality score across 5 categories
108
+ - `MCPScoreCalculator` — 0–100 quality score across 5 categories; schema quality uses opinionated linting (property types, descriptions, constraints, naming conventions)
109
109
  - `BadgeGenerator` — shields.io-style SVG badges
110
110
 
111
+ ### Recording & Replay
112
+
113
+ - `RecordingStore` — Save, load, list, and delete session recordings
114
+ - `RecordingReplayer` — Replay recorded steps against a live server
115
+ - `RecordingDiffer` — Diff original recording vs replayed results (matched/changed/added/removed)
116
+
111
117
  ### Utilities
112
118
 
113
119
  - `loadYamlSafely` — FAILSAFE_SCHEMA YAML parsing
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import * as _mcpspec_shared from '@mcpspec/shared';
2
- import { ErrorTemplate, ManagedProcess, ProcessConfig, ServerConfig, ConnectionConfig, ConnectionState, TestResult, TestRunResult, CollectionDefinition, RateLimitConfig, TestDefinition, SecurityScanMode, SeverityLevel, SecurityScanConfig, SecurityFinding, SecurityScanResult, ProfileEntry, BenchmarkStats, BenchmarkResult, BenchmarkConfig, WaterfallEntry, MCPScore } from '@mcpspec/shared';
2
+ import { ErrorTemplate, ManagedProcess, ProcessConfig, ServerConfig, ConnectionConfig, ConnectionState, TestResult, TestRunResult, CollectionDefinition, RateLimitConfig, TestDefinition, SecurityScanMode, SeverityLevel, SecurityScanConfig, SecurityFinding, SecurityScanResult, ProfileEntry, BenchmarkStats, BenchmarkResult, BenchmarkConfig, WaterfallEntry, MCPScore, Recording, RecordingStep, RecordingDiff } from '@mcpspec/shared';
3
3
  import { Transport, TransportSendOptions } from '@modelcontextprotocol/sdk/shared/transport.js';
4
4
  import { JSONRPCMessage, MessageExtraInfo } from '@modelcontextprotocol/sdk/types.js';
5
5
 
@@ -457,6 +457,22 @@ declare class InformationDisclosureRule implements SecurityRule {
457
457
  private getFirstParam;
458
458
  }
459
459
 
460
/**
 * Security rule that inspects each tool's description text for signs of tool
 * poisoning: prompt-injection phrasing, zero-width or bidirectional-override
 * characters, instructions to call other tools, descriptions longer than 1000
 * characters, and embedded code.
 */
+ declare class ToolPoisoningRule implements SecurityRule {
461
+ readonly id = "tool-poisoning";
462
+ readonly name = "Tool Poisoning";
463
+ readonly description = "Detects manipulation attempts in tool descriptions that could mislead LLMs";
464
/**
 * Checks every tool in `tools`; the client and config arguments are not used.
 * @returns At most one finding per check category for each tool.
 */
+ scan(_client: MCPClientInterface, tools: ToolInfo[], _config: ScanConfig): Promise<SecurityFinding[]>;
465
+ }
466
+
467
/**
 * Security rule that inspects tool names and input schemas for excessive
 * agency: destructive-sounding tools without a confirmation/dry-run/force
 * parameter, parameters named like code or command inputs, schemas with no
 * declared properties or required fields, missing tool descriptions, and tools
 * where more than half of the parameters lack a description.
 */
+ declare class ExcessiveAgencyRule implements SecurityRule {
468
+ readonly id = "excessive-agency";
469
+ readonly name = "Excessive Agency";
470
+ readonly description = "Detects tools with overly broad permissions or missing safety controls";
471
/**
 * Checks every tool in `tools`; the client and config arguments are not used.
 * @returns The findings for all tools, in tool order.
 */
+ scan(_client: MCPClientInterface, tools: ToolInfo[], _config: ScanConfig): Promise<SecurityFinding[]>;
472
/** Returns the keys of the tool's `inputSchema.properties`, or an empty list when absent. */
+ private getParamNames;
473
/** Counts the tool's declared parameters and how many of them have no description. */
+ private getParamDescriptions;
474
+ }
475
+
460
476
  interface PayloadSet {
461
477
  category: string;
462
478
  label: string;
@@ -534,6 +550,8 @@ declare class MCPScoreCalculator {
534
550
  calculate(client: MCPClientInterface, progress?: ScoreProgress): Promise<MCPScore>;
535
551
  private scoreDocumentation;
536
552
  private scoreSchemaQuality;
553
+ /** Score a single tool's schema from 0.0 to 1.0 across 6 weighted criteria. */
554
+ private scoreToolSchema;
537
555
  private scoreErrorHandling;
538
556
  private scoreResponsiveness;
539
557
  private scoreSecurity;
@@ -544,4 +562,33 @@ declare class BadgeGenerator {
544
562
  getColor(score: number): string;
545
563
  }
546
564
 
547
- export { AuthBypassRule, BadgeGenerator, BaselineStore, type BenchmarkProgress, BenchmarkRunner, ConnectionManager, ConsoleReporter, DANGEROUS_TOOL_PATTERNS, DocGenerator, type DocGeneratorOptions, type DryRunResult, ERROR_CODE_MAP, ERROR_TEMPLATES, type ErrorCode, HtmlDocGenerator, HtmlReporter, InformationDisclosureRule, InjectionRule, InputValidationRule, JsonReporter, JunitReporter, LoggingTransport, MCPClient, type MCPClientInterface, MCPScoreCalculator, MCPSpecError, MarkdownGenerator, NotImplementedError, type OnProtocolMessage, PathTraversalRule, type PayloadSet, type PlatformPayload, ProcessManagerImpl, ProcessRegistry, Profiler, RateLimiter, ResourceExhaustionRule, ResultDiffer, type RunDiff, ScanConfig, type ScanProgress, type ScoreProgress, SecretMasker, type SecurityRule, SecurityScanner, type ServerDocData, TapReporter, type TestDiff, TestExecutor, type TestRunReporter, TestRunner, TestScheduler, WaterfallGenerator, YAML_LIMITS, computeStats, formatError, getPayloadsForMode, getPlatformInfo, getPlatformPayloads, getSafePayloads, loadYamlSafely, queryJsonPath, registerCleanupHandlers, resolveVariables };
565
/**
 * File-backed store for session recordings: each recording is one JSON file
 * inside `basePath`. Recording names are sanitised before use as file names
 * (every character outside `a-z`, `A-Z`, `0-9`, `_` and `-` becomes `_`).
 */
+ declare class RecordingStore {
566
+ private basePath;
567
/** @param basePath Directory for the JSON files; defaults to `recordings` inside the platform data directory. */
+ constructor(basePath?: string);
568
/** Writes the recording as pretty-printed JSON, creating the directory if needed, and returns the file path. */
+ save(name: string, recording: Recording): string;
569
/** Returns the parsed recording, or `null` when no file exists for `name`. */
+ load(name: string): Recording | null;
570
/** Lists stored recording names (file names without the `.json` suffix); creates the directory if it is missing. */
+ list(): string[];
571
/** Deletes the recording file; returns `false` when there was nothing to delete. */
+ delete(name: string): boolean;
572
+ private getFilePath;
573
+ private ensureDir;
574
+ }
575
+
576
/** Optional callbacks invoked by `RecordingReplayer.replay` around each replayed step. */
+ interface ReplayProgress {
577
/** Called before the step's tool call is issued, with the step index and the recorded step. */
+ onStepStart?: (index: number, step: RecordingStep) => void;
578
/** Called after the step's tool call has finished (or failed), with the step index and the replayed step. */
+ onStepComplete?: (index: number, replayed: RecordingStep) => void;
579
+ }
580
/** Outcome of replaying a recording. */
+ interface ReplayResult {
581
/** The recording that was passed to `replay`, unchanged. */
+ originalRecording: Recording;
582
/** One replayed step per recorded step, in the same order. */
+ replayedSteps: RecordingStep[];
583
/** ISO-8601 timestamp taken after the last step finished. */
+ replayedAt: string;
584
+ }
585
/** Replays the steps of a recording against a client, one tool call at a time. */
+ declare class RecordingReplayer {
586
/**
 * Calls each recorded tool with its recorded input, sequentially. A call that
 * throws is captured as an error-flagged step whose output is the error
 * message, so the replay continues with the next step.
 */
+ replay(recording: Recording, client: MCPClientInterface, progress?: ReplayProgress): Promise<ReplayResult>;
587
+ }
588
+
589
/** Compares a recording with the steps produced by replaying it. */
+ declare class RecordingDiffer {
590
/**
 * Pairs recorded and replayed steps by index. A pair is `matched` when the
 * JSON-serialised outputs and the error flags are equal, otherwise `changed`;
 * steps present on only one side are reported as `removed` or `added`.
 */
+ diff(recording: Recording, replayedSteps: RecordingStep[], replayedAt: string): RecordingDiff;
591
+ private describeChange;
592
+ }
593
+
594
+ export { AuthBypassRule, BadgeGenerator, BaselineStore, type BenchmarkProgress, BenchmarkRunner, ConnectionManager, ConsoleReporter, DANGEROUS_TOOL_PATTERNS, DocGenerator, type DocGeneratorOptions, type DryRunResult, ERROR_CODE_MAP, ERROR_TEMPLATES, type ErrorCode, ExcessiveAgencyRule, HtmlDocGenerator, HtmlReporter, InformationDisclosureRule, InjectionRule, InputValidationRule, JsonReporter, JunitReporter, LoggingTransport, MCPClient, type MCPClientInterface, MCPScoreCalculator, MCPSpecError, MarkdownGenerator, NotImplementedError, type OnProtocolMessage, PathTraversalRule, type PayloadSet, type PlatformPayload, ProcessManagerImpl, ProcessRegistry, Profiler, RateLimiter, RecordingDiffer, RecordingReplayer, RecordingStore, type ReplayProgress, type ReplayResult, ResourceExhaustionRule, ResultDiffer, type RunDiff, ScanConfig, type ScanProgress, type ScoreProgress, SecretMasker, type SecurityRule, SecurityScanner, type ServerDocData, TapReporter, type TestDiff, TestExecutor, type TestRunReporter, TestRunner, TestScheduler, ToolPoisoningRule, WaterfallGenerator, YAML_LIMITS, computeStats, formatError, getPayloadsForMode, getPlatformInfo, getPlatformPayloads, getSafePayloads, loadYamlSafely, queryJsonPath, registerCleanupHandlers, resolveVariables };
package/dist/index.js CHANGED
@@ -2078,7 +2078,9 @@ var SEVERITY_ORDER = ["info", "low", "medium", "high", "critical"];
2078
2078
  var PASSIVE_RULES = [
2079
2079
  "path-traversal",
2080
2080
  "input-validation",
2081
- "information-disclosure"
2081
+ "information-disclosure",
2082
+ "tool-poisoning",
2083
+ "excessive-agency"
2082
2084
  ];
2083
2085
  var ACTIVE_RULES = [
2084
2086
  ...PASSIVE_RULES,
@@ -2140,7 +2142,7 @@ var ScanConfig = class {
2140
2142
  };
2141
2143
 
2142
2144
  // src/security/security-scanner.ts
2143
- import { randomUUID as randomUUID9 } from "crypto";
2145
+ import { randomUUID as randomUUID11 } from "crypto";
2144
2146
 
2145
2147
  // src/security/rules/path-traversal.ts
2146
2148
  import { randomUUID as randomUUID3 } from "crypto";
@@ -2693,6 +2695,217 @@ var InformationDisclosureRule = class {
2693
2695
  }
2694
2696
  };
2695
2697
 
2698
+ // src/security/rules/tool-poisoning.ts
2699
+ import { randomUUID as randomUUID9 } from "crypto";
2700
// Phrases that address the consuming LLM directly instead of describing the tool.
var SUSPICIOUS_PATTERNS = [
  /ignore\s+(all\s+)?previous/i,
  /do\s+not\s+tell\s+the\s+user/i,
  /override\s+instructions/i,
  /disregard\s+(all\s+)?previous/i,
  /pretend\s+(you\s+are|to\s+be)/i,
  /forget\s+everything/i,
  /ignore\s+your\s+(instructions|rules)/i,
  /you\s+must\s+not\s+(reveal|disclose|mention)/i,
  /system\s+prompt/i,
  /act\s+as\s+(if|though)/i
];
// Invisible code points that can hide text from a human reviewer.
var ZERO_WIDTH_CHARS = /[\u200B-\u200D\uFEFF]/;
var BIDI_OVERRIDES = /[\u202A-\u202E\u2066-\u2069]/;
// Phrases that instruct the LLM to chain additional tool calls.
var CROSS_TOOL_PATTERNS = [
  /when\s+using\s+\w+.*also\s+call/i,
  /before\s+calling\s+\w+.*first\s+run/i,
  /after\s+(using|calling)\s+\w+.*invoke/i,
  /always\s+call\s+\w+\s+(before|after|with)/i
];
// Fenced code blocks and common dynamic-execution call shapes.
var EMBEDDED_CODE_PATTERNS = [
  /```[\s\S]*?```/,
  /<script[\s>]/i,
  /eval\s*\(/,
  /require\s*\(/,
  /import\s*\(/
];
// Descriptions longer than this many characters are reported as suspicious.
var MAX_DESCRIPTION_LENGTH = 1000;
/**
 * Passive rule that inspects tool descriptions only; it never calls a tool.
 * Each tool is run through five independent checks, in this order: prompt
 * injection phrasing, hidden Unicode, cross-tool instructions, excessive
 * length, embedded code. Each check contributes at most one finding per tool.
 */
var ToolPoisoningRule = class {
  id = "tool-poisoning";
  name = "Tool Poisoning";
  description = "Detects manipulation attempts in tool descriptions that could mislead LLMs";
  /**
   * @param _client Unused; the rule works on metadata only.
   * @param tools Tools whose `name` and `description` are inspected.
   * @param _config Unused.
   * @returns The findings for all tools, in tool order.
   */
  async scan(_client, tools, _config) {
    const findings = [];
    // Prepends the fields shared by every finding; `details` supplies the rest in output order.
    const report = (details) => {
      findings.push({ id: randomUUID9(), rule: this.id, ...details });
    };
    for (const tool of tools) {
      const text = tool.description ?? "";

      const injection = SUSPICIOUS_PATTERNS.find((candidate) => candidate.test(text));
      if (injection !== undefined) {
        report({
          severity: "high",
          title: `Suspicious instruction in tool "${tool.name}"`,
          description: `Tool description contains prompt injection pattern: ${injection.source}`,
          evidence: text.slice(0, 200),
          remediation: "Remove manipulative instructions from tool descriptions"
        });
      }

      const hasHiddenChars = ZERO_WIDTH_CHARS.test(text) || BIDI_OVERRIDES.test(text);
      if (hasHiddenChars) {
        report({
          severity: "high",
          title: `Hidden Unicode characters in tool "${tool.name}"`,
          description: "Tool description contains zero-width or bidirectional override characters that can hide malicious content",
          evidence: `Description length: ${text.length} characters`,
          remediation: "Remove invisible Unicode characters from tool descriptions"
        });
      }

      if (CROSS_TOOL_PATTERNS.some((candidate) => candidate.test(text))) {
        report({
          severity: "medium",
          title: `Cross-tool reference in tool "${tool.name}"`,
          description: "Tool description instructs the LLM to call other tools, which could be used to chain unauthorized actions",
          evidence: text.slice(0, 200),
          remediation: "Remove cross-tool instructions from descriptions"
        });
      }

      // This finding intentionally carries no evidence field.
      if (text.length > MAX_DESCRIPTION_LENGTH) {
        report({
          severity: "low",
          title: `Overly long description for tool "${tool.name}"`,
          description: `Tool description is ${text.length} characters (threshold: ${MAX_DESCRIPTION_LENGTH}). Long descriptions may hide malicious instructions`,
          remediation: "Keep tool descriptions concise and focused"
        });
      }

      if (EMBEDDED_CODE_PATTERNS.some((candidate) => candidate.test(text))) {
        report({
          severity: "medium",
          title: `Embedded code in tool "${tool.name}" description`,
          description: "Tool description contains code blocks or executable patterns",
          evidence: text.slice(0, 200),
          remediation: "Remove code blocks from tool descriptions"
        });
      }
    }
    return findings;
  }
};
2803
+
2804
+ // src/security/rules/excessive-agency.ts
2805
+ import { randomUUID as randomUUID10 } from "crypto";
2806
// Verbs in a tool name that suggest the tool destroys or stops something.
var DESTRUCTIVE_TOOL_PATTERN = /delete|drop|destroy|remove|kill|purge|truncate|wipe|reset|erase|shutdown|terminate/i;
// Lower-cased parameter names accepted as a safety control on a destructive tool.
var CONFIRMATION_PARAMS = ["confirmation", "dryrun", "dry_run", "confirm", "force"];
// Lower-cased parameter names that suggest free-form code or command input.
var CODE_EXEC_PARAMS = ["code", "script", "command", "query", "sql", "eval", "shell", "exec", "expression", "cmd"];
/**
 * Passive rule that inspects tool names and input schemas only; it never
 * calls a tool. Per tool it checks, in this order: destructive name without a
 * confirmation parameter, code-execution style parameter, overly broad
 * schema, missing tool description, and mostly undocumented parameters.
 */
var ExcessiveAgencyRule = class {
  id = "excessive-agency";
  name = "Excessive Agency";
  description = "Detects tools with overly broad permissions or missing safety controls";
  /**
   * @param _client Unused; the rule works on metadata only.
   * @param tools Tools whose `name`, `description` and `inputSchema` are inspected.
   * @param _config Unused.
   * @returns The findings for all tools, in tool order.
   */
  async scan(_client, tools, _config) {
    const findings = [];
    for (const tool of tools) {
      // Looked up once and shared by the destructive-tool and code-execution checks.
      const params = this.getParamNames(tool);
      if (DESTRUCTIVE_TOOL_PATTERN.test(tool.name)) {
        const hasConfirmation = params.some((p) => CONFIRMATION_PARAMS.includes(p.toLowerCase()));
        if (!hasConfirmation) {
          findings.push({
            id: randomUUID10(),
            rule: this.id,
            severity: "medium",
            title: `Destructive tool "${tool.name}" lacks confirmation parameter`,
            description: "Tool with destructive capability does not require confirmation, dryRun, or force parameter",
            remediation: "Add a confirmation, dryRun, or force parameter to destructive tools"
          });
        }
      }
      // Only the first matching parameter is reported per tool.
      const execParam = params.find((p) => CODE_EXEC_PARAMS.includes(p.toLowerCase()));
      if (execParam !== undefined) {
        findings.push({
          id: randomUUID10(),
          rule: this.id,
          severity: "high",
          title: `Code execution parameter "${execParam}" in tool "${tool.name}"`,
          description: "Tool accepts arbitrary code or command input, which could enable unauthorized actions",
          remediation: "Use specific, constrained parameters instead of generic code/command inputs"
        });
      }
      const schema = tool.inputSchema;
      if (schema && typeof schema === "object") {
        const props = schema.properties;
        const required = schema.required;
        const declaresNothing = (!props || Object.keys(props).length === 0) && (!required || required.length === 0);
        // A schema with `additionalProperties: false` and no properties accepts
        // only `{}`, so it is a genuine no-argument tool rather than one that
        // takes arbitrary input; reporting it would be a false positive.
        const rejectsExtraInput = schema.additionalProperties === false;
        if (declaresNothing && !rejectsExtraInput) {
          findings.push({
            id: randomUUID10(),
            rule: this.id,
            severity: "medium",
            title: `Overly broad schema for tool "${tool.name}"`,
            description: "Tool schema has no defined properties or required fields, accepting arbitrary input",
            remediation: "Define explicit input schema with typed properties and required fields"
          });
        }
      }
      if (!tool.description || tool.description.trim() === "") {
        findings.push({
          id: randomUUID10(),
          rule: this.id,
          severity: "low",
          title: `Missing description for tool "${tool.name}"`,
          description: "Tool lacks a description, making it difficult to understand its purpose and risks",
          remediation: "Add a clear, informative description to the tool"
        });
      }
      const paramDescs = this.getParamDescriptions(tool);
      if (paramDescs.total > 0) {
        const missingRatio = paramDescs.missing / paramDescs.total;
        // Strictly more than half must be undocumented before the tool is reported.
        if (missingRatio > 0.5) {
          findings.push({
            id: randomUUID10(),
            rule: this.id,
            severity: "low",
            title: `Missing parameter descriptions in tool "${tool.name}"`,
            description: `${paramDescs.missing} of ${paramDescs.total} parameters lack descriptions`,
            remediation: "Add descriptions to all parameters to clarify their purpose"
          });
        }
      }
    }
    return findings;
  }
  /** Returns the keys of `tool.inputSchema.properties`, or an empty list when the schema or its properties are absent. */
  getParamNames(tool) {
    const schema = tool.inputSchema;
    if (!schema || typeof schema !== "object") return [];
    const props = schema.properties;
    if (!props) return [];
    return Object.keys(props);
  }
  /**
   * Counts the declared parameters and how many of them have no description.
   * A property whose schema is not an object counts as undocumented.
   */
  getParamDescriptions(tool) {
    const schema = tool.inputSchema;
    if (!schema || typeof schema !== "object") return { total: 0, missing: 0 };
    const props = schema.properties;
    if (!props) return { total: 0, missing: 0 };
    const entries = Object.values(props);
    let missing = 0;
    for (const prop of entries) {
      if (!prop || typeof prop !== "object" || !prop.description) {
        missing++;
      }
    }
    return { total: entries.length, missing };
  }
};
2908
+
2696
2909
  // src/security/security-scanner.ts
2697
2910
  var SEVERITY_ORDER2 = ["info", "low", "medium", "high", "critical"];
2698
2911
  var SecurityScanner = class {
@@ -2737,7 +2950,7 @@ var SecurityScanner = class {
2737
2950
  const skippedCount = allTools.length - tools.length;
2738
2951
  if (skippedCount > 0) {
2739
2952
  findings.push({
2740
- id: randomUUID9(),
2953
+ id: randomUUID11(),
2741
2954
  rule: "safety-filter",
2742
2955
  severity: "info",
2743
2956
  title: `${skippedCount} tool(s) excluded from scan`,
@@ -2757,7 +2970,7 @@ var SecurityScanner = class {
2757
2970
  progress?.onRuleComplete?.(rule.id, ruleFindings.length);
2758
2971
  } catch (err) {
2759
2972
  const errorFinding = {
2760
- id: randomUUID9(),
2973
+ id: randomUUID11(),
2761
2974
  rule: ruleId,
2762
2975
  severity: "info",
2763
2976
  title: `Rule "${ruleId}" failed to complete`,
@@ -2774,7 +2987,7 @@ var SecurityScanner = class {
2774
2987
  const completedAt = /* @__PURE__ */ new Date();
2775
2988
  const serverInfo = client.getServerInfo();
2776
2989
  return {
2777
- id: randomUUID9(),
2990
+ id: randomUUID11(),
2778
2991
  serverName: serverInfo?.name ?? "unknown",
2779
2992
  mode: config.mode,
2780
2993
  startedAt,
@@ -2809,6 +3022,8 @@ var SecurityScanner = class {
2809
3022
  this.registerRule(new AuthBypassRule());
2810
3023
  this.registerRule(new InjectionRule());
2811
3024
  this.registerRule(new InformationDisclosureRule());
3025
+ this.registerRule(new ToolPoisoningRule());
3026
+ this.registerRule(new ExcessiveAgencyRule());
2812
3027
  }
2813
3028
  };
2814
3029
 
@@ -3204,16 +3419,50 @@ var MCPScoreCalculator = class {
3204
3419
  if (tools.length === 0) return 0;
3205
3420
  let totalPoints = 0;
3206
3421
  for (const tool of tools) {
3207
- const schema = tool.inputSchema;
3208
- if (!schema) continue;
3209
- let toolPoints = 0;
3210
- if (schema.type) toolPoints += 1 / 3;
3211
- if (schema.properties && typeof schema.properties === "object") toolPoints += 1 / 3;
3212
- if (schema.required && Array.isArray(schema.required)) toolPoints += 1 / 3;
3213
- totalPoints += toolPoints;
3422
+ totalPoints += this.scoreToolSchema(tool);
3214
3423
  }
3215
3424
  return Math.round(totalPoints / tools.length * 100);
3216
3425
  }
3426
+ /** Score a single tool's schema from 0.0 to 1.0 across 6 weighted criteria. */
3427
+ scoreToolSchema(tool) {
3428
+ const schema = tool.inputSchema;
3429
+ if (!schema) return 0;
3430
+ let score = 0;
3431
+ const hasType = !!schema.type;
3432
+ const properties = schema.properties;
3433
+ const hasProperties = properties && typeof properties === "object" && Object.keys(properties).length > 0;
3434
+ score += (hasType ? 0.1 : 0) + (hasProperties ? 0.1 : 0);
3435
+ if (!hasProperties || !properties) return score;
3436
+ const propEntries = Object.entries(properties);
3437
+ const withType = propEntries.filter(([, prop]) => !!prop.type).length;
3438
+ score += withType / propEntries.length * 0.2;
3439
+ const withDesc = propEntries.filter(([, prop]) => {
3440
+ const desc = prop.description;
3441
+ return typeof desc === "string" && desc.trim().length > 0;
3442
+ }).length;
3443
+ score += withDesc / propEntries.length * 0.2;
3444
+ const required = schema.required;
3445
+ if (Array.isArray(required) && required.length > 0) {
3446
+ score += 0.15;
3447
+ }
3448
+ const constraintKeys = ["enum", "pattern", "minimum", "maximum", "minLength", "maxLength", "minItems", "maxItems", "format", "default"];
3449
+ const withConstraints = propEntries.filter(([, prop]) => {
3450
+ if (constraintKeys.some((k) => prop[k] !== void 0)) return true;
3451
+ if (prop.type === "object" && prop.properties && typeof prop.properties === "object") {
3452
+ const nested = prop.properties;
3453
+ return Object.keys(nested).length > 0 && Object.values(nested).some((np) => !!np.type);
3454
+ }
3455
+ if (prop.type === "array" && prop.items && typeof prop.items === "object") return true;
3456
+ return false;
3457
+ }).length;
3458
+ score += withConstraints / propEntries.length * 0.15;
3459
+ const names = propEntries.map(([name]) => name);
3460
+ const camelCount = names.filter((n) => /^[a-z][a-zA-Z0-9]*$/.test(n)).length;
3461
+ const snakeCount = names.filter((n) => /^[a-z][a-z0-9_]*$/.test(n)).length;
3462
+ const bestConvention = Math.max(camelCount, snakeCount);
3463
+ score += bestConvention / names.length * 0.1;
3464
+ return score;
3465
+ }
3217
3466
  async scoreErrorHandling(client, tools) {
3218
3467
  if (tools.length === 0) return 0;
3219
3468
  const testTools = tools.slice(0, 5);
@@ -3320,6 +3569,145 @@ var BadgeGenerator = class {
3320
3569
  return "#e05d44";
3321
3570
  }
3322
3571
  };
3572
+
3573
+ // src/recording/recording-store.ts
3574
+ import { readFileSync as readFileSync2, writeFileSync as writeFileSync3, mkdirSync as mkdirSync3, readdirSync as readdirSync2, existsSync as existsSync2, unlinkSync } from "fs";
3575
+ import { join as join4 } from "path";
3576
/**
 * File-backed store for session recordings. Every recording lives in its own
 * JSON file inside `basePath`; the file name is the sanitised recording name.
 */
var RecordingStore = class {
  basePath;
  /** @param basePath Storage directory; defaults to `recordings` inside the platform data directory. */
  constructor(basePath) {
    this.basePath = basePath ?? join4(getPlatformInfo().dataDir, "recordings");
  }
  /** Writes `recording` as pretty-printed JSON and returns the path of the written file. */
  save(name, recording) {
    this.ensureDir();
    const target = this.getFilePath(name);
    const serialized = JSON.stringify(recording, null, 2);
    writeFileSync3(target, serialized, "utf-8");
    return target;
  }
  /** Returns the parsed recording stored under `name`, or `null` when there is no such file. */
  load(name) {
    const source = this.getFilePath(name);
    return existsSync2(source) ? JSON.parse(readFileSync2(source, "utf-8")) : null;
  }
  /** Returns the names of all stored recordings, i.e. the `.json` file names without their suffix. */
  list() {
    this.ensureDir();
    const names = [];
    for (const entry of readdirSync2(this.basePath)) {
      if (entry.endsWith(".json")) {
        names.push(entry.replace(/\.json$/, ""));
      }
    }
    return names;
  }
  /** Removes the recording stored under `name`; returns whether a file was removed. */
  delete(name) {
    const target = this.getFilePath(name);
    const present = existsSync2(target);
    if (present) {
      unlinkSync(target);
    }
    return present;
  }
  /** Maps a recording name to its file path, replacing every character outside `[a-zA-Z0-9_-]` with `_`. */
  getFilePath(name) {
    const fileName = `${name.replace(/[^a-zA-Z0-9_-]/g, "_")}.json`;
    return join4(this.basePath, fileName);
  }
  /** Creates the storage directory (including parents) when it does not exist yet. */
  ensureDir() {
    if (existsSync2(this.basePath)) return;
    mkdirSync3(this.basePath, { recursive: true });
  }
};
3612
+
3613
+ // src/recording/recording-replayer.ts
3614
/** Replays the steps of a recording against a client, one tool call at a time. */
var RecordingReplayer = class {
  /**
   * Calls each recorded tool with its recorded input, strictly in order.
   * @param recording Recording whose `steps` are replayed.
   * @param client Client exposing `callTool(tool, input)`.
   * @param progress Optional callbacks fired before and after every step.
   * @returns The original recording, the replayed steps and an ISO timestamp
   *   taken after the last step.
   */
  async replay(recording, client, progress) {
    const replayedSteps = [];
    // A thrown call is folded into an error-flagged text output so the replay keeps going.
    const invoke = async (step) => {
      try {
        const result = await client.callTool(step.tool, step.input);
        return { output: result.content, isError: result.isError === true };
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        return { output: [{ type: "text", text: message }], isError: true };
      }
    };
    let index = 0;
    while (index < recording.steps.length) {
      const step = recording.steps[index];
      progress?.onStepStart?.(index, step);
      const startedAt = performance.now();
      const { output, isError } = await invoke(step);
      // Duration covers the tool call only, rounded to whole milliseconds.
      const durationMs = Math.round(performance.now() - startedAt);
      const replayed = { tool: step.tool, input: step.input, output, isError, durationMs };
      replayedSteps.push(replayed);
      progress?.onStepComplete?.(index, replayed);
      index += 1;
    }
    const replayedAt = (/* @__PURE__ */ new Date()).toISOString();
    return { originalRecording: recording, replayedSteps, replayedAt };
  }
};
3649
+
3650
+ // src/recording/recording-differ.ts
3651
/** Compares a recording with the steps produced by replaying it. */
var RecordingDiffer = class {
  /**
   * Pairs recorded and replayed steps by index and classifies each pair.
   * @param recording The original recording.
   * @param replayedSteps Steps produced by the replay.
   * @param replayedAt Timestamp copied into the result as-is.
   * @returns Per-step entries plus matched/changed/added/removed counts.
   */
  diff(recording, replayedSteps, replayedAt) {
    const steps = [];
    const summary = { matched: 0, changed: 0, added: 0, removed: 0 };
    const total = Math.max(recording.steps.length, replayedSteps.length);
    for (let index = 0; index < total; index += 1) {
      const original = recording.steps[index];
      const replayed = replayedSteps[index];
      // A slot that is empty on both sides yields no entry.
      if (!original && !replayed) continue;
      if (!replayed) {
        steps.push({ index, tool: original.tool, type: "removed", original });
        summary.removed += 1;
        continue;
      }
      if (!original) {
        steps.push({ index, tool: replayed.tool, type: "added", replayed });
        summary.added += 1;
        continue;
      }
      // Outputs are compared by their JSON serialisation; a missing error flag counts as false.
      const sameOutput = JSON.stringify(original.output) === JSON.stringify(replayed.output);
      const sameErrorFlag = (original.isError ?? false) === (replayed.isError ?? false);
      const isMatched = sameOutput && sameErrorFlag;
      steps.push({
        index,
        tool: original.tool,
        type: isMatched ? "matched" : "changed",
        original,
        replayed,
        outputDiff: isMatched ? void 0 : this.describeChange(original, replayed)
      });
      if (isMatched) {
        summary.matched += 1;
      } else {
        summary.changed += 1;
      }
    }
    return {
      recordingId: recording.id,
      recordingName: recording.name,
      replayedAt,
      steps,
      summary
    };
  }
  /** Builds a short, `; `-separated description of what differs between two steps. */
  describeChange(original, replayed) {
    const before = original.isError ?? false;
    const after = replayed.isError ?? false;
    const notes = [];
    if (before !== after) {
      notes.push(`error state: ${before} \u2192 ${after}`);
    }
    const outputChanged = JSON.stringify(original.output) !== JSON.stringify(replayed.output);
    if (outputChanged) {
      notes.push("output content changed");
    }
    return notes.join("; ");
  }
};
3323
3711
  export {
3324
3712
  AuthBypassRule,
3325
3713
  BadgeGenerator,
@@ -3331,6 +3719,7 @@ export {
3331
3719
  DocGenerator,
3332
3720
  ERROR_CODE_MAP,
3333
3721
  ERROR_TEMPLATES,
3722
+ ExcessiveAgencyRule,
3334
3723
  HtmlDocGenerator,
3335
3724
  HtmlReporter,
3336
3725
  InformationDisclosureRule,
@@ -3349,6 +3738,9 @@ export {
3349
3738
  ProcessRegistry,
3350
3739
  Profiler,
3351
3740
  RateLimiter,
3741
+ RecordingDiffer,
3742
+ RecordingReplayer,
3743
+ RecordingStore,
3352
3744
  ResourceExhaustionRule,
3353
3745
  ResultDiffer,
3354
3746
  ScanConfig,
@@ -3358,6 +3750,7 @@ export {
3358
3750
  TestExecutor,
3359
3751
  TestRunner,
3360
3752
  TestScheduler,
3753
+ ToolPoisoningRule,
3361
3754
  WaterfallGenerator,
3362
3755
  YAML_LIMITS,
3363
3756
  computeStats,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mcpspec/core",
3
- "version": "1.0.3",
3
+ "version": "1.1.0",
4
4
  "type": "module",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
@@ -31,7 +31,7 @@
31
31
  "expr-eval": "^2.0.2",
32
32
  "handlebars": "^4.7.8",
33
33
  "zod": "^3.22.0",
34
- "@mcpspec/shared": "1.0.3"
34
+ "@mcpspec/shared": "1.1.0"
35
35
  },
36
36
  "devDependencies": {
37
37
  "@types/js-yaml": "^4.0.9",