@mcpspec/core 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -88,7 +88,7 @@ Evaluated via `TestExecutor` — schema, equals, contains, exists, matches, type
88
88
 
89
89
  - `SecurityScanner` — Orchestrates security audits
90
90
  - `ScanConfig` — Safety controls and mode filtering
91
- - Rules: `PathTraversalRule`, `InputValidationRule`, `ResourceExhaustionRule`, `AuthBypassRule`, `InjectionRule`, `InformationDisclosureRule`
91
+ - Rules: `PathTraversalRule`, `InputValidationRule`, `ResourceExhaustionRule`, `AuthBypassRule`, `InjectionRule`, `InformationDisclosureRule`, `ToolPoisoningRule`, `ExcessiveAgencyRule`
92
92
  - `getSafePayloads`, `getPlatformPayloads`, `getPayloadsForMode` — Payload management
93
93
 
94
94
  ### Performance
@@ -105,9 +105,15 @@ Evaluated via `TestExecutor` — schema, equals, contains, exists, matches, type
105
105
 
106
106
  ### Scoring
107
107
 
108
- - `MCPScoreCalculator` — 0–100 quality score across 5 categories
108
+ - `MCPScoreCalculator` — 0–100 quality score across 5 categories; schema quality uses opinionated linting (property types, descriptions, constraints, naming conventions)
109
109
  - `BadgeGenerator` — shields.io-style SVG badges
110
110
 
111
+ ### Recording & Replay
112
+
113
+ - `RecordingStore` — Save, load, list, and delete session recordings
114
+ - `RecordingReplayer` — Replay recorded steps against a live server
115
+ - `RecordingDiffer` — Diff original recording vs replayed results (matched/changed/added/removed)
116
+
111
117
  ### Utilities
112
118
 
113
119
  - `loadYamlSafely` — FAILSAFE_SCHEMA YAML parsing
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import * as _mcpspec_shared from '@mcpspec/shared';
2
- import { ErrorTemplate, ManagedProcess, ProcessConfig, ServerConfig, ConnectionConfig, ConnectionState, TestResult, TestRunResult, CollectionDefinition, RateLimitConfig, TestDefinition, SecurityScanMode, SeverityLevel, SecurityScanConfig, SecurityFinding, SecurityScanResult, ProfileEntry, BenchmarkStats, BenchmarkResult, BenchmarkConfig, WaterfallEntry, MCPScore } from '@mcpspec/shared';
2
+ import { ErrorTemplate, ManagedProcess, ProcessConfig, ServerConfig, ConnectionConfig, ConnectionState, TestResult, TestRunResult, CollectionDefinition, RateLimitConfig, TestDefinition, SecurityScanMode, SeverityLevel, SecurityScanConfig, SecurityFinding, SecurityScanResult, ProfileEntry, BenchmarkStats, BenchmarkResult, BenchmarkConfig, WaterfallEntry, MCPScore, Recording, RecordingStep, RecordingDiff } from '@mcpspec/shared';
3
3
  import { Transport, TransportSendOptions } from '@modelcontextprotocol/sdk/shared/transport.js';
4
4
  import { JSONRPCMessage, MessageExtraInfo } from '@modelcontextprotocol/sdk/types.js';
5
5
 
@@ -352,6 +352,7 @@ declare class ResultDiffer {
352
352
  diff(baseline: TestRunResult, current: TestRunResult, baselineName?: string): RunDiff;
353
353
  }
354
354
 
355
+ declare const DANGEROUS_TOOL_PATTERNS: RegExp;
355
356
  declare class ScanConfig {
356
357
  readonly mode: SecurityScanMode;
357
358
  readonly rules: string[];
@@ -359,9 +360,12 @@ declare class ScanConfig {
359
360
  readonly acknowledgeRisk: boolean;
360
361
  readonly timeout: number;
361
362
  readonly maxProbesPerTool: number;
363
+ readonly excludeTools: string[];
364
+ readonly dryRun: boolean;
362
365
  constructor(config?: Partial<SecurityScanConfig>);
363
366
  requiresConfirmation(): boolean;
364
367
  meetsThreshold(severity: SeverityLevel): boolean;
368
+ isToolExcluded(toolName: string): boolean;
365
369
  private getRulesForMode;
366
370
  }
367
371
 
@@ -377,10 +381,27 @@ interface ScanProgress {
377
381
  onRuleComplete?: (ruleId: string, findingCount: number) => void;
378
382
  onFinding?: (finding: SecurityFinding) => void;
379
383
  }
384
+ interface DryRunResult {
385
+ tools: Array<{
386
+ name: string;
387
+ included: boolean;
388
+ reason?: string;
389
+ }>;
390
+ rules: string[];
391
+ mode: string;
392
+ }
380
393
  declare class SecurityScanner {
381
394
  private readonly rules;
382
395
  constructor();
383
396
  registerRule(rule: SecurityRule): void;
397
+ /**
398
+ * Preview which tools will be scanned without actually running payloads.
399
+ */
400
+ dryRun(client: MCPClientInterface, config: ScanConfig): Promise<DryRunResult>;
401
+ /**
402
+ * Filter tools based on config exclusions.
403
+ */
404
+ filterTools(tools: ToolInfo[], config: ScanConfig): ToolInfo[];
384
405
  scan(client: MCPClientInterface, config: ScanConfig, progress?: ScanProgress): Promise<SecurityScanResult>;
385
406
  private buildSummary;
386
407
  private registerBuiltinRules;
@@ -436,6 +457,22 @@ declare class InformationDisclosureRule implements SecurityRule {
436
457
  private getFirstParam;
437
458
  }
438
459
 
460
+ declare class ToolPoisoningRule implements SecurityRule {
461
+ readonly id = "tool-poisoning";
462
+ readonly name = "Tool Poisoning";
463
+ readonly description = "Detects manipulation attempts in tool descriptions that could mislead LLMs";
464
+ scan(_client: MCPClientInterface, tools: ToolInfo[], _config: ScanConfig): Promise<SecurityFinding[]>;
465
+ }
466
+
467
+ declare class ExcessiveAgencyRule implements SecurityRule {
468
+ readonly id = "excessive-agency";
469
+ readonly name = "Excessive Agency";
470
+ readonly description = "Detects tools with overly broad permissions or missing safety controls";
471
+ scan(_client: MCPClientInterface, tools: ToolInfo[], _config: ScanConfig): Promise<SecurityFinding[]>;
472
+ private getParamNames;
473
+ private getParamDescriptions;
474
+ }
475
+
439
476
  interface PayloadSet {
440
477
  category: string;
441
478
  label: string;
@@ -513,8 +550,10 @@ declare class MCPScoreCalculator {
513
550
  calculate(client: MCPClientInterface, progress?: ScoreProgress): Promise<MCPScore>;
514
551
  private scoreDocumentation;
515
552
  private scoreSchemaQuality;
553
+ /** Score a single tool's schema from 0.0 to 1.0 across 6 weighted criteria. */
554
+ private scoreToolSchema;
516
555
  private scoreErrorHandling;
517
- private scorePerformance;
556
+ private scoreResponsiveness;
518
557
  private scoreSecurity;
519
558
  }
520
559
 
@@ -523,4 +562,33 @@ declare class BadgeGenerator {
523
562
  getColor(score: number): string;
524
563
  }
525
564
 
526
- export { AuthBypassRule, BadgeGenerator, BaselineStore, type BenchmarkProgress, BenchmarkRunner, ConnectionManager, ConsoleReporter, DocGenerator, type DocGeneratorOptions, ERROR_CODE_MAP, ERROR_TEMPLATES, type ErrorCode, HtmlDocGenerator, HtmlReporter, InformationDisclosureRule, InjectionRule, InputValidationRule, JsonReporter, JunitReporter, LoggingTransport, MCPClient, type MCPClientInterface, MCPScoreCalculator, MCPSpecError, MarkdownGenerator, NotImplementedError, type OnProtocolMessage, PathTraversalRule, type PayloadSet, type PlatformPayload, ProcessManagerImpl, ProcessRegistry, Profiler, RateLimiter, ResourceExhaustionRule, ResultDiffer, type RunDiff, ScanConfig, type ScanProgress, type ScoreProgress, SecretMasker, type SecurityRule, SecurityScanner, type ServerDocData, TapReporter, type TestDiff, TestExecutor, type TestRunReporter, TestRunner, TestScheduler, WaterfallGenerator, YAML_LIMITS, computeStats, formatError, getPayloadsForMode, getPlatformInfo, getPlatformPayloads, getSafePayloads, loadYamlSafely, queryJsonPath, registerCleanupHandlers, resolveVariables };
565
+ declare class RecordingStore {
566
+ private basePath;
567
+ constructor(basePath?: string);
568
+ save(name: string, recording: Recording): string;
569
+ load(name: string): Recording | null;
570
+ list(): string[];
571
+ delete(name: string): boolean;
572
+ private getFilePath;
573
+ private ensureDir;
574
+ }
575
+
576
+ interface ReplayProgress {
577
+ onStepStart?: (index: number, step: RecordingStep) => void;
578
+ onStepComplete?: (index: number, replayed: RecordingStep) => void;
579
+ }
580
+ interface ReplayResult {
581
+ originalRecording: Recording;
582
+ replayedSteps: RecordingStep[];
583
+ replayedAt: string;
584
+ }
585
+ declare class RecordingReplayer {
586
+ replay(recording: Recording, client: MCPClientInterface, progress?: ReplayProgress): Promise<ReplayResult>;
587
+ }
588
+
589
+ declare class RecordingDiffer {
590
+ diff(recording: Recording, replayedSteps: RecordingStep[], replayedAt: string): RecordingDiff;
591
+ private describeChange;
592
+ }
593
+
594
+ export { AuthBypassRule, BadgeGenerator, BaselineStore, type BenchmarkProgress, BenchmarkRunner, ConnectionManager, ConsoleReporter, DANGEROUS_TOOL_PATTERNS, DocGenerator, type DocGeneratorOptions, type DryRunResult, ERROR_CODE_MAP, ERROR_TEMPLATES, type ErrorCode, ExcessiveAgencyRule, HtmlDocGenerator, HtmlReporter, InformationDisclosureRule, InjectionRule, InputValidationRule, JsonReporter, JunitReporter, LoggingTransport, MCPClient, type MCPClientInterface, MCPScoreCalculator, MCPSpecError, MarkdownGenerator, NotImplementedError, type OnProtocolMessage, PathTraversalRule, type PayloadSet, type PlatformPayload, ProcessManagerImpl, ProcessRegistry, Profiler, RateLimiter, RecordingDiffer, RecordingReplayer, RecordingStore, type ReplayProgress, type ReplayResult, ResourceExhaustionRule, ResultDiffer, type RunDiff, ScanConfig, type ScanProgress, type ScoreProgress, SecretMasker, type SecurityRule, SecurityScanner, type ServerDocData, TapReporter, type TestDiff, TestExecutor, type TestRunReporter, TestRunner, TestScheduler, ToolPoisoningRule, WaterfallGenerator, YAML_LIMITS, computeStats, formatError, getPayloadsForMode, getPlatformInfo, getPlatformPayloads, getSafePayloads, loadYamlSafely, queryJsonPath, registerCleanupHandlers, resolveVariables };
package/dist/index.js CHANGED
@@ -1230,7 +1230,7 @@ var TestExecutor = class {
1230
1230
  assertions: assertionResults
1231
1231
  };
1232
1232
  }
1233
- const response = this.buildResponse(result);
1233
+ const response = this.buildResponse(result, test.rawResponse);
1234
1234
  if (test.assertions) {
1235
1235
  for (const assertion of test.assertions) {
1236
1236
  assertionResults.push(this.runAssertion(assertion, response, Date.now() - startTime));
@@ -1273,7 +1273,7 @@ var TestExecutor = class {
1273
1273
  };
1274
1274
  }
1275
1275
  }
1276
- buildResponse(result) {
1276
+ buildResponse(result, rawResponse) {
1277
1277
  const contents = result.content;
1278
1278
  if (!Array.isArray(contents) || contents.length === 0) {
1279
1279
  return {};
@@ -1281,6 +1281,9 @@ var TestExecutor = class {
1281
1281
  if (contents.length === 1) {
1282
1282
  const item = contents[0];
1283
1283
  if (item["type"] === "text" && typeof item["text"] === "string") {
1284
+ if (rawResponse) {
1285
+ return { content: item["text"], text: item["text"] };
1286
+ }
1284
1287
  try {
1285
1288
  return JSON.parse(item["text"]);
1286
1289
  } catch {
@@ -1420,17 +1423,16 @@ var TestScheduler = class {
1420
1423
  return skippedResults;
1421
1424
  }
1422
1425
  if (parallelism <= 1) {
1423
- const executor2 = new TestExecutor(initialVariables, rateLimiter);
1426
+ const executor = new TestExecutor(initialVariables, rateLimiter);
1424
1427
  const results2 = [];
1425
1428
  for (const test of filteredTests) {
1426
1429
  reporter?.onTestStart(test.name);
1427
- const result = await executor2.execute(test, client);
1430
+ const result = await executor.execute(test, client);
1428
1431
  results2.push(result);
1429
1432
  reporter?.onTestComplete(result);
1430
1433
  }
1431
1434
  return [...results2, ...skippedResults];
1432
1435
  }
1433
- const executor = new TestExecutor(initialVariables, rateLimiter);
1434
1436
  let running = 0;
1435
1437
  const results = new Array(filteredTests.length);
1436
1438
  const waitQueue = [];
@@ -1455,6 +1457,7 @@ var TestScheduler = class {
1455
1457
  return (async () => {
1456
1458
  await acquire();
1457
1459
  try {
1460
+ const executor = new TestExecutor(initialVariables, rateLimiter);
1458
1461
  reporter?.onTestStart(test.name);
1459
1462
  const result = await executor.execute(test, client);
1460
1463
  results[i] = result;
@@ -2075,7 +2078,9 @@ var SEVERITY_ORDER = ["info", "low", "medium", "high", "critical"];
2075
2078
  var PASSIVE_RULES = [
2076
2079
  "path-traversal",
2077
2080
  "input-validation",
2078
- "information-disclosure"
2081
+ "information-disclosure",
2082
+ "tool-poisoning",
2083
+ "excessive-agency"
2079
2084
  ];
2080
2085
  var ACTIVE_RULES = [
2081
2086
  ...PASSIVE_RULES,
@@ -2086,6 +2091,7 @@ var ACTIVE_RULES = [
2086
2091
  var AGGRESSIVE_RULES = [...ACTIVE_RULES];
2087
2092
  var DEFAULT_TIMEOUT = 1e4;
2088
2093
  var DEFAULT_MAX_PROBES = 50;
2094
+ var DANGEROUS_TOOL_PATTERNS = /^(delete|drop|remove|destroy|kill|purge|truncate|wipe|reset|erase)[_-]|[_-](delete|drop|remove|destroy|kill|purge|truncate|wipe|reset|erase)$/i;
2089
2095
  var ScanConfig = class {
2090
2096
  mode;
2091
2097
  rules;
@@ -2093,12 +2099,16 @@ var ScanConfig = class {
2093
2099
  acknowledgeRisk;
2094
2100
  timeout;
2095
2101
  maxProbesPerTool;
2102
+ excludeTools;
2103
+ dryRun;
2096
2104
  constructor(config = {}) {
2097
2105
  this.mode = config.mode ?? "passive";
2098
2106
  this.severityThreshold = config.severityThreshold ?? "info";
2099
2107
  this.acknowledgeRisk = config.acknowledgeRisk ?? false;
2100
2108
  this.timeout = config.timeout ?? DEFAULT_TIMEOUT;
2101
2109
  this.maxProbesPerTool = config.maxProbesPerTool ?? DEFAULT_MAX_PROBES;
2110
+ this.excludeTools = config.excludeTools ?? [];
2111
+ this.dryRun = config.dryRun ?? false;
2102
2112
  const allRulesForMode = this.getRulesForMode(this.mode);
2103
2113
  if (config.rules && config.rules.length > 0) {
2104
2114
  this.rules = config.rules.filter((r) => allRulesForMode.includes(r));
@@ -2114,6 +2124,11 @@ var ScanConfig = class {
2114
2124
  const severityIdx = SEVERITY_ORDER.indexOf(severity);
2115
2125
  return severityIdx >= thresholdIdx;
2116
2126
  }
2127
+ isToolExcluded(toolName) {
2128
+ if (this.excludeTools.includes(toolName)) return true;
2129
+ if (this.mode !== "passive" && DANGEROUS_TOOL_PATTERNS.test(toolName)) return true;
2130
+ return false;
2131
+ }
2117
2132
  getRulesForMode(mode) {
2118
2133
  switch (mode) {
2119
2134
  case "passive":
@@ -2127,7 +2142,7 @@ var ScanConfig = class {
2127
2142
  };
2128
2143
 
2129
2144
  // src/security/security-scanner.ts
2130
- import { randomUUID as randomUUID9 } from "crypto";
2145
+ import { randomUUID as randomUUID11 } from "crypto";
2131
2146
 
2132
2147
  // src/security/rules/path-traversal.ts
2133
2148
  import { randomUUID as randomUUID3 } from "crypto";
@@ -2680,6 +2695,217 @@ var InformationDisclosureRule = class {
2680
2695
  }
2681
2696
  };
2682
2697
 
2698
+ // src/security/rules/tool-poisoning.ts
2699
+ import { randomUUID as randomUUID9 } from "crypto";
2700
+ var SUSPICIOUS_PATTERNS = [
2701
+ /ignore\s+(all\s+)?previous/i,
2702
+ /do\s+not\s+tell\s+the\s+user/i,
2703
+ /override\s+instructions/i,
2704
+ /disregard\s+(all\s+)?previous/i,
2705
+ /pretend\s+(you\s+are|to\s+be)/i,
2706
+ /forget\s+everything/i,
2707
+ /ignore\s+your\s+(instructions|rules)/i,
2708
+ /you\s+must\s+not\s+(reveal|disclose|mention)/i,
2709
+ /system\s+prompt/i,
2710
+ /act\s+as\s+(if|though)/i
2711
+ ];
2712
+ var ZERO_WIDTH_CHARS = /[\u200B-\u200D\uFEFF]/;
2713
+ var BIDI_OVERRIDES = /[\u202A-\u202E\u2066-\u2069]/;
2714
+ var CROSS_TOOL_PATTERNS = [
2715
+ /when\s+using\s+\w+.*also\s+call/i,
2716
+ /before\s+calling\s+\w+.*first\s+run/i,
2717
+ /after\s+(using|calling)\s+\w+.*invoke/i,
2718
+ /always\s+call\s+\w+\s+(before|after|with)/i
2719
+ ];
2720
+ var EMBEDDED_CODE_PATTERNS = [
2721
+ /```[\s\S]*?```/,
2722
+ /<script[\s>]/i,
2723
+ /eval\s*\(/,
2724
+ /require\s*\(/,
2725
+ /import\s*\(/
2726
+ ];
2727
+ var MAX_DESCRIPTION_LENGTH = 1e3;
2728
+ var ToolPoisoningRule = class {
2729
+ id = "tool-poisoning";
2730
+ name = "Tool Poisoning";
2731
+ description = "Detects manipulation attempts in tool descriptions that could mislead LLMs";
2732
+ async scan(_client, tools, _config) {
2733
+ const findings = [];
2734
+ for (const tool of tools) {
2735
+ const desc = tool.description ?? "";
2736
+ for (const pattern of SUSPICIOUS_PATTERNS) {
2737
+ if (pattern.test(desc)) {
2738
+ findings.push({
2739
+ id: randomUUID9(),
2740
+ rule: this.id,
2741
+ severity: "high",
2742
+ title: `Suspicious instruction in tool "${tool.name}"`,
2743
+ description: `Tool description contains prompt injection pattern: ${pattern.source}`,
2744
+ evidence: desc.slice(0, 200),
2745
+ remediation: "Remove manipulative instructions from tool descriptions"
2746
+ });
2747
+ break;
2748
+ }
2749
+ }
2750
+ if (ZERO_WIDTH_CHARS.test(desc) || BIDI_OVERRIDES.test(desc)) {
2751
+ findings.push({
2752
+ id: randomUUID9(),
2753
+ rule: this.id,
2754
+ severity: "high",
2755
+ title: `Hidden Unicode characters in tool "${tool.name}"`,
2756
+ description: "Tool description contains zero-width or bidirectional override characters that can hide malicious content",
2757
+ evidence: `Description length: ${desc.length} characters`,
2758
+ remediation: "Remove invisible Unicode characters from tool descriptions"
2759
+ });
2760
+ }
2761
+ for (const pattern of CROSS_TOOL_PATTERNS) {
2762
+ if (pattern.test(desc)) {
2763
+ findings.push({
2764
+ id: randomUUID9(),
2765
+ rule: this.id,
2766
+ severity: "medium",
2767
+ title: `Cross-tool reference in tool "${tool.name}"`,
2768
+ description: "Tool description instructs the LLM to call other tools, which could be used to chain unauthorized actions",
2769
+ evidence: desc.slice(0, 200),
2770
+ remediation: "Remove cross-tool instructions from descriptions"
2771
+ });
2772
+ break;
2773
+ }
2774
+ }
2775
+ if (desc.length > MAX_DESCRIPTION_LENGTH) {
2776
+ findings.push({
2777
+ id: randomUUID9(),
2778
+ rule: this.id,
2779
+ severity: "low",
2780
+ title: `Overly long description for tool "${tool.name}"`,
2781
+ description: `Tool description is ${desc.length} characters (threshold: ${MAX_DESCRIPTION_LENGTH}). Long descriptions may hide malicious instructions`,
2782
+ remediation: "Keep tool descriptions concise and focused"
2783
+ });
2784
+ }
2785
+ for (const pattern of EMBEDDED_CODE_PATTERNS) {
2786
+ if (pattern.test(desc)) {
2787
+ findings.push({
2788
+ id: randomUUID9(),
2789
+ rule: this.id,
2790
+ severity: "medium",
2791
+ title: `Embedded code in tool "${tool.name}" description`,
2792
+ description: "Tool description contains code blocks or executable patterns",
2793
+ evidence: desc.slice(0, 200),
2794
+ remediation: "Remove code blocks from tool descriptions"
2795
+ });
2796
+ break;
2797
+ }
2798
+ }
2799
+ }
2800
+ return findings;
2801
+ }
2802
+ };
2803
+
2804
+ // src/security/rules/excessive-agency.ts
2805
+ import { randomUUID as randomUUID10 } from "crypto";
2806
+ var DESTRUCTIVE_TOOL_PATTERN = /delete|drop|destroy|remove|kill|purge|truncate|wipe|reset|erase|shutdown|terminate/i;
2807
+ var CONFIRMATION_PARAMS = ["confirmation", "dryrun", "dry_run", "confirm", "force"];
2808
+ var CODE_EXEC_PARAMS = ["code", "script", "command", "query", "sql", "eval", "shell", "exec", "expression", "cmd"];
2809
+ var ExcessiveAgencyRule = class {
2810
+ id = "excessive-agency";
2811
+ name = "Excessive Agency";
2812
+ description = "Detects tools with overly broad permissions or missing safety controls";
2813
+ async scan(_client, tools, _config) {
2814
+ const findings = [];
2815
+ for (const tool of tools) {
2816
+ if (DESTRUCTIVE_TOOL_PATTERN.test(tool.name)) {
2817
+ const params2 = this.getParamNames(tool);
2818
+ const hasConfirmation = params2.some((p) => CONFIRMATION_PARAMS.includes(p.toLowerCase()));
2819
+ if (!hasConfirmation) {
2820
+ findings.push({
2821
+ id: randomUUID10(),
2822
+ rule: this.id,
2823
+ severity: "medium",
2824
+ title: `Destructive tool "${tool.name}" lacks confirmation parameter`,
2825
+ description: "Tool with destructive capability does not require confirmation, dryRun, or force parameter",
2826
+ remediation: "Add a confirmation, dryRun, or force parameter to destructive tools"
2827
+ });
2828
+ }
2829
+ }
2830
+ const params = this.getParamNames(tool);
2831
+ for (const param of params) {
2832
+ if (CODE_EXEC_PARAMS.includes(param.toLowerCase())) {
2833
+ findings.push({
2834
+ id: randomUUID10(),
2835
+ rule: this.id,
2836
+ severity: "high",
2837
+ title: `Code execution parameter "${param}" in tool "${tool.name}"`,
2838
+ description: "Tool accepts arbitrary code or command input, which could enable unauthorized actions",
2839
+ remediation: "Use specific, constrained parameters instead of generic code/command inputs"
2840
+ });
2841
+ break;
2842
+ }
2843
+ }
2844
+ const schema = tool.inputSchema;
2845
+ if (schema && typeof schema === "object") {
2846
+ const props = schema.properties;
2847
+ const required = schema.required;
2848
+ if ((!props || Object.keys(props).length === 0) && (!required || required.length === 0)) {
2849
+ findings.push({
2850
+ id: randomUUID10(),
2851
+ rule: this.id,
2852
+ severity: "medium",
2853
+ title: `Overly broad schema for tool "${tool.name}"`,
2854
+ description: "Tool schema has no defined properties or required fields, accepting arbitrary input",
2855
+ remediation: "Define explicit input schema with typed properties and required fields"
2856
+ });
2857
+ }
2858
+ }
2859
+ if (!tool.description || tool.description.trim() === "") {
2860
+ findings.push({
2861
+ id: randomUUID10(),
2862
+ rule: this.id,
2863
+ severity: "low",
2864
+ title: `Missing description for tool "${tool.name}"`,
2865
+ description: "Tool lacks a description, making it difficult to understand its purpose and risks",
2866
+ remediation: "Add a clear, informative description to the tool"
2867
+ });
2868
+ }
2869
+ const paramDescs = this.getParamDescriptions(tool);
2870
+ if (paramDescs.total > 0) {
2871
+ const missingRatio = paramDescs.missing / paramDescs.total;
2872
+ if (missingRatio > 0.5) {
2873
+ findings.push({
2874
+ id: randomUUID10(),
2875
+ rule: this.id,
2876
+ severity: "low",
2877
+ title: `Missing parameter descriptions in tool "${tool.name}"`,
2878
+ description: `${paramDescs.missing} of ${paramDescs.total} parameters lack descriptions`,
2879
+ remediation: "Add descriptions to all parameters to clarify their purpose"
2880
+ });
2881
+ }
2882
+ }
2883
+ }
2884
+ return findings;
2885
+ }
2886
+ getParamNames(tool) {
2887
+ const schema = tool.inputSchema;
2888
+ if (!schema || typeof schema !== "object") return [];
2889
+ const props = schema.properties;
2890
+ if (!props) return [];
2891
+ return Object.keys(props);
2892
+ }
2893
+ getParamDescriptions(tool) {
2894
+ const schema = tool.inputSchema;
2895
+ if (!schema || typeof schema !== "object") return { total: 0, missing: 0 };
2896
+ const props = schema.properties;
2897
+ if (!props) return { total: 0, missing: 0 };
2898
+ const entries = Object.values(props);
2899
+ let missing = 0;
2900
+ for (const prop of entries) {
2901
+ if (!prop || typeof prop !== "object" || !prop.description) {
2902
+ missing++;
2903
+ }
2904
+ }
2905
+ return { total: entries.length, missing };
2906
+ }
2907
+ };
2908
+
2683
2909
  // src/security/security-scanner.ts
2684
2910
  var SEVERITY_ORDER2 = ["info", "low", "medium", "high", "critical"];
2685
2911
  var SecurityScanner = class {
@@ -2690,10 +2916,47 @@ var SecurityScanner = class {
2690
2916
  registerRule(rule) {
2691
2917
  this.rules.set(rule.id, rule);
2692
2918
  }
2919
+ /**
2920
+ * Preview which tools will be scanned without actually running payloads.
2921
+ */
2922
+ async dryRun(client, config) {
2923
+ const allTools = await client.listTools();
2924
+ const toolResults = allTools.map((tool) => {
2925
+ if (config.excludeTools.includes(tool.name)) {
2926
+ return { name: tool.name, included: false, reason: "excluded by --exclude-tools" };
2927
+ }
2928
+ if (config.isToolExcluded(tool.name)) {
2929
+ return { name: tool.name, included: false, reason: "auto-skipped (destructive name)" };
2930
+ }
2931
+ return { name: tool.name, included: true };
2932
+ });
2933
+ return {
2934
+ tools: toolResults,
2935
+ rules: [...config.rules],
2936
+ mode: config.mode
2937
+ };
2938
+ }
2939
+ /**
2940
+ * Filter tools based on config exclusions.
2941
+ */
2942
+ filterTools(tools, config) {
2943
+ return tools.filter((tool) => !config.isToolExcluded(tool.name));
2944
+ }
2693
2945
  async scan(client, config, progress) {
2694
2946
  const startedAt = /* @__PURE__ */ new Date();
2695
2947
  const findings = [];
2696
- const tools = await client.listTools();
2948
+ const allTools = await client.listTools();
2949
+ const tools = this.filterTools(allTools, config);
2950
+ const skippedCount = allTools.length - tools.length;
2951
+ if (skippedCount > 0) {
2952
+ findings.push({
2953
+ id: randomUUID11(),
2954
+ rule: "safety-filter",
2955
+ severity: "info",
2956
+ title: `${skippedCount} tool(s) excluded from scan`,
2957
+ description: `${skippedCount} tool(s) were excluded from scanning due to safety filters or --exclude-tools.`
2958
+ });
2959
+ }
2697
2960
  for (const ruleId of config.rules) {
2698
2961
  const rule = this.rules.get(ruleId);
2699
2962
  if (!rule) continue;
@@ -2707,7 +2970,7 @@ var SecurityScanner = class {
2707
2970
  progress?.onRuleComplete?.(rule.id, ruleFindings.length);
2708
2971
  } catch (err) {
2709
2972
  const errorFinding = {
2710
- id: randomUUID9(),
2973
+ id: randomUUID11(),
2711
2974
  rule: ruleId,
2712
2975
  severity: "info",
2713
2976
  title: `Rule "${ruleId}" failed to complete`,
@@ -2724,7 +2987,7 @@ var SecurityScanner = class {
2724
2987
  const completedAt = /* @__PURE__ */ new Date();
2725
2988
  const serverInfo = client.getServerInfo();
2726
2989
  return {
2727
- id: randomUUID9(),
2990
+ id: randomUUID11(),
2728
2991
  serverName: serverInfo?.name ?? "unknown",
2729
2992
  mode: config.mode,
2730
2993
  startedAt,
@@ -2759,6 +3022,8 @@ var SecurityScanner = class {
2759
3022
  this.registerRule(new AuthBypassRule());
2760
3023
  this.registerRule(new InjectionRule());
2761
3024
  this.registerRule(new InformationDisclosureRule());
3025
+ this.registerRule(new ToolPoisoningRule());
3026
+ this.registerRule(new ExcessiveAgencyRule());
2762
3027
  }
2763
3028
  };
2764
3029
 
@@ -3121,14 +3386,14 @@ var MCPScoreCalculator = class {
3121
3386
  progress?.onCategoryStart?.("errorHandling");
3122
3387
  const errorHandling = await this.scoreErrorHandling(client, tools);
3123
3388
  progress?.onCategoryComplete?.("errorHandling", errorHandling);
3124
- progress?.onCategoryStart?.("performance");
3125
- const performance4 = await this.scorePerformance(client, tools);
3126
- progress?.onCategoryComplete?.("performance", performance4);
3389
+ progress?.onCategoryStart?.("responsiveness");
3390
+ const responsiveness = await this.scoreResponsiveness(client, tools);
3391
+ progress?.onCategoryComplete?.("responsiveness", responsiveness);
3127
3392
  progress?.onCategoryStart?.("security");
3128
3393
  const security = await this.scoreSecurity(client);
3129
3394
  progress?.onCategoryComplete?.("security", security);
3130
3395
  const overall = Math.round(
3131
- documentation * 0.25 + schemaQuality * 0.25 + errorHandling * 0.2 + performance4 * 0.15 + security * 0.15
3396
+ documentation * 0.25 + schemaQuality * 0.25 + errorHandling * 0.2 + responsiveness * 0.15 + security * 0.15
3132
3397
  );
3133
3398
  return {
3134
3399
  overall,
@@ -3136,7 +3401,7 @@ var MCPScoreCalculator = class {
3136
3401
  documentation,
3137
3402
  schemaQuality,
3138
3403
  errorHandling,
3139
- performance: performance4,
3404
+ responsiveness,
3140
3405
  security
3141
3406
  }
3142
3407
  };
@@ -3154,16 +3419,50 @@ var MCPScoreCalculator = class {
3154
3419
  if (tools.length === 0) return 0;
3155
3420
  let totalPoints = 0;
3156
3421
  for (const tool of tools) {
3157
- const schema = tool.inputSchema;
3158
- if (!schema) continue;
3159
- let toolPoints = 0;
3160
- if (schema.type) toolPoints += 1 / 3;
3161
- if (schema.properties && typeof schema.properties === "object") toolPoints += 1 / 3;
3162
- if (schema.required && Array.isArray(schema.required)) toolPoints += 1 / 3;
3163
- totalPoints += toolPoints;
3422
+ totalPoints += this.scoreToolSchema(tool);
3164
3423
  }
3165
3424
  return Math.round(totalPoints / tools.length * 100);
3166
3425
  }
3426
+ /** Score a single tool's schema from 0.0 to 1.0 across 6 weighted criteria. */
3427
+ scoreToolSchema(tool) {
3428
+ const schema = tool.inputSchema;
3429
+ if (!schema) return 0;
3430
+ let score = 0;
3431
+ const hasType = !!schema.type;
3432
+ const properties = schema.properties;
3433
+ const hasProperties = properties && typeof properties === "object" && Object.keys(properties).length > 0;
3434
+ score += (hasType ? 0.1 : 0) + (hasProperties ? 0.1 : 0);
3435
+ if (!hasProperties || !properties) return score;
3436
+ const propEntries = Object.entries(properties);
3437
+ const withType = propEntries.filter(([, prop]) => !!prop.type).length;
3438
+ score += withType / propEntries.length * 0.2;
3439
+ const withDesc = propEntries.filter(([, prop]) => {
3440
+ const desc = prop.description;
3441
+ return typeof desc === "string" && desc.trim().length > 0;
3442
+ }).length;
3443
+ score += withDesc / propEntries.length * 0.2;
3444
+ const required = schema.required;
3445
+ if (Array.isArray(required) && required.length > 0) {
3446
+ score += 0.15;
3447
+ }
3448
+ const constraintKeys = ["enum", "pattern", "minimum", "maximum", "minLength", "maxLength", "minItems", "maxItems", "format", "default"];
3449
+ const withConstraints = propEntries.filter(([, prop]) => {
3450
+ if (constraintKeys.some((k) => prop[k] !== void 0)) return true;
3451
+ if (prop.type === "object" && prop.properties && typeof prop.properties === "object") {
3452
+ const nested = prop.properties;
3453
+ return Object.keys(nested).length > 0 && Object.values(nested).some((np) => !!np.type);
3454
+ }
3455
+ if (prop.type === "array" && prop.items && typeof prop.items === "object") return true;
3456
+ return false;
3457
+ }).length;
3458
+ score += withConstraints / propEntries.length * 0.15;
3459
+ const names = propEntries.map(([name]) => name);
3460
+ const camelCount = names.filter((n) => /^[a-z][a-zA-Z0-9]*$/.test(n)).length;
3461
+ const snakeCount = names.filter((n) => /^[a-z][a-z0-9_]*$/.test(n)).length;
3462
+ const bestConvention = Math.max(camelCount, snakeCount);
3463
+ score += bestConvention / names.length * 0.1;
3464
+ return score;
3465
+ }
3167
3466
  async scoreErrorHandling(client, tools) {
3168
3467
  if (tools.length === 0) return 0;
3169
3468
  const testTools = tools.slice(0, 5);
@@ -3172,9 +3471,26 @@ var MCPScoreCalculator = class {
3172
3471
  try {
3173
3472
  const result = await client.callTool(tool.name, {});
3174
3473
  if (result.isError) {
3175
- totalScore += 100;
3474
+ const content = result.content;
3475
+ let isStructured = false;
3476
+ if (Array.isArray(content) && content.length > 0) {
3477
+ isStructured = content.some((c) => {
3478
+ const item = c;
3479
+ const text = item["text"];
3480
+ if (typeof text !== "string") return false;
3481
+ try {
3482
+ const parsed = JSON.parse(text);
3483
+ return typeof parsed === "object" && parsed !== null && ("code" in parsed || "message" in parsed || "error" in parsed);
3484
+ } catch {
3485
+ return false;
3486
+ }
3487
+ });
3488
+ }
3489
+ totalScore += isStructured ? 100 : 80;
3176
3490
  } else {
3177
- totalScore += 50;
3491
+ const schema = tool.inputSchema;
3492
+ const hasRequired = schema && Array.isArray(schema.required) && schema.required.length > 0;
3493
+ totalScore += hasRequired ? 30 : 50;
3178
3494
  }
3179
3495
  } catch {
3180
3496
  totalScore += 0;
@@ -3182,7 +3498,7 @@ var MCPScoreCalculator = class {
3182
3498
  }
3183
3499
  return Math.round(totalScore / testTools.length);
3184
3500
  }
3185
- async scorePerformance(client, tools) {
3501
+ async scoreResponsiveness(client, tools) {
3186
3502
  if (tools.length === 0) return 20;
3187
3503
  const tool = tools[0];
3188
3504
  const latencies = [];
@@ -3253,6 +3569,145 @@ var BadgeGenerator = class {
3253
3569
  return "#e05d44";
3254
3570
  }
3255
3571
  };
3572
+
3573
+ // src/recording/recording-store.ts
3574
+ import { readFileSync as readFileSync2, writeFileSync as writeFileSync3, mkdirSync as mkdirSync3, readdirSync as readdirSync2, existsSync as existsSync2, unlinkSync } from "fs";
3575
+ import { join as join4 } from "path";
3576
/**
 * File-backed store for session recordings.
 *
 * Each recording is written as a pretty-printed JSON file named
 * `<sanitized name>.json` inside `basePath`.
 */
var RecordingStore = class {
  /** Directory that holds the recording files. */
  basePath;

  /**
   * @param {string} [basePath] Directory for recordings. When omitted (or
   *   null/undefined) it defaults to a `recordings` folder inside
   *   `getPlatformInfo().dataDir`.
   */
  constructor(basePath) {
    this.basePath = basePath ?? join4(getPlatformInfo().dataDir, "recordings");
  }

  /**
   * Serializes `recording` as 2-space-indented JSON and writes it to disk,
   * creating the base directory first if needed.
   *
   * @param {string} name Recording name (sanitized, see `getFilePath`).
   * @param {unknown} recording Value to serialize.
   * @returns {string} Path of the file that was written.
   */
  save(name, recording) {
    this.ensureDir();
    const filePath = this.getFilePath(name);
    writeFileSync3(filePath, JSON.stringify(recording, null, 2), "utf-8");
    return filePath;
  }

  /**
   * Reads and parses the recording stored under `name`.
   *
   * @param {string} name Recording name.
   * @returns {unknown|null} The parsed JSON, or `null` when no such file exists.
   * @throws Any other read error, and `SyntaxError` if the file is not valid JSON.
   */
  load(name) {
    const filePath = this.getFilePath(name);
    let raw;
    try {
      // Read directly instead of exists-then-read: the file may disappear
      // between the two calls, which used to surface as an exception.
      raw = readFileSync2(filePath, "utf-8");
    } catch (err) {
      if (this.isNotFound(err)) return null;
      throw err;
    }
    return JSON.parse(raw);
  }

  /**
   * Lists stored recordings by file name, without the `.json` extension.
   * Creates the base directory if it does not exist yet.
   *
   * @returns {string[]} Names derived from the `*.json` entries in `basePath`.
   */
  list() {
    this.ensureDir();
    return readdirSync2(this.basePath)
      .filter((f) => f.endsWith(".json"))
      .map((f) => f.replace(/\.json$/, ""));
  }

  /**
   * Deletes the recording stored under `name`.
   *
   * @param {string} name Recording name.
   * @returns {boolean} `true` if a file was removed, `false` if none existed.
   * @throws Any unlink error other than "not found".
   */
  delete(name) {
    const filePath = this.getFilePath(name);
    try {
      // Unlink directly; a concurrent removal is reported as "nothing deleted".
      unlinkSync(filePath);
      return true;
    } catch (err) {
      if (this.isNotFound(err)) return false;
      throw err;
    }
  }

  /**
   * Maps a recording name to its file path. Every character outside
   * `[a-zA-Z0-9_-]` is replaced by `_`, so path separators and dots cannot
   * escape `basePath`; note that distinct names can map to the same file.
   *
   * @param {string} name Recording name.
   * @returns {string} Path of the JSON file for that name.
   */
  getFilePath(name) {
    const safeName = name.replace(/[^a-zA-Z0-9_-]/g, "_");
    return join4(this.basePath, `${safeName}.json`);
  }

  /**
   * Ensures `basePath` exists. With `recursive: true`, `mkdirSync` does not
   * fail when the directory is already there, so no prior existence check
   * is needed.
   */
  ensureDir() {
    mkdirSync3(this.basePath, { recursive: true });
  }

  /**
   * Tells whether a filesystem error means "the path does not exist".
   *
   * @param {unknown} err Value caught from an fs call.
   * @returns {boolean} `true` for ENOENT / ENOTDIR errors.
   */
  isNotFound(err) {
    return (
      err !== null &&
      typeof err === "object" &&
      (err.code === "ENOENT" || err.code === "ENOTDIR")
    );
  }
};
3612
+
3613
+ // src/recording/recording-replayer.ts
3614
+ var RecordingReplayer = class {
3615
+ async replay(recording, client, progress) {
3616
+ const replayedSteps = [];
3617
+ for (let i = 0; i < recording.steps.length; i++) {
3618
+ const step = recording.steps[i];
3619
+ progress?.onStepStart?.(i, step);
3620
+ const start = performance.now();
3621
+ let output = [];
3622
+ let isError = false;
3623
+ try {
3624
+ const result = await client.callTool(step.tool, step.input);
3625
+ output = result.content;
3626
+ isError = result.isError === true;
3627
+ } catch (err) {
3628
+ output = [{ type: "text", text: err instanceof Error ? err.message : String(err) }];
3629
+ isError = true;
3630
+ }
3631
+ const durationMs = Math.round(performance.now() - start);
3632
+ const replayed = {
3633
+ tool: step.tool,
3634
+ input: step.input,
3635
+ output,
3636
+ isError,
3637
+ durationMs
3638
+ };
3639
+ replayedSteps.push(replayed);
3640
+ progress?.onStepComplete?.(i, replayed);
3641
+ }
3642
+ return {
3643
+ originalRecording: recording,
3644
+ replayedSteps,
3645
+ replayedAt: (/* @__PURE__ */ new Date()).toISOString()
3646
+ };
3647
+ }
3648
+ };
3649
+
3650
+ // src/recording/recording-differ.ts
3651
+ var RecordingDiffer = class {
3652
+ diff(recording, replayedSteps, replayedAt) {
3653
+ const steps = [];
3654
+ const maxLen = Math.max(recording.steps.length, replayedSteps.length);
3655
+ for (let i = 0; i < maxLen; i++) {
3656
+ const original = recording.steps[i];
3657
+ const replayed = replayedSteps[i];
3658
+ if (original && replayed) {
3659
+ const outputMatch = JSON.stringify(original.output) === JSON.stringify(replayed.output);
3660
+ const errorMatch = (original.isError ?? false) === (replayed.isError ?? false);
3661
+ const isMatched = outputMatch && errorMatch;
3662
+ steps.push({
3663
+ index: i,
3664
+ tool: original.tool,
3665
+ type: isMatched ? "matched" : "changed",
3666
+ original,
3667
+ replayed,
3668
+ outputDiff: isMatched ? void 0 : this.describeChange(original, replayed)
3669
+ });
3670
+ } else if (original && !replayed) {
3671
+ steps.push({
3672
+ index: i,
3673
+ tool: original.tool,
3674
+ type: "removed",
3675
+ original
3676
+ });
3677
+ } else if (!original && replayed) {
3678
+ steps.push({
3679
+ index: i,
3680
+ tool: replayed.tool,
3681
+ type: "added",
3682
+ replayed
3683
+ });
3684
+ }
3685
+ }
3686
+ const summary = {
3687
+ matched: steps.filter((s) => s.type === "matched").length,
3688
+ changed: steps.filter((s) => s.type === "changed").length,
3689
+ added: steps.filter((s) => s.type === "added").length,
3690
+ removed: steps.filter((s) => s.type === "removed").length
3691
+ };
3692
+ return {
3693
+ recordingId: recording.id,
3694
+ recordingName: recording.name,
3695
+ replayedAt,
3696
+ steps,
3697
+ summary
3698
+ };
3699
+ }
3700
+ describeChange(original, replayed) {
3701
+ const parts = [];
3702
+ if ((original.isError ?? false) !== (replayed.isError ?? false)) {
3703
+ parts.push(`error state: ${original.isError ?? false} \u2192 ${replayed.isError ?? false}`);
3704
+ }
3705
+ if (JSON.stringify(original.output) !== JSON.stringify(replayed.output)) {
3706
+ parts.push("output content changed");
3707
+ }
3708
+ return parts.join("; ");
3709
+ }
3710
+ };
3256
3711
  export {
3257
3712
  AuthBypassRule,
3258
3713
  BadgeGenerator,
@@ -3260,9 +3715,11 @@ export {
3260
3715
  BenchmarkRunner,
3261
3716
  ConnectionManager,
3262
3717
  ConsoleReporter,
3718
+ DANGEROUS_TOOL_PATTERNS,
3263
3719
  DocGenerator,
3264
3720
  ERROR_CODE_MAP,
3265
3721
  ERROR_TEMPLATES,
3722
+ ExcessiveAgencyRule,
3266
3723
  HtmlDocGenerator,
3267
3724
  HtmlReporter,
3268
3725
  InformationDisclosureRule,
@@ -3281,6 +3738,9 @@ export {
3281
3738
  ProcessRegistry,
3282
3739
  Profiler,
3283
3740
  RateLimiter,
3741
+ RecordingDiffer,
3742
+ RecordingReplayer,
3743
+ RecordingStore,
3284
3744
  ResourceExhaustionRule,
3285
3745
  ResultDiffer,
3286
3746
  ScanConfig,
@@ -3290,6 +3750,7 @@ export {
3290
3750
  TestExecutor,
3291
3751
  TestRunner,
3292
3752
  TestScheduler,
3753
+ ToolPoisoningRule,
3293
3754
  WaterfallGenerator,
3294
3755
  YAML_LIMITS,
3295
3756
  computeStats,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mcpspec/core",
3
- "version": "1.0.2",
3
+ "version": "1.1.0",
4
4
  "type": "module",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
@@ -31,7 +31,7 @@
31
31
  "expr-eval": "^2.0.2",
32
32
  "handlebars": "^4.7.8",
33
33
  "zod": "^3.22.0",
34
- "@mcpspec/shared": "1.0.2"
34
+ "@mcpspec/shared": "1.1.0"
35
35
  },
36
36
  "devDependencies": {
37
37
  "@types/js-yaml": "^4.0.9",