npm - @mcpspec/core - Versions diffs - 1.0.2 → 1.1.0 - Mend

@mcpspec/core 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/README.md CHANGED Viewed

@@ -88,7 +88,7 @@ Evaluated via `TestExecutor` — schema, equals, contains, exists, matches, type
 - `SecurityScanner` — Orchestrates security audits
 - `ScanConfig` — Safety controls and mode filtering
-- Rules: `PathTraversalRule`, `InputValidationRule`, `ResourceExhaustionRule`, `AuthBypassRule`, `InjectionRule`, `InformationDisclosureRule`
+- Rules: `PathTraversalRule`, `InputValidationRule`, `ResourceExhaustionRule`, `AuthBypassRule`, `InjectionRule`, `InformationDisclosureRule`, `ToolPoisoningRule`, `ExcessiveAgencyRule`
 - `getSafePayloads`, `getPlatformPayloads`, `getPayloadsForMode` — Payload management
 ### Performance
@@ -105,9 +105,15 @@ Evaluated via `TestExecutor` — schema, equals, contains, exists, matches, type
 ### Scoring
-- `MCPScoreCalculator` — 0–100 quality score across 5 categories
+- `MCPScoreCalculator` — 0–100 quality score across 5 categories; schema quality uses opinionated linting (property types, descriptions, constraints, naming conventions)
 - `BadgeGenerator` — shields.io-style SVG badges
+### Recording & Replay
+- `RecordingStore` — Save, load, list, and delete session recordings
+- `RecordingReplayer` — Replay recorded steps against a live server
+- `RecordingDiffer` — Diff original recording vs replayed results (matched/changed/added/removed)
 ### Utilities
 - `loadYamlSafely` — FAILSAFE_SCHEMA YAML parsing

package/dist/index.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import * as _mcpspec_shared from '@mcpspec/shared';
-import { ErrorTemplate, ManagedProcess, ProcessConfig, ServerConfig, ConnectionConfig, ConnectionState, TestResult, TestRunResult, CollectionDefinition, RateLimitConfig, TestDefinition, SecurityScanMode, SeverityLevel, SecurityScanConfig, SecurityFinding, SecurityScanResult, ProfileEntry, BenchmarkStats, BenchmarkResult, BenchmarkConfig, WaterfallEntry, MCPScore } from '@mcpspec/shared';
+import { ErrorTemplate, ManagedProcess, ProcessConfig, ServerConfig, ConnectionConfig, ConnectionState, TestResult, TestRunResult, CollectionDefinition, RateLimitConfig, TestDefinition, SecurityScanMode, SeverityLevel, SecurityScanConfig, SecurityFinding, SecurityScanResult, ProfileEntry, BenchmarkStats, BenchmarkResult, BenchmarkConfig, WaterfallEntry, MCPScore, Recording, RecordingStep, RecordingDiff } from '@mcpspec/shared';
 import { Transport, TransportSendOptions } from '@modelcontextprotocol/sdk/shared/transport.js';
 import { JSONRPCMessage, MessageExtraInfo } from '@modelcontextprotocol/sdk/types.js';
@@ -352,6 +352,7 @@ declare class ResultDiffer {
     diff(baseline: TestRunResult, current: TestRunResult, baselineName?: string): RunDiff;
 }
+declare const DANGEROUS_TOOL_PATTERNS: RegExp;
 declare class ScanConfig {
     readonly mode: SecurityScanMode;
     readonly rules: string[];
@@ -359,9 +360,12 @@ declare class ScanConfig {
     readonly acknowledgeRisk: boolean;
     readonly timeout: number;
     readonly maxProbesPerTool: number;
+    readonly excludeTools: string[];
+    readonly dryRun: boolean;
     constructor(config?: Partial<SecurityScanConfig>);
     requiresConfirmation(): boolean;
     meetsThreshold(severity: SeverityLevel): boolean;
+    isToolExcluded(toolName: string): boolean;
     private getRulesForMode;
 }
@@ -377,10 +381,27 @@ interface ScanProgress {
     onRuleComplete?: (ruleId: string, findingCount: number) => void;
     onFinding?: (finding: SecurityFinding) => void;
 }
+interface DryRunResult {
+    tools: Array<{
+        name: string;
+        included: boolean;
+        reason?: string;
+    }>;
+    rules: string[];
+    mode: string;
+}
 declare class SecurityScanner {
     private readonly rules;
     constructor();
     registerRule(rule: SecurityRule): void;
+    /**
+     * Preview which tools will be scanned without actually running payloads.
+     */
+    dryRun(client: MCPClientInterface, config: ScanConfig): Promise<DryRunResult>;
+    /**
+     * Filter tools based on config exclusions.
+     */
+    filterTools(tools: ToolInfo[], config: ScanConfig): ToolInfo[];
     scan(client: MCPClientInterface, config: ScanConfig, progress?: ScanProgress): Promise<SecurityScanResult>;
     private buildSummary;
     private registerBuiltinRules;
@@ -436,6 +457,22 @@ declare class InformationDisclosureRule implements SecurityRule {
     private getFirstParam;
 }
+declare class ToolPoisoningRule implements SecurityRule {
+    readonly id = "tool-poisoning";
+    readonly name = "Tool Poisoning";
+    readonly description = "Detects manipulation attempts in tool descriptions that could mislead LLMs";
+    scan(_client: MCPClientInterface, tools: ToolInfo[], _config: ScanConfig): Promise<SecurityFinding[]>;
+}
+declare class ExcessiveAgencyRule implements SecurityRule {
+    readonly id = "excessive-agency";
+    readonly name = "Excessive Agency";
+    readonly description = "Detects tools with overly broad permissions or missing safety controls";
+    scan(_client: MCPClientInterface, tools: ToolInfo[], _config: ScanConfig): Promise<SecurityFinding[]>;
+    private getParamNames;
+    private getParamDescriptions;
+}
 interface PayloadSet {
     category: string;
     label: string;
@@ -513,8 +550,10 @@ declare class MCPScoreCalculator {
     calculate(client: MCPClientInterface, progress?: ScoreProgress): Promise<MCPScore>;
     private scoreDocumentation;
     private scoreSchemaQuality;
+    /** Score a single tool's schema from 0.0 to 1.0 across 6 weighted criteria. */
+    private scoreToolSchema;
     private scoreErrorHandling;
-    private scorePerformance;
+    private scoreResponsiveness;
     private scoreSecurity;
 }
@@ -523,4 +562,33 @@ declare class BadgeGenerator {
     getColor(score: number): string;
 }
-export { AuthBypassRule, BadgeGenerator, BaselineStore, type BenchmarkProgress, BenchmarkRunner, ConnectionManager, ConsoleReporter, DocGenerator, type DocGeneratorOptions, ERROR_CODE_MAP, ERROR_TEMPLATES, type ErrorCode, HtmlDocGenerator, HtmlReporter, InformationDisclosureRule, InjectionRule, InputValidationRule, JsonReporter, JunitReporter, LoggingTransport, MCPClient, type MCPClientInterface, MCPScoreCalculator, MCPSpecError, MarkdownGenerator, NotImplementedError, type OnProtocolMessage, PathTraversalRule, type PayloadSet, type PlatformPayload, ProcessManagerImpl, ProcessRegistry, Profiler, RateLimiter, ResourceExhaustionRule, ResultDiffer, type RunDiff, ScanConfig, type ScanProgress, type ScoreProgress, SecretMasker, type SecurityRule, SecurityScanner, type ServerDocData, TapReporter, type TestDiff, TestExecutor, type TestRunReporter, TestRunner, TestScheduler, WaterfallGenerator, YAML_LIMITS, computeStats, formatError, getPayloadsForMode, getPlatformInfo, getPlatformPayloads, getSafePayloads, loadYamlSafely, queryJsonPath, registerCleanupHandlers, resolveVariables };
+declare class RecordingStore {
+    private basePath;
+    constructor(basePath?: string);
+    save(name: string, recording: Recording): string;
+    load(name: string): Recording | null;
+    list(): string[];
+    delete(name: string): boolean;
+    private getFilePath;
+    private ensureDir;
+}
+interface ReplayProgress {
+    onStepStart?: (index: number, step: RecordingStep) => void;
+    onStepComplete?: (index: number, replayed: RecordingStep) => void;
+}
+interface ReplayResult {
+    originalRecording: Recording;
+    replayedSteps: RecordingStep[];
+    replayedAt: string;
+}
+declare class RecordingReplayer {
+    replay(recording: Recording, client: MCPClientInterface, progress?: ReplayProgress): Promise<ReplayResult>;
+}
+declare class RecordingDiffer {
+    diff(recording: Recording, replayedSteps: RecordingStep[], replayedAt: string): RecordingDiff;
+    private describeChange;
+}
+export { AuthBypassRule, BadgeGenerator, BaselineStore, type BenchmarkProgress, BenchmarkRunner, ConnectionManager, ConsoleReporter, DANGEROUS_TOOL_PATTERNS, DocGenerator, type DocGeneratorOptions, type DryRunResult, ERROR_CODE_MAP, ERROR_TEMPLATES, type ErrorCode, ExcessiveAgencyRule, HtmlDocGenerator, HtmlReporter, InformationDisclosureRule, InjectionRule, InputValidationRule, JsonReporter, JunitReporter, LoggingTransport, MCPClient, type MCPClientInterface, MCPScoreCalculator, MCPSpecError, MarkdownGenerator, NotImplementedError, type OnProtocolMessage, PathTraversalRule, type PayloadSet, type PlatformPayload, ProcessManagerImpl, ProcessRegistry, Profiler, RateLimiter, RecordingDiffer, RecordingReplayer, RecordingStore, type ReplayProgress, type ReplayResult, ResourceExhaustionRule, ResultDiffer, type RunDiff, ScanConfig, type ScanProgress, type ScoreProgress, SecretMasker, type SecurityRule, SecurityScanner, type ServerDocData, TapReporter, type TestDiff, TestExecutor, type TestRunReporter, TestRunner, TestScheduler, ToolPoisoningRule, WaterfallGenerator, YAML_LIMITS, computeStats, formatError, getPayloadsForMode, getPlatformInfo, getPlatformPayloads, getSafePayloads, loadYamlSafely, queryJsonPath, registerCleanupHandlers, resolveVariables };

package/dist/index.js CHANGED Viewed

@@ -1230,7 +1230,7 @@ var TestExecutor = class {
           assertions: assertionResults
         };
       }
-      const response = this.buildResponse(result);
+      const response = this.buildResponse(result, test.rawResponse);
       if (test.assertions) {
         for (const assertion of test.assertions) {
           assertionResults.push(this.runAssertion(assertion, response, Date.now() - startTime));
@@ -1273,7 +1273,7 @@ var TestExecutor = class {
       };
     }
   }
-  buildResponse(result) {
+  buildResponse(result, rawResponse) {
     const contents = result.content;
     if (!Array.isArray(contents) || contents.length === 0) {
       return {};
@@ -1281,6 +1281,9 @@ var TestExecutor = class {
     if (contents.length === 1) {
       const item = contents[0];
       if (item["type"] === "text" && typeof item["text"] === "string") {
+        if (rawResponse) {
+          return { content: item["text"], text: item["text"] };
+        }
         try {
           return JSON.parse(item["text"]);
         } catch {
@@ -1420,17 +1423,16 @@ var TestScheduler = class {
       return skippedResults;
     }
     if (parallelism <= 1) {
-      const executor2 = new TestExecutor(initialVariables, rateLimiter);
+      const executor = new TestExecutor(initialVariables, rateLimiter);
       const results2 = [];
       for (const test of filteredTests) {
         reporter?.onTestStart(test.name);
-        const result = await executor2.execute(test, client);
+        const result = await executor.execute(test, client);
         results2.push(result);
         reporter?.onTestComplete(result);
       }
       return [...results2, ...skippedResults];
     }
-    const executor = new TestExecutor(initialVariables, rateLimiter);
     let running = 0;
     const results = new Array(filteredTests.length);
     const waitQueue = [];
@@ -1455,6 +1457,7 @@ var TestScheduler = class {
       return (async () => {
         await acquire();
         try {
+          const executor = new TestExecutor(initialVariables, rateLimiter);
           reporter?.onTestStart(test.name);
           const result = await executor.execute(test, client);
           results[i] = result;
@@ -2075,7 +2078,9 @@ var SEVERITY_ORDER = ["info", "low", "medium", "high", "critical"];
 var PASSIVE_RULES = [
   "path-traversal",
   "input-validation",
-  "information-disclosure"
+  "information-disclosure",
+  "tool-poisoning",
+  "excessive-agency"
 ];
 var ACTIVE_RULES = [
   ...PASSIVE_RULES,
@@ -2086,6 +2091,7 @@ var ACTIVE_RULES = [
 var AGGRESSIVE_RULES = [...ACTIVE_RULES];
 var DEFAULT_TIMEOUT = 1e4;
 var DEFAULT_MAX_PROBES = 50;
+var DANGEROUS_TOOL_PATTERNS = /^(delete|drop|remove|destroy|kill|purge|truncate|wipe|reset|erase)[_-]|[_-](delete|drop|remove|destroy|kill|purge|truncate|wipe|reset|erase)$/i;
 var ScanConfig = class {
   mode;
   rules;
@@ -2093,12 +2099,16 @@ var ScanConfig = class {
   acknowledgeRisk;
   timeout;
   maxProbesPerTool;
+  excludeTools;
+  dryRun;
   constructor(config = {}) {
     this.mode = config.mode ?? "passive";
     this.severityThreshold = config.severityThreshold ?? "info";
     this.acknowledgeRisk = config.acknowledgeRisk ?? false;
     this.timeout = config.timeout ?? DEFAULT_TIMEOUT;
     this.maxProbesPerTool = config.maxProbesPerTool ?? DEFAULT_MAX_PROBES;
+    this.excludeTools = config.excludeTools ?? [];
+    this.dryRun = config.dryRun ?? false;
     const allRulesForMode = this.getRulesForMode(this.mode);
     if (config.rules && config.rules.length > 0) {
       this.rules = config.rules.filter((r) => allRulesForMode.includes(r));
@@ -2114,6 +2124,11 @@ var ScanConfig = class {
     const severityIdx = SEVERITY_ORDER.indexOf(severity);
     return severityIdx >= thresholdIdx;
   }
+  isToolExcluded(toolName) {
+    if (this.excludeTools.includes(toolName)) return true;
+    if (this.mode !== "passive" && DANGEROUS_TOOL_PATTERNS.test(toolName)) return true;
+    return false;
+  }
   getRulesForMode(mode) {
     switch (mode) {
       case "passive":
@@ -2127,7 +2142,7 @@ var ScanConfig = class {
 };
 // src/security/security-scanner.ts
-import { randomUUID as randomUUID9 } from "crypto";
+import { randomUUID as randomUUID11 } from "crypto";
 // src/security/rules/path-traversal.ts
 import { randomUUID as randomUUID3 } from "crypto";
@@ -2680,6 +2695,217 @@ var InformationDisclosureRule = class {
   }
 };
+// src/security/rules/tool-poisoning.ts
+import { randomUUID as randomUUID9 } from "crypto";
+var SUSPICIOUS_PATTERNS = [
+  /ignore\s+(all\s+)?previous/i,
+  /do\s+not\s+tell\s+the\s+user/i,
+  /override\s+instructions/i,
+  /disregard\s+(all\s+)?previous/i,
+  /pretend\s+(you\s+are|to\s+be)/i,
+  /forget\s+everything/i,
+  /ignore\s+your\s+(instructions|rules)/i,
+  /you\s+must\s+not\s+(reveal|disclose|mention)/i,
+  /system\s+prompt/i,
+  /act\s+as\s+(if|though)/i
+];
+var ZERO_WIDTH_CHARS = /[\u200B-\u200D\uFEFF]/;
+var BIDI_OVERRIDES = /[\u202A-\u202E\u2066-\u2069]/;
+var CROSS_TOOL_PATTERNS = [
+  /when\s+using\s+\w+.*also\s+call/i,
+  /before\s+calling\s+\w+.*first\s+run/i,
+  /after\s+(using|calling)\s+\w+.*invoke/i,
+  /always\s+call\s+\w+\s+(before|after|with)/i
+];
+var EMBEDDED_CODE_PATTERNS = [
+  /```[\s\S]*?```/,
+  /<script[\s>]/i,
+  /eval\s*\(/,
+  /require\s*\(/,
+  /import\s*\(/
+];
+var MAX_DESCRIPTION_LENGTH = 1e3;
+var ToolPoisoningRule = class {
+  id = "tool-poisoning";
+  name = "Tool Poisoning";
+  description = "Detects manipulation attempts in tool descriptions that could mislead LLMs";
+  async scan(_client, tools, _config) {
+    const findings = [];
+    for (const tool of tools) {
+      const desc = tool.description ?? "";
+      for (const pattern of SUSPICIOUS_PATTERNS) {
+        if (pattern.test(desc)) {
+          findings.push({
+            id: randomUUID9(),
+            rule: this.id,
+            severity: "high",
+            title: `Suspicious instruction in tool "${tool.name}"`,
+            description: `Tool description contains prompt injection pattern: ${pattern.source}`,
+            evidence: desc.slice(0, 200),
+            remediation: "Remove manipulative instructions from tool descriptions"
+          });
+          break;
+        }
+      }
+      if (ZERO_WIDTH_CHARS.test(desc) || BIDI_OVERRIDES.test(desc)) {
+        findings.push({
+          id: randomUUID9(),
+          rule: this.id,
+          severity: "high",
+          title: `Hidden Unicode characters in tool "${tool.name}"`,
+          description: "Tool description contains zero-width or bidirectional override characters that can hide malicious content",
+          evidence: `Description length: ${desc.length} characters`,
+          remediation: "Remove invisible Unicode characters from tool descriptions"
+        });
+      }
+      for (const pattern of CROSS_TOOL_PATTERNS) {
+        if (pattern.test(desc)) {
+          findings.push({
+            id: randomUUID9(),
+            rule: this.id,
+            severity: "medium",
+            title: `Cross-tool reference in tool "${tool.name}"`,
+            description: "Tool description instructs the LLM to call other tools, which could be used to chain unauthorized actions",
+            evidence: desc.slice(0, 200),
+            remediation: "Remove cross-tool instructions from descriptions"
+          });
+          break;
+        }
+      }
+      if (desc.length > MAX_DESCRIPTION_LENGTH) {
+        findings.push({
+          id: randomUUID9(),
+          rule: this.id,
+          severity: "low",
+          title: `Overly long description for tool "${tool.name}"`,
+          description: `Tool description is ${desc.length} characters (threshold: ${MAX_DESCRIPTION_LENGTH}). Long descriptions may hide malicious instructions`,
+          remediation: "Keep tool descriptions concise and focused"
+        });
+      }
+      for (const pattern of EMBEDDED_CODE_PATTERNS) {
+        if (pattern.test(desc)) {
+          findings.push({
+            id: randomUUID9(),
+            rule: this.id,
+            severity: "medium",
+            title: `Embedded code in tool "${tool.name}" description`,
+            description: "Tool description contains code blocks or executable patterns",
+            evidence: desc.slice(0, 200),
+            remediation: "Remove code blocks from tool descriptions"
+          });
+          break;
+        }
+      }
+    }
+    return findings;
+  }
+};
+// src/security/rules/excessive-agency.ts
+import { randomUUID as randomUUID10 } from "crypto";
+var DESTRUCTIVE_TOOL_PATTERN = /delete|drop|destroy|remove|kill|purge|truncate|wipe|reset|erase|shutdown|terminate/i;
+var CONFIRMATION_PARAMS = ["confirmation", "dryrun", "dry_run", "confirm", "force"];
+var CODE_EXEC_PARAMS = ["code", "script", "command", "query", "sql", "eval", "shell", "exec", "expression", "cmd"];
+var ExcessiveAgencyRule = class {
+  id = "excessive-agency";
+  name = "Excessive Agency";
+  description = "Detects tools with overly broad permissions or missing safety controls";
+  async scan(_client, tools, _config) {
+    const findings = [];
+    for (const tool of tools) {
+      if (DESTRUCTIVE_TOOL_PATTERN.test(tool.name)) {
+        const params2 = this.getParamNames(tool);
+        const hasConfirmation = params2.some((p) => CONFIRMATION_PARAMS.includes(p.toLowerCase()));
+        if (!hasConfirmation) {
+          findings.push({
+            id: randomUUID10(),
+            rule: this.id,
+            severity: "medium",
+            title: `Destructive tool "${tool.name}" lacks confirmation parameter`,
+            description: "Tool with destructive capability does not require confirmation, dryRun, or force parameter",
+            remediation: "Add a confirmation, dryRun, or force parameter to destructive tools"
+          });
+        }
+      }
+      const params = this.getParamNames(tool);
+      for (const param of params) {
+        if (CODE_EXEC_PARAMS.includes(param.toLowerCase())) {
+          findings.push({
+            id: randomUUID10(),
+            rule: this.id,
+            severity: "high",
+            title: `Code execution parameter "${param}" in tool "${tool.name}"`,
+            description: "Tool accepts arbitrary code or command input, which could enable unauthorized actions",
+            remediation: "Use specific, constrained parameters instead of generic code/command inputs"
+          });
+          break;
+        }
+      }
+      const schema = tool.inputSchema;
+      if (schema && typeof schema === "object") {
+        const props = schema.properties;
+        const required = schema.required;
+        if ((!props || Object.keys(props).length === 0) && (!required || required.length === 0)) {
+          findings.push({
+            id: randomUUID10(),
+            rule: this.id,
+            severity: "medium",
+            title: `Overly broad schema for tool "${tool.name}"`,
+            description: "Tool schema has no defined properties or required fields, accepting arbitrary input",
+            remediation: "Define explicit input schema with typed properties and required fields"
+          });
+        }
+      }
+      if (!tool.description || tool.description.trim() === "") {
+        findings.push({
+          id: randomUUID10(),
+          rule: this.id,
+          severity: "low",
+          title: `Missing description for tool "${tool.name}"`,
+          description: "Tool lacks a description, making it difficult to understand its purpose and risks",
+          remediation: "Add a clear, informative description to the tool"
+        });
+      }
+      const paramDescs = this.getParamDescriptions(tool);
+      if (paramDescs.total > 0) {
+        const missingRatio = paramDescs.missing / paramDescs.total;
+        if (missingRatio > 0.5) {
+          findings.push({
+            id: randomUUID10(),
+            rule: this.id,
+            severity: "low",
+            title: `Missing parameter descriptions in tool "${tool.name}"`,
+            description: `${paramDescs.missing} of ${paramDescs.total} parameters lack descriptions`,
+            remediation: "Add descriptions to all parameters to clarify their purpose"
+          });
+        }
+      }
+    }
+    return findings;
+  }
+  getParamNames(tool) {
+    const schema = tool.inputSchema;
+    if (!schema || typeof schema !== "object") return [];
+    const props = schema.properties;
+    if (!props) return [];
+    return Object.keys(props);
+  }
+  getParamDescriptions(tool) {
+    const schema = tool.inputSchema;
+    if (!schema || typeof schema !== "object") return { total: 0, missing: 0 };
+    const props = schema.properties;
+    if (!props) return { total: 0, missing: 0 };
+    const entries = Object.values(props);
+    let missing = 0;
+    for (const prop of entries) {
+      if (!prop || typeof prop !== "object" || !prop.description) {
+        missing++;
+      }
+    }
+    return { total: entries.length, missing };
+  }
+};
 // src/security/security-scanner.ts
 var SEVERITY_ORDER2 = ["info", "low", "medium", "high", "critical"];
 var SecurityScanner = class {
@@ -2690,10 +2916,47 @@ var SecurityScanner = class {
   registerRule(rule) {
     this.rules.set(rule.id, rule);
   }
+  /**
+   * Preview which tools will be scanned without actually running payloads.
+   */
+  async dryRun(client, config) {
+    const allTools = await client.listTools();
+    const toolResults = allTools.map((tool) => {
+      if (config.excludeTools.includes(tool.name)) {
+        return { name: tool.name, included: false, reason: "excluded by --exclude-tools" };
+      }
+      if (config.isToolExcluded(tool.name)) {
+        return { name: tool.name, included: false, reason: "auto-skipped (destructive name)" };
+      }
+      return { name: tool.name, included: true };
+    });
+    return {
+      tools: toolResults,
+      rules: [...config.rules],
+      mode: config.mode
+    };
+  }
+  /**
+   * Filter tools based on config exclusions.
+   */
+  filterTools(tools, config) {
+    return tools.filter((tool) => !config.isToolExcluded(tool.name));
+  }
   async scan(client, config, progress) {
     const startedAt = /* @__PURE__ */ new Date();
     const findings = [];
-    const tools = await client.listTools();
+    const allTools = await client.listTools();
+    const tools = this.filterTools(allTools, config);
+    const skippedCount = allTools.length - tools.length;
+    if (skippedCount > 0) {
+      findings.push({
+        id: randomUUID11(),
+        rule: "safety-filter",
+        severity: "info",
+        title: `${skippedCount} tool(s) excluded from scan`,
+        description: `${skippedCount} tool(s) were excluded from scanning due to safety filters or --exclude-tools.`
+      });
+    }
     for (const ruleId of config.rules) {
       const rule = this.rules.get(ruleId);
       if (!rule) continue;
@@ -2707,7 +2970,7 @@ var SecurityScanner = class {
         progress?.onRuleComplete?.(rule.id, ruleFindings.length);
       } catch (err) {
         const errorFinding = {
-          id: randomUUID9(),
+          id: randomUUID11(),
           rule: ruleId,
           severity: "info",
           title: `Rule "${ruleId}" failed to complete`,
@@ -2724,7 +2987,7 @@ var SecurityScanner = class {
     const completedAt = /* @__PURE__ */ new Date();
     const serverInfo = client.getServerInfo();
     return {
-      id: randomUUID9(),
+      id: randomUUID11(),
       serverName: serverInfo?.name ?? "unknown",
       mode: config.mode,
       startedAt,
@@ -2759,6 +3022,8 @@ var SecurityScanner = class {
     this.registerRule(new AuthBypassRule());
     this.registerRule(new InjectionRule());
     this.registerRule(new InformationDisclosureRule());
+    this.registerRule(new ToolPoisoningRule());
+    this.registerRule(new ExcessiveAgencyRule());
   }
 };
@@ -3121,14 +3386,14 @@ var MCPScoreCalculator = class {
     progress?.onCategoryStart?.("errorHandling");
     const errorHandling = await this.scoreErrorHandling(client, tools);
     progress?.onCategoryComplete?.("errorHandling", errorHandling);
-    progress?.onCategoryStart?.("performance");
-    const performance4 = await this.scorePerformance(client, tools);
-    progress?.onCategoryComplete?.("performance", performance4);
+    progress?.onCategoryStart?.("responsiveness");
+    const responsiveness = await this.scoreResponsiveness(client, tools);
+    progress?.onCategoryComplete?.("responsiveness", responsiveness);
     progress?.onCategoryStart?.("security");
     const security = await this.scoreSecurity(client);
     progress?.onCategoryComplete?.("security", security);
     const overall = Math.round(
-      documentation * 0.25 + schemaQuality * 0.25 + errorHandling * 0.2 + performance4 * 0.15 + security * 0.15
+      documentation * 0.25 + schemaQuality * 0.25 + errorHandling * 0.2 + responsiveness * 0.15 + security * 0.15
     );
     return {
       overall,
@@ -3136,7 +3401,7 @@ var MCPScoreCalculator = class {
         documentation,
         schemaQuality,
         errorHandling,
-        performance: performance4,
+        responsiveness,
         security
       }
     };
@@ -3154,16 +3419,50 @@ var MCPScoreCalculator = class {
     if (tools.length === 0) return 0;
     let totalPoints = 0;
     for (const tool of tools) {
-      const schema = tool.inputSchema;
-      if (!schema) continue;
-      let toolPoints = 0;
-      if (schema.type) toolPoints += 1 / 3;
-      if (schema.properties && typeof schema.properties === "object") toolPoints += 1 / 3;
-      if (schema.required && Array.isArray(schema.required)) toolPoints += 1 / 3;
-      totalPoints += toolPoints;
+      totalPoints += this.scoreToolSchema(tool);
     }
     return Math.round(totalPoints / tools.length * 100);
   }
+  /** Score a single tool's schema from 0.0 to 1.0 across 6 weighted criteria. */
+  scoreToolSchema(tool) {
+    const schema = tool.inputSchema;
+    if (!schema) return 0;
+    let score = 0;
+    const hasType = !!schema.type;
+    const properties = schema.properties;
+    const hasProperties = properties && typeof properties === "object" && Object.keys(properties).length > 0;
+    score += (hasType ? 0.1 : 0) + (hasProperties ? 0.1 : 0);
+    if (!hasProperties || !properties) return score;
+    const propEntries = Object.entries(properties);
+    const withType = propEntries.filter(([, prop]) => !!prop.type).length;
+    score += withType / propEntries.length * 0.2;
+    const withDesc = propEntries.filter(([, prop]) => {
+      const desc = prop.description;
+      return typeof desc === "string" && desc.trim().length > 0;
+    }).length;
+    score += withDesc / propEntries.length * 0.2;
+    const required = schema.required;
+    if (Array.isArray(required) && required.length > 0) {
+      score += 0.15;
+    }
+    const constraintKeys = ["enum", "pattern", "minimum", "maximum", "minLength", "maxLength", "minItems", "maxItems", "format", "default"];
+    const withConstraints = propEntries.filter(([, prop]) => {
+      if (constraintKeys.some((k) => prop[k] !== void 0)) return true;
+      if (prop.type === "object" && prop.properties && typeof prop.properties === "object") {
+        const nested = prop.properties;
+        return Object.keys(nested).length > 0 && Object.values(nested).some((np) => !!np.type);
+      }
+      if (prop.type === "array" && prop.items && typeof prop.items === "object") return true;
+      return false;
+    }).length;
+    score += withConstraints / propEntries.length * 0.15;
+    const names = propEntries.map(([name]) => name);
+    const camelCount = names.filter((n) => /^[a-z][a-zA-Z0-9]*$/.test(n)).length;
+    const snakeCount = names.filter((n) => /^[a-z][a-z0-9_]*$/.test(n)).length;
+    const bestConvention = Math.max(camelCount, snakeCount);
+    score += bestConvention / names.length * 0.1;
+    return score;
+  }
   async scoreErrorHandling(client, tools) {
     if (tools.length === 0) return 0;
     const testTools = tools.slice(0, 5);
@@ -3172,9 +3471,26 @@ var MCPScoreCalculator = class {
       try {
         const result = await client.callTool(tool.name, {});
         if (result.isError) {
-          totalScore += 100;
+          const content = result.content;
+          let isStructured = false;
+          if (Array.isArray(content) && content.length > 0) {
+            isStructured = content.some((c) => {
+              const item = c;
+              const text = item["text"];
+              if (typeof text !== "string") return false;
+              try {
+                const parsed = JSON.parse(text);
+                return typeof parsed === "object" && parsed !== null && ("code" in parsed || "message" in parsed || "error" in parsed);
+              } catch {
+                return false;
+              }
+            });
+          }
+          totalScore += isStructured ? 100 : 80;
         } else {
-          totalScore += 50;
+          const schema = tool.inputSchema;
+          const hasRequired = schema && Array.isArray(schema.required) && schema.required.length > 0;
+          totalScore += hasRequired ? 30 : 50;
         }
       } catch {
         totalScore += 0;
@@ -3182,7 +3498,7 @@ var MCPScoreCalculator = class {
     }
     return Math.round(totalScore / testTools.length);
   }
-  async scorePerformance(client, tools) {
+  async scoreResponsiveness(client, tools) {
     if (tools.length === 0) return 20;
     const tool = tools[0];
     const latencies = [];
@@ -3253,6 +3569,145 @@ var BadgeGenerator = class {
     return "#e05d44";
   }
 };
+// src/recording/recording-store.ts
+import { readFileSync as readFileSync2, writeFileSync as writeFileSync3, mkdirSync as mkdirSync3, readdirSync as readdirSync2, existsSync as existsSync2, unlinkSync } from "fs";
+import { join as join4 } from "path";
+var RecordingStore = class { // Persists recordings as JSON files, one file per (sanitized) name, inside basePath.
+  basePath; // Directory that holds the <name>.json recording files.
+  constructor(basePath) { // basePath may be omitted (null/undefined).
+    this.basePath = basePath ?? join4(getPlatformInfo().dataDir, "recordings"); // Default: "recordings" under getPlatformInfo().dataDir.
+  }
+  save(name, recording) { // Serializes `recording` to the file for `name` and returns that file's path.
+    this.ensureDir(); // Make sure the target directory exists before writing.
+    const filePath = this.getFilePath(name);
+    writeFileSync3(filePath, JSON.stringify(recording, null, 2), "utf-8"); // Pretty-printed with 2-space indentation.
+    return filePath;
+  }
+  load(name) { // Returns the parsed recording for `name`, or null when its file does not exist.
+    const filePath = this.getFilePath(name);
+    if (!existsSync2(filePath)) return null;
+    return JSON.parse(readFileSync2(filePath, "utf-8")); // Not wrapped in try/catch: read or parse errors propagate; the result is not validated.
+  }
+  list() { // Returns the file names in basePath that end in ".json", with that extension removed.
+    this.ensureDir(); // Creates the directory if needed, so a fresh store lists as [].
+    return readdirSync2(this.basePath).filter((f) => f.endsWith(".json")).map((f) => f.replace(/\.json$/, "")); // These are on-disk (sanitized) names, not necessarily the names passed to save().
+  }
+  delete(name) { // Removes the file for `name`; returns false if there was none, true after unlinking.
+    const filePath = this.getFilePath(name);
+    if (!existsSync2(filePath)) return false;
+    unlinkSync(filePath);
+    return true;
+  }
+  getFilePath(name) { // Maps a recording name to its .json path directly inside basePath.
+    const safeName = name.replace(/[^a-zA-Z0-9_-]/g, "_"); // Every character outside [a-zA-Z0-9_-] (including "/", "\" and ".") becomes "_"; distinct names can therefore share a file.
+    return join4(this.basePath, `${safeName}.json`);
+  }
+  ensureDir() { // Creates basePath when it does not exist yet.
+    if (!existsSync2(this.basePath)) {
+      mkdirSync3(this.basePath, { recursive: true }); // recursive: also creates missing parent directories.
+    }
+  }
+};
+// src/recording/recording-replayer.ts
+var RecordingReplayer = class { // Re-runs the steps of a recording through a client and collects the fresh results.
+  async replay(recording, client, progress) { // Calls client.callTool(step.tool, step.input) for every step; `progress` and both of its callbacks are optional. Resolves to { originalRecording, replayedSteps, replayedAt }.
+    const replayedSteps = [];
+    for (let i = 0; i < recording.steps.length; i++) { // Each call is awaited before the next step starts, in recorded order.
+      const step = recording.steps[i];
+      progress?.onStepStart?.(i, step);
+      const start = performance.now();
+      let output = [];
+      let isError = false;
+      try {
+        const result = await client.callTool(step.tool, step.input);
+        output = result.content; // Taken as-is from the result; no shape check is done here.
+        isError = result.isError === true; // Only a literal `true` marks the step as an error.
+      } catch (err) { // A thrown/rejected call does not abort the replay: it is stored as an error step.
+        output = [{ type: "text", text: err instanceof Error ? err.message : String(err) }]; // Single text item carrying the error message.
+        isError = true;
+      }
+      const durationMs = Math.round(performance.now() - start); // Elapsed time of the call (including a failed one), rounded to whole milliseconds.
+      const replayed = {
+        tool: step.tool, // tool and input are copied from the recorded step.
+        input: step.input,
+        output,
+        isError,
+        durationMs
+      };
+      replayedSteps.push(replayed);
+      progress?.onStepComplete?.(i, replayed);
+    }
+    return {
+      originalRecording: recording, // Same object that was passed in, not a copy.
+      replayedSteps,
+      replayedAt: (/* @__PURE__ */ new Date()).toISOString() // ISO 8601 timestamp taken after the last step finished.
+    };
+  }
+};
+// src/recording/recording-differ.ts
+var RecordingDiffer = class { // Compares the steps of a recording with replayed steps, position by position.
+  diff(recording, replayedSteps, replayedAt) { // Returns { recordingId, recordingName, replayedAt, steps, summary }; steps are paired by index, not by tool name.
+    const steps = [];
+    const maxLen = Math.max(recording.steps.length, replayedSteps.length); // Walk the longer list so unpaired steps on either side are reported.
+    for (let i = 0; i < maxLen; i++) {
+      const original = recording.steps[i];
+      const replayed = replayedSteps[i];
+      if (original && replayed) { // Both sides have a step at this index: "matched" or "changed".
+        const outputMatch = JSON.stringify(original.output) === JSON.stringify(replayed.output); // Compared via serialized JSON, so object key order affects the result.
+        const errorMatch = (original.isError ?? false) === (replayed.isError ?? false); // A missing isError counts as false.
+        const isMatched = outputMatch && errorMatch; // Tool name, input and duration are not part of the comparison.
+        steps.push({
+          index: i,
+          tool: original.tool, // The recorded tool name is reported for paired steps.
+          type: isMatched ? "matched" : "changed",
+          original,
+          replayed,
+          outputDiff: isMatched ? void 0 : this.describeChange(original, replayed) // Description only for "changed" steps; undefined otherwise.
+        });
+      } else if (original && !replayed) { // Recorded step with no replayed counterpart.
+        steps.push({
+          index: i,
+          tool: original.tool,
+          type: "removed",
+          original
+        });
+      } else if (!original && replayed) { // Replayed step past the end of the recording.
+        steps.push({
+          index: i,
+          tool: replayed.tool,
+          type: "added",
+          replayed
+        });
+      }
+    }
+    const summary = { // Number of steps of each type.
+      matched: steps.filter((s) => s.type === "matched").length,
+      changed: steps.filter((s) => s.type === "changed").length,
+      added: steps.filter((s) => s.type === "added").length,
+      removed: steps.filter((s) => s.type === "removed").length
+    };
+    return {
+      recordingId: recording.id,
+      recordingName: recording.name,
+      replayedAt, // Passed through unchanged from the caller.
+      steps,
+      summary
+    };
+  }
+  describeChange(original, replayed) { // Builds a "; "-joined text naming what differs: the error state and/or the output content.
+    const parts = [];
+    if ((original.isError ?? false) !== (replayed.isError ?? false)) {
+      parts.push(`error state: ${original.isError ?? false} \u2192 ${replayed.isError ?? false}`); // Shows the old and new flag separated by an arrow (\u2192).
+    }
+    if (JSON.stringify(original.output) !== JSON.stringify(replayed.output)) { // Same serialized comparison as in diff().
+      parts.push("output content changed"); // States only that the output differs, not how.
+    }
+    return parts.join("; ");
+  }
+};
 export {
   AuthBypassRule,
   BadgeGenerator,
@@ -3260,9 +3715,11 @@ export {
   BenchmarkRunner,
   ConnectionManager,
   ConsoleReporter,
+  DANGEROUS_TOOL_PATTERNS,
   DocGenerator,
   ERROR_CODE_MAP,
   ERROR_TEMPLATES,
+  ExcessiveAgencyRule,
   HtmlDocGenerator,
   HtmlReporter,
   InformationDisclosureRule,
@@ -3281,6 +3738,9 @@ export {
   ProcessRegistry,
   Profiler,
   RateLimiter,
+  RecordingDiffer,
+  RecordingReplayer,
+  RecordingStore,
   ResourceExhaustionRule,
   ResultDiffer,
   ScanConfig,
@@ -3290,6 +3750,7 @@ export {
   TestExecutor,
   TestRunner,
   TestScheduler,
+  ToolPoisoningRule,
   WaterfallGenerator,
   YAML_LIMITS,
   computeStats,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mcpspec/core",
-  "version": "1.0.2",
+  "version": "1.1.0",
   "type": "module",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",
@@ -31,7 +31,7 @@
     "expr-eval": "^2.0.2",
     "handlebars": "^4.7.8",
     "zod": "^3.22.0",
-    "@mcpspec/shared": "1.0.2"
+    "@mcpspec/shared": "1.1.0"
   },
   "devDependencies": {
     "@types/js-yaml": "^4.0.9",