@mcpspec/core 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/dist/index.d.ts +71 -3
- package/dist/index.js +486 -25
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -88,7 +88,7 @@ Evaluated via `TestExecutor` — schema, equals, contains, exists, matches, type
|
|
|
88
88
|
|
|
89
89
|
- `SecurityScanner` — Orchestrates security audits
|
|
90
90
|
- `ScanConfig` — Safety controls and mode filtering
|
|
91
|
-
- Rules: `PathTraversalRule`, `InputValidationRule`, `ResourceExhaustionRule`, `AuthBypassRule`, `InjectionRule`, `InformationDisclosureRule`
|
|
91
|
+
- Rules: `PathTraversalRule`, `InputValidationRule`, `ResourceExhaustionRule`, `AuthBypassRule`, `InjectionRule`, `InformationDisclosureRule`, `ToolPoisoningRule`, `ExcessiveAgencyRule`
|
|
92
92
|
- `getSafePayloads`, `getPlatformPayloads`, `getPayloadsForMode` — Payload management
|
|
93
93
|
|
|
94
94
|
### Performance
|
|
@@ -105,9 +105,15 @@ Evaluated via `TestExecutor` — schema, equals, contains, exists, matches, type
|
|
|
105
105
|
|
|
106
106
|
### Scoring
|
|
107
107
|
|
|
108
|
-
- `MCPScoreCalculator` — 0–100 quality score across 5 categories
|
|
108
|
+
- `MCPScoreCalculator` — 0–100 quality score across 5 categories; schema quality uses opinionated linting (property types, descriptions, constraints, naming conventions)
|
|
109
109
|
- `BadgeGenerator` — shields.io-style SVG badges
|
|
110
110
|
|
|
111
|
+
### Recording & Replay
|
|
112
|
+
|
|
113
|
+
- `RecordingStore` — Save, load, list, and delete session recordings
|
|
114
|
+
- `RecordingReplayer` — Replay recorded steps against a live server
|
|
115
|
+
- `RecordingDiffer` — Diff original recording vs replayed results (matched/changed/added/removed)
|
|
116
|
+
|
|
111
117
|
### Utilities
|
|
112
118
|
|
|
113
119
|
- `loadYamlSafely` — FAILSAFE_SCHEMA YAML parsing
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as _mcpspec_shared from '@mcpspec/shared';
|
|
2
|
-
import { ErrorTemplate, ManagedProcess, ProcessConfig, ServerConfig, ConnectionConfig, ConnectionState, TestResult, TestRunResult, CollectionDefinition, RateLimitConfig, TestDefinition, SecurityScanMode, SeverityLevel, SecurityScanConfig, SecurityFinding, SecurityScanResult, ProfileEntry, BenchmarkStats, BenchmarkResult, BenchmarkConfig, WaterfallEntry, MCPScore } from '@mcpspec/shared';
|
|
2
|
+
import { ErrorTemplate, ManagedProcess, ProcessConfig, ServerConfig, ConnectionConfig, ConnectionState, TestResult, TestRunResult, CollectionDefinition, RateLimitConfig, TestDefinition, SecurityScanMode, SeverityLevel, SecurityScanConfig, SecurityFinding, SecurityScanResult, ProfileEntry, BenchmarkStats, BenchmarkResult, BenchmarkConfig, WaterfallEntry, MCPScore, Recording, RecordingStep, RecordingDiff } from '@mcpspec/shared';
|
|
3
3
|
import { Transport, TransportSendOptions } from '@modelcontextprotocol/sdk/shared/transport.js';
|
|
4
4
|
import { JSONRPCMessage, MessageExtraInfo } from '@modelcontextprotocol/sdk/types.js';
|
|
5
5
|
|
|
@@ -352,6 +352,7 @@ declare class ResultDiffer {
|
|
|
352
352
|
diff(baseline: TestRunResult, current: TestRunResult, baselineName?: string): RunDiff;
|
|
353
353
|
}
|
|
354
354
|
|
|
355
|
+
declare const DANGEROUS_TOOL_PATTERNS: RegExp;
|
|
355
356
|
declare class ScanConfig {
|
|
356
357
|
readonly mode: SecurityScanMode;
|
|
357
358
|
readonly rules: string[];
|
|
@@ -359,9 +360,12 @@ declare class ScanConfig {
|
|
|
359
360
|
readonly acknowledgeRisk: boolean;
|
|
360
361
|
readonly timeout: number;
|
|
361
362
|
readonly maxProbesPerTool: number;
|
|
363
|
+
readonly excludeTools: string[];
|
|
364
|
+
readonly dryRun: boolean;
|
|
362
365
|
constructor(config?: Partial<SecurityScanConfig>);
|
|
363
366
|
requiresConfirmation(): boolean;
|
|
364
367
|
meetsThreshold(severity: SeverityLevel): boolean;
|
|
368
|
+
isToolExcluded(toolName: string): boolean;
|
|
365
369
|
private getRulesForMode;
|
|
366
370
|
}
|
|
367
371
|
|
|
@@ -377,10 +381,27 @@ interface ScanProgress {
|
|
|
377
381
|
onRuleComplete?: (ruleId: string, findingCount: number) => void;
|
|
378
382
|
onFinding?: (finding: SecurityFinding) => void;
|
|
379
383
|
}
|
|
384
|
+
interface DryRunResult {
|
|
385
|
+
tools: Array<{
|
|
386
|
+
name: string;
|
|
387
|
+
included: boolean;
|
|
388
|
+
reason?: string;
|
|
389
|
+
}>;
|
|
390
|
+
rules: string[];
|
|
391
|
+
mode: string;
|
|
392
|
+
}
|
|
380
393
|
declare class SecurityScanner {
|
|
381
394
|
private readonly rules;
|
|
382
395
|
constructor();
|
|
383
396
|
registerRule(rule: SecurityRule): void;
|
|
397
|
+
/**
|
|
398
|
+
* Preview which tools will be scanned without actually running payloads.
|
|
399
|
+
*/
|
|
400
|
+
dryRun(client: MCPClientInterface, config: ScanConfig): Promise<DryRunResult>;
|
|
401
|
+
/**
|
|
402
|
+
* Filter tools based on config exclusions.
|
|
403
|
+
*/
|
|
404
|
+
filterTools(tools: ToolInfo[], config: ScanConfig): ToolInfo[];
|
|
384
405
|
scan(client: MCPClientInterface, config: ScanConfig, progress?: ScanProgress): Promise<SecurityScanResult>;
|
|
385
406
|
private buildSummary;
|
|
386
407
|
private registerBuiltinRules;
|
|
@@ -436,6 +457,22 @@ declare class InformationDisclosureRule implements SecurityRule {
|
|
|
436
457
|
private getFirstParam;
|
|
437
458
|
}
|
|
438
459
|
|
|
460
|
+
declare class ToolPoisoningRule implements SecurityRule {
|
|
461
|
+
readonly id = "tool-poisoning";
|
|
462
|
+
readonly name = "Tool Poisoning";
|
|
463
|
+
readonly description = "Detects manipulation attempts in tool descriptions that could mislead LLMs";
|
|
464
|
+
scan(_client: MCPClientInterface, tools: ToolInfo[], _config: ScanConfig): Promise<SecurityFinding[]>;
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
declare class ExcessiveAgencyRule implements SecurityRule {
|
|
468
|
+
readonly id = "excessive-agency";
|
|
469
|
+
readonly name = "Excessive Agency";
|
|
470
|
+
readonly description = "Detects tools with overly broad permissions or missing safety controls";
|
|
471
|
+
scan(_client: MCPClientInterface, tools: ToolInfo[], _config: ScanConfig): Promise<SecurityFinding[]>;
|
|
472
|
+
private getParamNames;
|
|
473
|
+
private getParamDescriptions;
|
|
474
|
+
}
|
|
475
|
+
|
|
439
476
|
interface PayloadSet {
|
|
440
477
|
category: string;
|
|
441
478
|
label: string;
|
|
@@ -513,8 +550,10 @@ declare class MCPScoreCalculator {
|
|
|
513
550
|
calculate(client: MCPClientInterface, progress?: ScoreProgress): Promise<MCPScore>;
|
|
514
551
|
private scoreDocumentation;
|
|
515
552
|
private scoreSchemaQuality;
|
|
553
|
+
/** Score a single tool's schema from 0.0 to 1.0 across 6 weighted criteria. */
|
|
554
|
+
private scoreToolSchema;
|
|
516
555
|
private scoreErrorHandling;
|
|
517
|
-
private
|
|
556
|
+
private scoreResponsiveness;
|
|
518
557
|
private scoreSecurity;
|
|
519
558
|
}
|
|
520
559
|
|
|
@@ -523,4 +562,33 @@ declare class BadgeGenerator {
|
|
|
523
562
|
getColor(score: number): string;
|
|
524
563
|
}
|
|
525
564
|
|
|
526
|
-
|
|
565
|
+
declare class RecordingStore {
|
|
566
|
+
private basePath;
|
|
567
|
+
constructor(basePath?: string);
|
|
568
|
+
save(name: string, recording: Recording): string;
|
|
569
|
+
load(name: string): Recording | null;
|
|
570
|
+
list(): string[];
|
|
571
|
+
delete(name: string): boolean;
|
|
572
|
+
private getFilePath;
|
|
573
|
+
private ensureDir;
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
interface ReplayProgress {
|
|
577
|
+
onStepStart?: (index: number, step: RecordingStep) => void;
|
|
578
|
+
onStepComplete?: (index: number, replayed: RecordingStep) => void;
|
|
579
|
+
}
|
|
580
|
+
interface ReplayResult {
|
|
581
|
+
originalRecording: Recording;
|
|
582
|
+
replayedSteps: RecordingStep[];
|
|
583
|
+
replayedAt: string;
|
|
584
|
+
}
|
|
585
|
+
declare class RecordingReplayer {
|
|
586
|
+
replay(recording: Recording, client: MCPClientInterface, progress?: ReplayProgress): Promise<ReplayResult>;
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
declare class RecordingDiffer {
|
|
590
|
+
diff(recording: Recording, replayedSteps: RecordingStep[], replayedAt: string): RecordingDiff;
|
|
591
|
+
private describeChange;
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
export { AuthBypassRule, BadgeGenerator, BaselineStore, type BenchmarkProgress, BenchmarkRunner, ConnectionManager, ConsoleReporter, DANGEROUS_TOOL_PATTERNS, DocGenerator, type DocGeneratorOptions, type DryRunResult, ERROR_CODE_MAP, ERROR_TEMPLATES, type ErrorCode, ExcessiveAgencyRule, HtmlDocGenerator, HtmlReporter, InformationDisclosureRule, InjectionRule, InputValidationRule, JsonReporter, JunitReporter, LoggingTransport, MCPClient, type MCPClientInterface, MCPScoreCalculator, MCPSpecError, MarkdownGenerator, NotImplementedError, type OnProtocolMessage, PathTraversalRule, type PayloadSet, type PlatformPayload, ProcessManagerImpl, ProcessRegistry, Profiler, RateLimiter, RecordingDiffer, RecordingReplayer, RecordingStore, type ReplayProgress, type ReplayResult, ResourceExhaustionRule, ResultDiffer, type RunDiff, ScanConfig, type ScanProgress, type ScoreProgress, SecretMasker, type SecurityRule, SecurityScanner, type ServerDocData, TapReporter, type TestDiff, TestExecutor, type TestRunReporter, TestRunner, TestScheduler, ToolPoisoningRule, WaterfallGenerator, YAML_LIMITS, computeStats, formatError, getPayloadsForMode, getPlatformInfo, getPlatformPayloads, getSafePayloads, loadYamlSafely, queryJsonPath, registerCleanupHandlers, resolveVariables };
|
package/dist/index.js
CHANGED
|
@@ -1230,7 +1230,7 @@ var TestExecutor = class {
|
|
|
1230
1230
|
assertions: assertionResults
|
|
1231
1231
|
};
|
|
1232
1232
|
}
|
|
1233
|
-
const response = this.buildResponse(result);
|
|
1233
|
+
const response = this.buildResponse(result, test.rawResponse);
|
|
1234
1234
|
if (test.assertions) {
|
|
1235
1235
|
for (const assertion of test.assertions) {
|
|
1236
1236
|
assertionResults.push(this.runAssertion(assertion, response, Date.now() - startTime));
|
|
@@ -1273,7 +1273,7 @@ var TestExecutor = class {
|
|
|
1273
1273
|
};
|
|
1274
1274
|
}
|
|
1275
1275
|
}
|
|
1276
|
-
buildResponse(result) {
|
|
1276
|
+
buildResponse(result, rawResponse) {
|
|
1277
1277
|
const contents = result.content;
|
|
1278
1278
|
if (!Array.isArray(contents) || contents.length === 0) {
|
|
1279
1279
|
return {};
|
|
@@ -1281,6 +1281,9 @@ var TestExecutor = class {
|
|
|
1281
1281
|
if (contents.length === 1) {
|
|
1282
1282
|
const item = contents[0];
|
|
1283
1283
|
if (item["type"] === "text" && typeof item["text"] === "string") {
|
|
1284
|
+
if (rawResponse) {
|
|
1285
|
+
return { content: item["text"], text: item["text"] };
|
|
1286
|
+
}
|
|
1284
1287
|
try {
|
|
1285
1288
|
return JSON.parse(item["text"]);
|
|
1286
1289
|
} catch {
|
|
@@ -1420,17 +1423,16 @@ var TestScheduler = class {
|
|
|
1420
1423
|
return skippedResults;
|
|
1421
1424
|
}
|
|
1422
1425
|
if (parallelism <= 1) {
|
|
1423
|
-
const
|
|
1426
|
+
const executor = new TestExecutor(initialVariables, rateLimiter);
|
|
1424
1427
|
const results2 = [];
|
|
1425
1428
|
for (const test of filteredTests) {
|
|
1426
1429
|
reporter?.onTestStart(test.name);
|
|
1427
|
-
const result = await
|
|
1430
|
+
const result = await executor.execute(test, client);
|
|
1428
1431
|
results2.push(result);
|
|
1429
1432
|
reporter?.onTestComplete(result);
|
|
1430
1433
|
}
|
|
1431
1434
|
return [...results2, ...skippedResults];
|
|
1432
1435
|
}
|
|
1433
|
-
const executor = new TestExecutor(initialVariables, rateLimiter);
|
|
1434
1436
|
let running = 0;
|
|
1435
1437
|
const results = new Array(filteredTests.length);
|
|
1436
1438
|
const waitQueue = [];
|
|
@@ -1455,6 +1457,7 @@ var TestScheduler = class {
|
|
|
1455
1457
|
return (async () => {
|
|
1456
1458
|
await acquire();
|
|
1457
1459
|
try {
|
|
1460
|
+
const executor = new TestExecutor(initialVariables, rateLimiter);
|
|
1458
1461
|
reporter?.onTestStart(test.name);
|
|
1459
1462
|
const result = await executor.execute(test, client);
|
|
1460
1463
|
results[i] = result;
|
|
@@ -2075,7 +2078,9 @@ var SEVERITY_ORDER = ["info", "low", "medium", "high", "critical"];
|
|
|
2075
2078
|
var PASSIVE_RULES = [
|
|
2076
2079
|
"path-traversal",
|
|
2077
2080
|
"input-validation",
|
|
2078
|
-
"information-disclosure"
|
|
2081
|
+
"information-disclosure",
|
|
2082
|
+
"tool-poisoning",
|
|
2083
|
+
"excessive-agency"
|
|
2079
2084
|
];
|
|
2080
2085
|
var ACTIVE_RULES = [
|
|
2081
2086
|
...PASSIVE_RULES,
|
|
@@ -2086,6 +2091,7 @@ var ACTIVE_RULES = [
|
|
|
2086
2091
|
var AGGRESSIVE_RULES = [...ACTIVE_RULES];
|
|
2087
2092
|
var DEFAULT_TIMEOUT = 1e4;
|
|
2088
2093
|
var DEFAULT_MAX_PROBES = 50;
|
|
2094
|
+
var DANGEROUS_TOOL_PATTERNS = /^(delete|drop|remove|destroy|kill|purge|truncate|wipe|reset|erase)[_-]|[_-](delete|drop|remove|destroy|kill|purge|truncate|wipe|reset|erase)$/i;
|
|
2089
2095
|
var ScanConfig = class {
|
|
2090
2096
|
mode;
|
|
2091
2097
|
rules;
|
|
@@ -2093,12 +2099,16 @@ var ScanConfig = class {
|
|
|
2093
2099
|
acknowledgeRisk;
|
|
2094
2100
|
timeout;
|
|
2095
2101
|
maxProbesPerTool;
|
|
2102
|
+
excludeTools;
|
|
2103
|
+
dryRun;
|
|
2096
2104
|
constructor(config = {}) {
|
|
2097
2105
|
this.mode = config.mode ?? "passive";
|
|
2098
2106
|
this.severityThreshold = config.severityThreshold ?? "info";
|
|
2099
2107
|
this.acknowledgeRisk = config.acknowledgeRisk ?? false;
|
|
2100
2108
|
this.timeout = config.timeout ?? DEFAULT_TIMEOUT;
|
|
2101
2109
|
this.maxProbesPerTool = config.maxProbesPerTool ?? DEFAULT_MAX_PROBES;
|
|
2110
|
+
this.excludeTools = config.excludeTools ?? [];
|
|
2111
|
+
this.dryRun = config.dryRun ?? false;
|
|
2102
2112
|
const allRulesForMode = this.getRulesForMode(this.mode);
|
|
2103
2113
|
if (config.rules && config.rules.length > 0) {
|
|
2104
2114
|
this.rules = config.rules.filter((r) => allRulesForMode.includes(r));
|
|
@@ -2114,6 +2124,11 @@ var ScanConfig = class {
|
|
|
2114
2124
|
const severityIdx = SEVERITY_ORDER.indexOf(severity);
|
|
2115
2125
|
return severityIdx >= thresholdIdx;
|
|
2116
2126
|
}
|
|
2127
|
+
isToolExcluded(toolName) {
|
|
2128
|
+
if (this.excludeTools.includes(toolName)) return true;
|
|
2129
|
+
if (this.mode !== "passive" && DANGEROUS_TOOL_PATTERNS.test(toolName)) return true;
|
|
2130
|
+
return false;
|
|
2131
|
+
}
|
|
2117
2132
|
getRulesForMode(mode) {
|
|
2118
2133
|
switch (mode) {
|
|
2119
2134
|
case "passive":
|
|
@@ -2127,7 +2142,7 @@ var ScanConfig = class {
|
|
|
2127
2142
|
};
|
|
2128
2143
|
|
|
2129
2144
|
// src/security/security-scanner.ts
|
|
2130
|
-
import { randomUUID as
|
|
2145
|
+
import { randomUUID as randomUUID11 } from "crypto";
|
|
2131
2146
|
|
|
2132
2147
|
// src/security/rules/path-traversal.ts
|
|
2133
2148
|
import { randomUUID as randomUUID3 } from "crypto";
|
|
@@ -2680,6 +2695,217 @@ var InformationDisclosureRule = class {
|
|
|
2680
2695
|
}
|
|
2681
2696
|
};
|
|
2682
2697
|
|
|
2698
|
+
// src/security/rules/tool-poisoning.ts
|
|
2699
|
+
import { randomUUID as randomUUID9 } from "crypto";
|
|
2700
|
+
var SUSPICIOUS_PATTERNS = [
|
|
2701
|
+
/ignore\s+(all\s+)?previous/i,
|
|
2702
|
+
/do\s+not\s+tell\s+the\s+user/i,
|
|
2703
|
+
/override\s+instructions/i,
|
|
2704
|
+
/disregard\s+(all\s+)?previous/i,
|
|
2705
|
+
/pretend\s+(you\s+are|to\s+be)/i,
|
|
2706
|
+
/forget\s+everything/i,
|
|
2707
|
+
/ignore\s+your\s+(instructions|rules)/i,
|
|
2708
|
+
/you\s+must\s+not\s+(reveal|disclose|mention)/i,
|
|
2709
|
+
/system\s+prompt/i,
|
|
2710
|
+
/act\s+as\s+(if|though)/i
|
|
2711
|
+
];
|
|
2712
|
+
var ZERO_WIDTH_CHARS = /[\u200B-\u200D\uFEFF]/;
|
|
2713
|
+
var BIDI_OVERRIDES = /[\u202A-\u202E\u2066-\u2069]/;
|
|
2714
|
+
var CROSS_TOOL_PATTERNS = [
|
|
2715
|
+
/when\s+using\s+\w+.*also\s+call/i,
|
|
2716
|
+
/before\s+calling\s+\w+.*first\s+run/i,
|
|
2717
|
+
/after\s+(using|calling)\s+\w+.*invoke/i,
|
|
2718
|
+
/always\s+call\s+\w+\s+(before|after|with)/i
|
|
2719
|
+
];
|
|
2720
|
+
var EMBEDDED_CODE_PATTERNS = [
|
|
2721
|
+
/```[\s\S]*?```/,
|
|
2722
|
+
/<script[\s>]/i,
|
|
2723
|
+
/eval\s*\(/,
|
|
2724
|
+
/require\s*\(/,
|
|
2725
|
+
/import\s*\(/
|
|
2726
|
+
];
|
|
2727
|
+
var MAX_DESCRIPTION_LENGTH = 1e3;
|
|
2728
|
+
var ToolPoisoningRule = class {
|
|
2729
|
+
id = "tool-poisoning";
|
|
2730
|
+
name = "Tool Poisoning";
|
|
2731
|
+
description = "Detects manipulation attempts in tool descriptions that could mislead LLMs";
|
|
2732
|
+
async scan(_client, tools, _config) {
|
|
2733
|
+
const findings = [];
|
|
2734
|
+
for (const tool of tools) {
|
|
2735
|
+
const desc = tool.description ?? "";
|
|
2736
|
+
for (const pattern of SUSPICIOUS_PATTERNS) {
|
|
2737
|
+
if (pattern.test(desc)) {
|
|
2738
|
+
findings.push({
|
|
2739
|
+
id: randomUUID9(),
|
|
2740
|
+
rule: this.id,
|
|
2741
|
+
severity: "high",
|
|
2742
|
+
title: `Suspicious instruction in tool "${tool.name}"`,
|
|
2743
|
+
description: `Tool description contains prompt injection pattern: ${pattern.source}`,
|
|
2744
|
+
evidence: desc.slice(0, 200),
|
|
2745
|
+
remediation: "Remove manipulative instructions from tool descriptions"
|
|
2746
|
+
});
|
|
2747
|
+
break;
|
|
2748
|
+
}
|
|
2749
|
+
}
|
|
2750
|
+
if (ZERO_WIDTH_CHARS.test(desc) || BIDI_OVERRIDES.test(desc)) {
|
|
2751
|
+
findings.push({
|
|
2752
|
+
id: randomUUID9(),
|
|
2753
|
+
rule: this.id,
|
|
2754
|
+
severity: "high",
|
|
2755
|
+
title: `Hidden Unicode characters in tool "${tool.name}"`,
|
|
2756
|
+
description: "Tool description contains zero-width or bidirectional override characters that can hide malicious content",
|
|
2757
|
+
evidence: `Description length: ${desc.length} characters`,
|
|
2758
|
+
remediation: "Remove invisible Unicode characters from tool descriptions"
|
|
2759
|
+
});
|
|
2760
|
+
}
|
|
2761
|
+
for (const pattern of CROSS_TOOL_PATTERNS) {
|
|
2762
|
+
if (pattern.test(desc)) {
|
|
2763
|
+
findings.push({
|
|
2764
|
+
id: randomUUID9(),
|
|
2765
|
+
rule: this.id,
|
|
2766
|
+
severity: "medium",
|
|
2767
|
+
title: `Cross-tool reference in tool "${tool.name}"`,
|
|
2768
|
+
description: "Tool description instructs the LLM to call other tools, which could be used to chain unauthorized actions",
|
|
2769
|
+
evidence: desc.slice(0, 200),
|
|
2770
|
+
remediation: "Remove cross-tool instructions from descriptions"
|
|
2771
|
+
});
|
|
2772
|
+
break;
|
|
2773
|
+
}
|
|
2774
|
+
}
|
|
2775
|
+
if (desc.length > MAX_DESCRIPTION_LENGTH) {
|
|
2776
|
+
findings.push({
|
|
2777
|
+
id: randomUUID9(),
|
|
2778
|
+
rule: this.id,
|
|
2779
|
+
severity: "low",
|
|
2780
|
+
title: `Overly long description for tool "${tool.name}"`,
|
|
2781
|
+
description: `Tool description is ${desc.length} characters (threshold: ${MAX_DESCRIPTION_LENGTH}). Long descriptions may hide malicious instructions`,
|
|
2782
|
+
remediation: "Keep tool descriptions concise and focused"
|
|
2783
|
+
});
|
|
2784
|
+
}
|
|
2785
|
+
for (const pattern of EMBEDDED_CODE_PATTERNS) {
|
|
2786
|
+
if (pattern.test(desc)) {
|
|
2787
|
+
findings.push({
|
|
2788
|
+
id: randomUUID9(),
|
|
2789
|
+
rule: this.id,
|
|
2790
|
+
severity: "medium",
|
|
2791
|
+
title: `Embedded code in tool "${tool.name}" description`,
|
|
2792
|
+
description: "Tool description contains code blocks or executable patterns",
|
|
2793
|
+
evidence: desc.slice(0, 200),
|
|
2794
|
+
remediation: "Remove code blocks from tool descriptions"
|
|
2795
|
+
});
|
|
2796
|
+
break;
|
|
2797
|
+
}
|
|
2798
|
+
}
|
|
2799
|
+
}
|
|
2800
|
+
return findings;
|
|
2801
|
+
}
|
|
2802
|
+
};
|
|
2803
|
+
|
|
2804
|
+
// src/security/rules/excessive-agency.ts
|
|
2805
|
+
import { randomUUID as randomUUID10 } from "crypto";
|
|
2806
|
+
var DESTRUCTIVE_TOOL_PATTERN = /delete|drop|destroy|remove|kill|purge|truncate|wipe|reset|erase|shutdown|terminate/i;
|
|
2807
|
+
var CONFIRMATION_PARAMS = ["confirmation", "dryrun", "dry_run", "confirm", "force"];
|
|
2808
|
+
var CODE_EXEC_PARAMS = ["code", "script", "command", "query", "sql", "eval", "shell", "exec", "expression", "cmd"];
|
|
2809
|
+
var ExcessiveAgencyRule = class {
|
|
2810
|
+
id = "excessive-agency";
|
|
2811
|
+
name = "Excessive Agency";
|
|
2812
|
+
description = "Detects tools with overly broad permissions or missing safety controls";
|
|
2813
|
+
async scan(_client, tools, _config) {
|
|
2814
|
+
const findings = [];
|
|
2815
|
+
for (const tool of tools) {
|
|
2816
|
+
if (DESTRUCTIVE_TOOL_PATTERN.test(tool.name)) {
|
|
2817
|
+
const params2 = this.getParamNames(tool);
|
|
2818
|
+
const hasConfirmation = params2.some((p) => CONFIRMATION_PARAMS.includes(p.toLowerCase()));
|
|
2819
|
+
if (!hasConfirmation) {
|
|
2820
|
+
findings.push({
|
|
2821
|
+
id: randomUUID10(),
|
|
2822
|
+
rule: this.id,
|
|
2823
|
+
severity: "medium",
|
|
2824
|
+
title: `Destructive tool "${tool.name}" lacks confirmation parameter`,
|
|
2825
|
+
description: "Tool with destructive capability does not require confirmation, dryRun, or force parameter",
|
|
2826
|
+
remediation: "Add a confirmation, dryRun, or force parameter to destructive tools"
|
|
2827
|
+
});
|
|
2828
|
+
}
|
|
2829
|
+
}
|
|
2830
|
+
const params = this.getParamNames(tool);
|
|
2831
|
+
for (const param of params) {
|
|
2832
|
+
if (CODE_EXEC_PARAMS.includes(param.toLowerCase())) {
|
|
2833
|
+
findings.push({
|
|
2834
|
+
id: randomUUID10(),
|
|
2835
|
+
rule: this.id,
|
|
2836
|
+
severity: "high",
|
|
2837
|
+
title: `Code execution parameter "${param}" in tool "${tool.name}"`,
|
|
2838
|
+
description: "Tool accepts arbitrary code or command input, which could enable unauthorized actions",
|
|
2839
|
+
remediation: "Use specific, constrained parameters instead of generic code/command inputs"
|
|
2840
|
+
});
|
|
2841
|
+
break;
|
|
2842
|
+
}
|
|
2843
|
+
}
|
|
2844
|
+
const schema = tool.inputSchema;
|
|
2845
|
+
if (schema && typeof schema === "object") {
|
|
2846
|
+
const props = schema.properties;
|
|
2847
|
+
const required = schema.required;
|
|
2848
|
+
if ((!props || Object.keys(props).length === 0) && (!required || required.length === 0)) {
|
|
2849
|
+
findings.push({
|
|
2850
|
+
id: randomUUID10(),
|
|
2851
|
+
rule: this.id,
|
|
2852
|
+
severity: "medium",
|
|
2853
|
+
title: `Overly broad schema for tool "${tool.name}"`,
|
|
2854
|
+
description: "Tool schema has no defined properties or required fields, accepting arbitrary input",
|
|
2855
|
+
remediation: "Define explicit input schema with typed properties and required fields"
|
|
2856
|
+
});
|
|
2857
|
+
}
|
|
2858
|
+
}
|
|
2859
|
+
if (!tool.description || tool.description.trim() === "") {
|
|
2860
|
+
findings.push({
|
|
2861
|
+
id: randomUUID10(),
|
|
2862
|
+
rule: this.id,
|
|
2863
|
+
severity: "low",
|
|
2864
|
+
title: `Missing description for tool "${tool.name}"`,
|
|
2865
|
+
description: "Tool lacks a description, making it difficult to understand its purpose and risks",
|
|
2866
|
+
remediation: "Add a clear, informative description to the tool"
|
|
2867
|
+
});
|
|
2868
|
+
}
|
|
2869
|
+
const paramDescs = this.getParamDescriptions(tool);
|
|
2870
|
+
if (paramDescs.total > 0) {
|
|
2871
|
+
const missingRatio = paramDescs.missing / paramDescs.total;
|
|
2872
|
+
if (missingRatio > 0.5) {
|
|
2873
|
+
findings.push({
|
|
2874
|
+
id: randomUUID10(),
|
|
2875
|
+
rule: this.id,
|
|
2876
|
+
severity: "low",
|
|
2877
|
+
title: `Missing parameter descriptions in tool "${tool.name}"`,
|
|
2878
|
+
description: `${paramDescs.missing} of ${paramDescs.total} parameters lack descriptions`,
|
|
2879
|
+
remediation: "Add descriptions to all parameters to clarify their purpose"
|
|
2880
|
+
});
|
|
2881
|
+
}
|
|
2882
|
+
}
|
|
2883
|
+
}
|
|
2884
|
+
return findings;
|
|
2885
|
+
}
|
|
2886
|
+
getParamNames(tool) {
|
|
2887
|
+
const schema = tool.inputSchema;
|
|
2888
|
+
if (!schema || typeof schema !== "object") return [];
|
|
2889
|
+
const props = schema.properties;
|
|
2890
|
+
if (!props) return [];
|
|
2891
|
+
return Object.keys(props);
|
|
2892
|
+
}
|
|
2893
|
+
getParamDescriptions(tool) {
|
|
2894
|
+
const schema = tool.inputSchema;
|
|
2895
|
+
if (!schema || typeof schema !== "object") return { total: 0, missing: 0 };
|
|
2896
|
+
const props = schema.properties;
|
|
2897
|
+
if (!props) return { total: 0, missing: 0 };
|
|
2898
|
+
const entries = Object.values(props);
|
|
2899
|
+
let missing = 0;
|
|
2900
|
+
for (const prop of entries) {
|
|
2901
|
+
if (!prop || typeof prop !== "object" || !prop.description) {
|
|
2902
|
+
missing++;
|
|
2903
|
+
}
|
|
2904
|
+
}
|
|
2905
|
+
return { total: entries.length, missing };
|
|
2906
|
+
}
|
|
2907
|
+
};
|
|
2908
|
+
|
|
2683
2909
|
// src/security/security-scanner.ts
|
|
2684
2910
|
var SEVERITY_ORDER2 = ["info", "low", "medium", "high", "critical"];
|
|
2685
2911
|
var SecurityScanner = class {
|
|
@@ -2690,10 +2916,47 @@ var SecurityScanner = class {
|
|
|
2690
2916
|
registerRule(rule) {
|
|
2691
2917
|
this.rules.set(rule.id, rule);
|
|
2692
2918
|
}
|
|
2919
|
+
/**
|
|
2920
|
+
* Preview which tools will be scanned without actually running payloads.
|
|
2921
|
+
*/
|
|
2922
|
+
async dryRun(client, config) {
|
|
2923
|
+
const allTools = await client.listTools();
|
|
2924
|
+
const toolResults = allTools.map((tool) => {
|
|
2925
|
+
if (config.excludeTools.includes(tool.name)) {
|
|
2926
|
+
return { name: tool.name, included: false, reason: "excluded by --exclude-tools" };
|
|
2927
|
+
}
|
|
2928
|
+
if (config.isToolExcluded(tool.name)) {
|
|
2929
|
+
return { name: tool.name, included: false, reason: "auto-skipped (destructive name)" };
|
|
2930
|
+
}
|
|
2931
|
+
return { name: tool.name, included: true };
|
|
2932
|
+
});
|
|
2933
|
+
return {
|
|
2934
|
+
tools: toolResults,
|
|
2935
|
+
rules: [...config.rules],
|
|
2936
|
+
mode: config.mode
|
|
2937
|
+
};
|
|
2938
|
+
}
|
|
2939
|
+
/**
|
|
2940
|
+
* Filter tools based on config exclusions.
|
|
2941
|
+
*/
|
|
2942
|
+
filterTools(tools, config) {
|
|
2943
|
+
return tools.filter((tool) => !config.isToolExcluded(tool.name));
|
|
2944
|
+
}
|
|
2693
2945
|
async scan(client, config, progress) {
|
|
2694
2946
|
const startedAt = /* @__PURE__ */ new Date();
|
|
2695
2947
|
const findings = [];
|
|
2696
|
-
const
|
|
2948
|
+
const allTools = await client.listTools();
|
|
2949
|
+
const tools = this.filterTools(allTools, config);
|
|
2950
|
+
const skippedCount = allTools.length - tools.length;
|
|
2951
|
+
if (skippedCount > 0) {
|
|
2952
|
+
findings.push({
|
|
2953
|
+
id: randomUUID11(),
|
|
2954
|
+
rule: "safety-filter",
|
|
2955
|
+
severity: "info",
|
|
2956
|
+
title: `${skippedCount} tool(s) excluded from scan`,
|
|
2957
|
+
description: `${skippedCount} tool(s) were excluded from scanning due to safety filters or --exclude-tools.`
|
|
2958
|
+
});
|
|
2959
|
+
}
|
|
2697
2960
|
for (const ruleId of config.rules) {
|
|
2698
2961
|
const rule = this.rules.get(ruleId);
|
|
2699
2962
|
if (!rule) continue;
|
|
@@ -2707,7 +2970,7 @@ var SecurityScanner = class {
|
|
|
2707
2970
|
progress?.onRuleComplete?.(rule.id, ruleFindings.length);
|
|
2708
2971
|
} catch (err) {
|
|
2709
2972
|
const errorFinding = {
|
|
2710
|
-
id:
|
|
2973
|
+
id: randomUUID11(),
|
|
2711
2974
|
rule: ruleId,
|
|
2712
2975
|
severity: "info",
|
|
2713
2976
|
title: `Rule "${ruleId}" failed to complete`,
|
|
@@ -2724,7 +2987,7 @@ var SecurityScanner = class {
|
|
|
2724
2987
|
const completedAt = /* @__PURE__ */ new Date();
|
|
2725
2988
|
const serverInfo = client.getServerInfo();
|
|
2726
2989
|
return {
|
|
2727
|
-
id:
|
|
2990
|
+
id: randomUUID11(),
|
|
2728
2991
|
serverName: serverInfo?.name ?? "unknown",
|
|
2729
2992
|
mode: config.mode,
|
|
2730
2993
|
startedAt,
|
|
@@ -2759,6 +3022,8 @@ var SecurityScanner = class {
|
|
|
2759
3022
|
this.registerRule(new AuthBypassRule());
|
|
2760
3023
|
this.registerRule(new InjectionRule());
|
|
2761
3024
|
this.registerRule(new InformationDisclosureRule());
|
|
3025
|
+
this.registerRule(new ToolPoisoningRule());
|
|
3026
|
+
this.registerRule(new ExcessiveAgencyRule());
|
|
2762
3027
|
}
|
|
2763
3028
|
};
|
|
2764
3029
|
|
|
@@ -3121,14 +3386,14 @@ var MCPScoreCalculator = class {
|
|
|
3121
3386
|
progress?.onCategoryStart?.("errorHandling");
|
|
3122
3387
|
const errorHandling = await this.scoreErrorHandling(client, tools);
|
|
3123
3388
|
progress?.onCategoryComplete?.("errorHandling", errorHandling);
|
|
3124
|
-
progress?.onCategoryStart?.("
|
|
3125
|
-
const
|
|
3126
|
-
progress?.onCategoryComplete?.("
|
|
3389
|
+
progress?.onCategoryStart?.("responsiveness");
|
|
3390
|
+
const responsiveness = await this.scoreResponsiveness(client, tools);
|
|
3391
|
+
progress?.onCategoryComplete?.("responsiveness", responsiveness);
|
|
3127
3392
|
progress?.onCategoryStart?.("security");
|
|
3128
3393
|
const security = await this.scoreSecurity(client);
|
|
3129
3394
|
progress?.onCategoryComplete?.("security", security);
|
|
3130
3395
|
const overall = Math.round(
|
|
3131
|
-
documentation * 0.25 + schemaQuality * 0.25 + errorHandling * 0.2 +
|
|
3396
|
+
documentation * 0.25 + schemaQuality * 0.25 + errorHandling * 0.2 + responsiveness * 0.15 + security * 0.15
|
|
3132
3397
|
);
|
|
3133
3398
|
return {
|
|
3134
3399
|
overall,
|
|
@@ -3136,7 +3401,7 @@ var MCPScoreCalculator = class {
|
|
|
3136
3401
|
documentation,
|
|
3137
3402
|
schemaQuality,
|
|
3138
3403
|
errorHandling,
|
|
3139
|
-
|
|
3404
|
+
responsiveness,
|
|
3140
3405
|
security
|
|
3141
3406
|
}
|
|
3142
3407
|
};
|
|
@@ -3154,16 +3419,50 @@ var MCPScoreCalculator = class {
|
|
|
3154
3419
|
if (tools.length === 0) return 0;
|
|
3155
3420
|
let totalPoints = 0;
|
|
3156
3421
|
for (const tool of tools) {
|
|
3157
|
-
|
|
3158
|
-
if (!schema) continue;
|
|
3159
|
-
let toolPoints = 0;
|
|
3160
|
-
if (schema.type) toolPoints += 1 / 3;
|
|
3161
|
-
if (schema.properties && typeof schema.properties === "object") toolPoints += 1 / 3;
|
|
3162
|
-
if (schema.required && Array.isArray(schema.required)) toolPoints += 1 / 3;
|
|
3163
|
-
totalPoints += toolPoints;
|
|
3422
|
+
totalPoints += this.scoreToolSchema(tool);
|
|
3164
3423
|
}
|
|
3165
3424
|
return Math.round(totalPoints / tools.length * 100);
|
|
3166
3425
|
}
|
|
3426
|
+
/** Score a single tool's schema from 0.0 to 1.0 across 6 weighted criteria. */
|
|
3427
|
+
scoreToolSchema(tool) {
|
|
3428
|
+
const schema = tool.inputSchema;
|
|
3429
|
+
if (!schema) return 0;
|
|
3430
|
+
let score = 0;
|
|
3431
|
+
const hasType = !!schema.type;
|
|
3432
|
+
const properties = schema.properties;
|
|
3433
|
+
const hasProperties = properties && typeof properties === "object" && Object.keys(properties).length > 0;
|
|
3434
|
+
score += (hasType ? 0.1 : 0) + (hasProperties ? 0.1 : 0);
|
|
3435
|
+
if (!hasProperties || !properties) return score;
|
|
3436
|
+
const propEntries = Object.entries(properties);
|
|
3437
|
+
const withType = propEntries.filter(([, prop]) => !!prop.type).length;
|
|
3438
|
+
score += withType / propEntries.length * 0.2;
|
|
3439
|
+
const withDesc = propEntries.filter(([, prop]) => {
|
|
3440
|
+
const desc = prop.description;
|
|
3441
|
+
return typeof desc === "string" && desc.trim().length > 0;
|
|
3442
|
+
}).length;
|
|
3443
|
+
score += withDesc / propEntries.length * 0.2;
|
|
3444
|
+
const required = schema.required;
|
|
3445
|
+
if (Array.isArray(required) && required.length > 0) {
|
|
3446
|
+
score += 0.15;
|
|
3447
|
+
}
|
|
3448
|
+
const constraintKeys = ["enum", "pattern", "minimum", "maximum", "minLength", "maxLength", "minItems", "maxItems", "format", "default"];
|
|
3449
|
+
const withConstraints = propEntries.filter(([, prop]) => {
|
|
3450
|
+
if (constraintKeys.some((k) => prop[k] !== void 0)) return true;
|
|
3451
|
+
if (prop.type === "object" && prop.properties && typeof prop.properties === "object") {
|
|
3452
|
+
const nested = prop.properties;
|
|
3453
|
+
return Object.keys(nested).length > 0 && Object.values(nested).some((np) => !!np.type);
|
|
3454
|
+
}
|
|
3455
|
+
if (prop.type === "array" && prop.items && typeof prop.items === "object") return true;
|
|
3456
|
+
return false;
|
|
3457
|
+
}).length;
|
|
3458
|
+
score += withConstraints / propEntries.length * 0.15;
|
|
3459
|
+
const names = propEntries.map(([name]) => name);
|
|
3460
|
+
const camelCount = names.filter((n) => /^[a-z][a-zA-Z0-9]*$/.test(n)).length;
|
|
3461
|
+
const snakeCount = names.filter((n) => /^[a-z][a-z0-9_]*$/.test(n)).length;
|
|
3462
|
+
const bestConvention = Math.max(camelCount, snakeCount);
|
|
3463
|
+
score += bestConvention / names.length * 0.1;
|
|
3464
|
+
return score;
|
|
3465
|
+
}
|
|
3167
3466
|
async scoreErrorHandling(client, tools) {
|
|
3168
3467
|
if (tools.length === 0) return 0;
|
|
3169
3468
|
const testTools = tools.slice(0, 5);
|
|
@@ -3172,9 +3471,26 @@ var MCPScoreCalculator = class {
|
|
|
3172
3471
|
try {
|
|
3173
3472
|
const result = await client.callTool(tool.name, {});
|
|
3174
3473
|
if (result.isError) {
|
|
3175
|
-
|
|
3474
|
+
const content = result.content;
|
|
3475
|
+
let isStructured = false;
|
|
3476
|
+
if (Array.isArray(content) && content.length > 0) {
|
|
3477
|
+
isStructured = content.some((c) => {
|
|
3478
|
+
const item = c;
|
|
3479
|
+
const text = item["text"];
|
|
3480
|
+
if (typeof text !== "string") return false;
|
|
3481
|
+
try {
|
|
3482
|
+
const parsed = JSON.parse(text);
|
|
3483
|
+
return typeof parsed === "object" && parsed !== null && ("code" in parsed || "message" in parsed || "error" in parsed);
|
|
3484
|
+
} catch {
|
|
3485
|
+
return false;
|
|
3486
|
+
}
|
|
3487
|
+
});
|
|
3488
|
+
}
|
|
3489
|
+
totalScore += isStructured ? 100 : 80;
|
|
3176
3490
|
} else {
|
|
3177
|
-
|
|
3491
|
+
const schema = tool.inputSchema;
|
|
3492
|
+
const hasRequired = schema && Array.isArray(schema.required) && schema.required.length > 0;
|
|
3493
|
+
totalScore += hasRequired ? 30 : 50;
|
|
3178
3494
|
}
|
|
3179
3495
|
} catch {
|
|
3180
3496
|
totalScore += 0;
|
|
@@ -3182,7 +3498,7 @@ var MCPScoreCalculator = class {
|
|
|
3182
3498
|
}
|
|
3183
3499
|
return Math.round(totalScore / testTools.length);
|
|
3184
3500
|
}
|
|
3185
|
-
async
|
|
3501
|
+
async scoreResponsiveness(client, tools) {
|
|
3186
3502
|
if (tools.length === 0) return 20;
|
|
3187
3503
|
const tool = tools[0];
|
|
3188
3504
|
const latencies = [];
|
|
@@ -3253,6 +3569,145 @@ var BadgeGenerator = class {
|
|
|
3253
3569
|
return "#e05d44";
|
|
3254
3570
|
}
|
|
3255
3571
|
};
|
|
3572
|
+
|
|
3573
|
+
// src/recording/recording-store.ts
|
|
3574
|
+
import { readFileSync as readFileSync2, writeFileSync as writeFileSync3, mkdirSync as mkdirSync3, readdirSync as readdirSync2, existsSync as existsSync2, unlinkSync } from "fs";
|
|
3575
|
+
import { join as join4 } from "path";
|
|
3576
|
+
/**
 * File-backed store for session recordings: each recording is kept as one
 * pretty-printed JSON file named after its sanitised recording name.
 */
var RecordingStore = class {
  basePath;
  /**
   * @param basePath Directory that holds the recording files. When null or
   *   undefined it falls back to `recordings` under `getPlatformInfo().dataDir`.
   */
  constructor(basePath) {
    this.basePath = basePath ?? join4(getPlatformInfo().dataDir, "recordings");
  }
  /**
   * Write `recording` as indented JSON, creating the directory if needed.
   * @returns The path of the file that was written.
   */
  save(name, recording) {
    this.ensureDir();
    const target = this.getFilePath(name);
    const serialized = JSON.stringify(recording, null, 2);
    writeFileSync3(target, serialized, "utf-8");
    return target;
  }
  /**
   * Read and parse the recording stored under `name`.
   * @returns The parsed recording, or `null` when no file exists for that name.
   */
  load(name) {
    const source = this.getFilePath(name);
    return existsSync2(source) ? JSON.parse(readFileSync2(source, "utf-8")) : null;
  }
  /**
   * List stored recording names (file names without the `.json` suffix).
   * Creates the directory first if it does not exist.
   */
  list() {
    this.ensureDir();
    const names = [];
    for (const entry of readdirSync2(this.basePath)) {
      if (entry.endsWith(".json")) {
        names.push(entry.replace(/\.json$/, ""));
      }
    }
    return names;
  }
  /**
   * Remove the recording stored under `name`.
   * @returns `true` when a file was deleted, `false` when none existed.
   */
  delete(name) {
    const target = this.getFilePath(name);
    if (existsSync2(target)) {
      unlinkSync(target);
      return true;
    }
    return false;
  }
  /**
   * Map a recording name to its file path. Every character other than ASCII
   * letters, digits, `_` and `-` is replaced by `_`.
   */
  getFilePath(name) {
    const fileName = `${name.replace(/[^a-zA-Z0-9_-]/g, "_")}.json`;
    return join4(this.basePath, fileName);
  }
  /** Create the base directory (including missing parents) if it is absent. */
  ensureDir() {
    if (existsSync2(this.basePath)) return;
    mkdirSync3(this.basePath, { recursive: true });
  }
};
|
|
3612
|
+
|
|
3613
|
+
// src/recording/recording-replayer.ts
|
|
3614
|
+
/**
 * Re-runs the steps of a recording against a client, one after another, and
 * collects what each call produced.
 */
var RecordingReplayer = class {
  /**
   * Replay every step of `recording` in order through `client.callTool`.
   *
   * A call that throws or rejects does not stop the replay: the step is stored
   * as an error whose output is a single text item holding the error message.
   *
   * @param recording Recording whose `steps` (each read for `tool` and `input`) are replayed.
   * @param client Object exposing `callTool(tool, input)`.
   * @param progress Optional hooks: `onStepStart(index, step)` fires before a
   *   step runs, `onStepComplete(index, replayedStep)` after it has been stored.
   * @returns `{ originalRecording, replayedSteps, replayedAt }`, where
   *   `replayedAt` is an ISO timestamp taken once all steps have run.
   */
  async replay(recording, client, progress) {
    const replayedSteps = [];
    for (const [index, step] of recording.steps.entries()) {
      progress?.onStepStart?.(index, step);
      const startedAt = performance.now();
      let output = [];
      let isError = false;
      try {
        const response = await client.callTool(step.tool, step.input);
        output = response.content;
        isError = response.isError === true;
      } catch (err) {
        // Surface the failure as step output so the replay can continue.
        const message = err instanceof Error ? err.message : String(err);
        output = [{ type: "text", text: message }];
        isError = true;
      }
      const durationMs = Math.round(performance.now() - startedAt);
      const replayed = { tool: step.tool, input: step.input, output, isError, durationMs };
      replayedSteps.push(replayed);
      progress?.onStepComplete?.(index, replayed);
    }
    const replayedAt = (/* @__PURE__ */ new Date()).toISOString();
    return { originalRecording: recording, replayedSteps, replayedAt };
  }
};
|
|
3649
|
+
|
|
3650
|
+
// src/recording/recording-differ.ts
|
|
3651
|
+
/**
 * Compares the steps stored in a recording with the steps obtained by
 * replaying it, pairing them by position.
 */
var RecordingDiffer = class {
  /**
   * Classify every step position as `matched`, `changed`, `removed` or `added`.
   *
   * - `matched`: both sides exist, with equal output and equal error state.
   * - `changed`: both sides exist but differ; `outputDiff` describes how.
   * - `removed`: only the original recording has a step at this index.
   * - `added`: only the replay has a step at this index.
   *
   * Outputs are compared by JSON value with object keys in canonical order, so
   * key order alone never produces a `changed` result. Array order still counts.
   *
   * @param recording Original recording; `id`, `name` and `steps` are read.
   * @param replayedSteps Steps produced by the replay, in order.
   * @param replayedAt Timestamp of the replay, copied into the result as-is.
   * @returns `{ recordingId, recordingName, replayedAt, steps, summary }`, where
   *   `summary` counts the entries of each type.
   */
  diff(recording, replayedSteps, replayedAt) {
    const steps = [];
    const summary = { matched: 0, changed: 0, added: 0, removed: 0 };
    const maxLen = Math.max(recording.steps.length, replayedSteps.length);
    for (let i = 0; i < maxLen; i++) {
      const original = recording.steps[i];
      const replayed = replayedSteps[i];
      let entry;
      if (original && replayed) {
        // An empty description means neither the error state nor the output differs.
        const change = this.describeChange(original, replayed);
        const isMatched = change === "";
        entry = {
          index: i,
          tool: original.tool,
          type: isMatched ? "matched" : "changed",
          original,
          replayed,
          outputDiff: isMatched ? void 0 : change
        };
      } else if (original) {
        entry = { index: i, tool: original.tool, type: "removed", original };
      } else if (replayed) {
        entry = { index: i, tool: replayed.tool, type: "added", replayed };
      } else {
        // Neither side has a usable step at this index (e.g. a hole); nothing to report.
        continue;
      }
      steps.push(entry);
      summary[entry.type] += 1;
    }
    return {
      recordingId: recording.id,
      recordingName: recording.name,
      replayedAt,
      steps,
      summary
    };
  }
  /**
   * Describe how `replayed` differs from `original`.
   * @returns A `"; "`-joined list of differences, or `""` when the error state
   *   and the output are both equal.
   */
  describeChange(original, replayed) {
    const parts = [];
    if ((original.isError ?? false) !== (replayed.isError ?? false)) {
      parts.push(`error state: ${original.isError ?? false} \u2192 ${replayed.isError ?? false}`);
    }
    if (this.stableStringify(original.output) !== this.stableStringify(replayed.output)) {
      parts.push("output content changed");
    }
    return parts.join("; ");
  }
  /**
   * Serialise `value` to JSON with the keys of every plain object sorted, so
   * two values that differ only in key order produce the same string.
   */
  stableStringify(value) {
    return JSON.stringify(value, (_key, val) => {
      if (val === null || typeof val !== "object" || Array.isArray(val)) return val;
      const ordered = Object.entries(val).sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
      return Object.fromEntries(ordered);
    });
  }
};
|
|
3256
3711
|
export {
|
|
3257
3712
|
AuthBypassRule,
|
|
3258
3713
|
BadgeGenerator,
|
|
@@ -3260,9 +3715,11 @@ export {
|
|
|
3260
3715
|
BenchmarkRunner,
|
|
3261
3716
|
ConnectionManager,
|
|
3262
3717
|
ConsoleReporter,
|
|
3718
|
+
DANGEROUS_TOOL_PATTERNS,
|
|
3263
3719
|
DocGenerator,
|
|
3264
3720
|
ERROR_CODE_MAP,
|
|
3265
3721
|
ERROR_TEMPLATES,
|
|
3722
|
+
ExcessiveAgencyRule,
|
|
3266
3723
|
HtmlDocGenerator,
|
|
3267
3724
|
HtmlReporter,
|
|
3268
3725
|
InformationDisclosureRule,
|
|
@@ -3281,6 +3738,9 @@ export {
|
|
|
3281
3738
|
ProcessRegistry,
|
|
3282
3739
|
Profiler,
|
|
3283
3740
|
RateLimiter,
|
|
3741
|
+
RecordingDiffer,
|
|
3742
|
+
RecordingReplayer,
|
|
3743
|
+
RecordingStore,
|
|
3284
3744
|
ResourceExhaustionRule,
|
|
3285
3745
|
ResultDiffer,
|
|
3286
3746
|
ScanConfig,
|
|
@@ -3290,6 +3750,7 @@ export {
|
|
|
3290
3750
|
TestExecutor,
|
|
3291
3751
|
TestRunner,
|
|
3292
3752
|
TestScheduler,
|
|
3753
|
+
ToolPoisoningRule,
|
|
3293
3754
|
WaterfallGenerator,
|
|
3294
3755
|
YAML_LIMITS,
|
|
3295
3756
|
computeStats,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mcpspec/core",
|
|
3
|
-
"version": "1.0
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"types": "./dist/index.d.ts",
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
"expr-eval": "^2.0.2",
|
|
32
32
|
"handlebars": "^4.7.8",
|
|
33
33
|
"zod": "^3.22.0",
|
|
34
|
-
"@mcpspec/shared": "1.0
|
|
34
|
+
"@mcpspec/shared": "1.1.0"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
37
|
"@types/js-yaml": "^4.0.9",
|