@polygraphso/litmus 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/mcp.js CHANGED
@@ -3,141 +3,61 @@ import {
3
3
  RUN_LITMUS_TOOL_DESCRIPTION,
4
4
  RUN_LITMUS_TOOL_NAME,
5
5
  RUN_LITMUS_TOOL_TITLE,
6
+ RUN_SKILL_LITMUS_TOOL_DESCRIPTION,
7
+ RUN_SKILL_LITMUS_TOOL_NAME,
8
+ RUN_SKILL_LITMUS_TOOL_TITLE,
9
+ VERIFY_SKILL_TOOL_DESCRIPTION,
10
+ VERIFY_SKILL_TOOL_NAME,
11
+ VERIFY_SKILL_TOOL_TITLE,
12
+ VERIFY_TOOL_DESCRIPTION,
13
+ VERIFY_TOOL_NAME,
14
+ VERIFY_TOOL_TITLE,
6
15
  handleRunLitmus,
7
- readAttestation,
16
+ handleRunSkillLitmus,
17
+ handleVerify,
18
+ handleVerifySkill,
8
19
  runLitmusInputShape,
9
- selectedNetwork
10
- } from "./chunk-BPS4YCDL.js";
11
- import "./chunk-VOPISHBU.js";
12
- import "./chunk-35UOPCBW.js";
20
+ runSkillLitmusInputShape,
21
+ verifyInputShape,
22
+ verifySkillInputShape
23
+ } from "./chunk-AVF3GYCS.js";
24
+ import "./chunk-M5HXKZVN.js";
13
25
  import {
14
- parseServerRef,
15
- serverKey
16
- } from "./chunk-ZR6XRGMQ.js";
26
+ judgeFromEnv
27
+ } from "./chunk-DN2OX4RT.js";
28
+ import "./chunk-44R4ZYOE.js";
17
29
 
18
30
  // src/mcp.ts
19
31
  import { realpathSync } from "fs";
20
32
  import { fileURLToPath } from "url";
21
- import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js";
22
- import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
23
- import { z as z2 } from "zod";
24
-
25
- // ../mcp/src/index.ts
26
33
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
27
-
28
- // ../mcp/src/tools/verify-attestation.ts
34
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
29
35
  import { z } from "zod";
30
- function canonicalRef(ref) {
31
- try {
32
- return serverKey(parseServerRef(ref)).toLowerCase();
33
- } catch {
34
- return ref.trim().toLowerCase();
35
- }
36
+
37
+ // src/sampling-judge.ts
38
+ function clientSupportsSampling(server) {
39
+ return Boolean(server.server.getClientCapabilities()?.sampling);
36
40
  }
37
- var VERIFY_TOOL_NAME = "verify_attestation";
38
- var VERIFY_TOOL_TITLE = "Verify a server's polygraph attestation";
39
- var VERIFY_TOOL_DESCRIPTION = [
40
- "Read a server's already-published polygraph (litmus) grade \u2014 without running",
41
- "anything \u2014 before an agent trusts or, in agentic commerce, pays it.",
42
- "",
43
- "When a grade is published it returns the behavioral grade (A\u2013F), the attestation",
44
- "UID, the evidence CID, and the graded tool-surface fingerprint. The caller must",
45
- "still recompute the LIVE fingerprint and require it to equal the attested one",
46
- "before paying \u2014 a passing attestation can otherwise front for a tool surface the",
47
- "server no longer serves (rug pull).",
48
- "",
49
- "Grade publishing is still rolling out, so this commonly returns not_available",
50
- "today: that means UNEVALUATED (neither safe nor unsafe), not a failing grade \u2014 to",
51
- "grade the server yourself right now, use `run_litmus`. A `lookup_failed` result",
52
- "means the lookup itself failed (the index or chain was unreachable); the grade is",
53
- "unknown, which is not the same as unevaluated.",
54
- "",
55
- "Input: server_ref \u2014 e.g. npm/@modelcontextprotocol/server-filesystem."
56
- ].join("\n");
57
- var verifyInputShape = {
58
- server_ref: z.string().min(1).max(512).describe("Registry-prefixed server identifier, e.g. npm/@scope/server.")
59
- };
60
- async function handleVerify({ server_ref }) {
61
- const found = await resolveUid(server_ref);
62
- if (found.kind === "error") {
63
- return {
64
- isError: true,
65
- content: [
66
- {
67
- type: "text",
68
- text: `lookup_failed \u2014 could not reach the polygraph grade index for ${server_ref} (${found.detail}). The lookup itself failed, so the grade is unknown \u2014 retry or report it as unchecked, NOT as unevaluated.`
69
- }
70
- ]
71
- };
72
- }
73
- let att = null;
74
- if (found.kind === "found") {
75
- try {
76
- att = await readAttestation(found.uid);
77
- } catch (err) {
78
- return {
79
- isError: true,
80
- content: [
81
- {
82
- type: "text",
83
- text: `lookup_failed \u2014 the onchain read failed for ${server_ref} (${err instanceof Error ? err.message : String(err)}). Treat as unchecked (the chain/RPC was unreachable), not as "no grade".`
84
- }
85
- ]
86
- };
41
+ function samplingJudge(server) {
42
+ return {
43
+ id: "mcp-sampling",
44
+ async complete(system, user) {
45
+ if (!clientSupportsSampling(server)) {
46
+ throw new Error("MCP client does not support sampling");
47
+ }
48
+ const res = await server.server.createMessage({
49
+ systemPrompt: system,
50
+ maxTokens: 1024,
51
+ messages: [{ role: "user", content: { type: "text", text: user } }]
52
+ });
53
+ return res.content.type === "text" ? res.content.text : "";
87
54
  }
88
- }
89
- if (!att) {
90
- return {
91
- content: [
92
- {
93
- type: "text",
94
- text: `not_available \u2014 no published polygraph grade for ${server_ref}. Grade publishing is still rolling out, so this is expected for most servers; it means unevaluated (neither safe nor unsafe), not a failing grade. To grade it now, use run_litmus.`
95
- }
96
- ]
97
- };
98
- }
99
- if (canonicalRef(att.serverRef) !== canonicalRef(server_ref)) {
100
- return {
101
- content: [
102
- {
103
- type: "text",
104
- text: `not_available \u2014 the resolved attestation is for ${att.serverRef}, not ${server_ref} (discovery mismatch; treat as unevaluated)`
105
- }
106
- ]
107
- };
108
- }
109
- const payload = {
110
- status: "attested",
111
- grade: att.overallGrade,
112
- attestationUid: att.uid,
113
- serverRef: att.serverRef,
114
- // The version the grade was run against (null for HTTP/unresolved targets).
115
- // Advisory: the live fingerprint, not this string, is the trust anchor.
116
- resolvedVersion: att.resolvedVersion,
117
- reportCID: att.reportCID,
118
- toolDefsFingerprint: att.toolDefsFingerprint,
119
- revoked: att.revoked,
120
- network: selectedNetwork(),
121
- liveFingerprintCheckRequired: "Recompute the live tool-surface fingerprint and require it to equal toolDefsFingerprint before paying."
122
55
  };
123
- return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
124
- }
125
- async function resolveUid(serverRef) {
126
- const base = process.env.POLYGRAPH_API_URL ?? "https://polygraph.so";
127
- try {
128
- const res = await fetch(`${base}/api/attestations?ref=${encodeURIComponent(serverRef)}`);
129
- if (res.status === 404) return { kind: "none" };
130
- if (!res.ok) return { kind: "error", detail: `grade index returned HTTP ${res.status}` };
131
- const row = await res.json();
132
- return row?.attestation_uid ? { kind: "found", uid: row.attestation_uid } : { kind: "none" };
133
- } catch (err) {
134
- return { kind: "error", detail: err instanceof Error ? err.message : String(err) };
135
- }
136
56
  }
137
57
 
138
58
  // src/mcp.ts
139
59
  function buildServer() {
140
- const server = new McpServer2(
60
+ const server = new McpServer(
141
61
  { name: "polygraph-litmus", version: "0.1.0" },
142
62
  {
143
63
  instructions: [
@@ -155,7 +75,12 @@ function buildServer() {
155
75
  "server, without running anything. Grade publishing is still rolling out, so",
156
76
  "it commonly returns not_available today \u2014 that means unevaluated (neither",
157
77
  "safe nor unsafe), not a failing grade; to grade the server yourself, use",
158
- "`run_litmus`."
78
+ "`run_litmus`.",
79
+ "",
80
+ "Use `run_skill_litmus` to grade a Claude Code / Agent Skill (a SKILL.md +",
81
+ "bundle) A/B/D/F. This is a STATIC read of the skill's text and bundled files \u2014",
82
+ "no execution, no network \u2014 so it is fast but not behavioral proof. Pass",
83
+ "`skill_ref` as a local path to the skill directory."
159
84
  ].join("\n")
160
85
  }
161
86
  );
@@ -179,6 +104,30 @@ function buildServer() {
179
104
  },
180
105
  handleRunLitmus
181
106
  );
107
+ server.registerTool(
108
+ RUN_SKILL_LITMUS_TOOL_NAME,
109
+ {
110
+ title: RUN_SKILL_LITMUS_TOOL_TITLE,
111
+ description: RUN_SKILL_LITMUS_TOOL_DESCRIPTION,
112
+ inputSchema: runSkillLitmusInputShape,
113
+ annotations: {
114
+ title: RUN_SKILL_LITMUS_TOOL_TITLE,
115
+ readOnlyHint: true,
116
+ // never mutates: the safety scan reads files; quality judging is host-mediated
117
+ destructiveHint: false,
118
+ idempotentHint: false,
119
+ // the optional LLM-judged quality axes are non-deterministic
120
+ openWorldHint: true
121
+ // the optional quality judge may use the host model (sampling) or a configured endpoint
122
+ }
123
+ },
124
+ // Resolve the judge per call (the client connection is known now): the host
125
+ // agent's model via sampling if it's offered, else an operator-set env key,
126
+ // else null ⇒ deterministic quality only. The litmus core never needs a key.
127
+ (args) => handleRunSkillLitmus(args, {
128
+ judge: clientSupportsSampling(server) ? samplingJudge(server) : judgeFromEnv()
129
+ })
130
+ );
182
131
  server.registerTool(
183
132
  VERIFY_TOOL_NAME,
184
133
  {
@@ -195,13 +144,30 @@ function buildServer() {
195
144
  },
196
145
  handleVerify
197
146
  );
147
+ server.registerTool(
148
+ VERIFY_SKILL_TOOL_NAME,
149
+ {
150
+ title: VERIFY_SKILL_TOOL_TITLE,
151
+ description: VERIFY_SKILL_TOOL_DESCRIPTION,
152
+ inputSchema: verifySkillInputShape,
153
+ annotations: {
154
+ title: VERIFY_SKILL_TOOL_TITLE,
155
+ readOnlyHint: true,
156
+ destructiveHint: false,
157
+ idempotentHint: true,
158
+ openWorldHint: true
159
+ // reads the grade index + chain
160
+ }
161
+ },
162
+ handleVerifySkill
163
+ );
198
164
  server.registerPrompt(
199
165
  "grade",
200
166
  {
201
167
  title: "Grade an MCP server",
202
168
  description: "Run the open behavioral litmus against an MCP server and report its grade A\u2013F with the evidence.",
203
169
  argsSchema: {
204
- server_ref: z2.string().min(1).max(512).describe("npm/@scope/server, an https:// MCP URL, or a local path to an MCP entry file")
170
+ server_ref: z.string().min(1).max(512).describe("npm/@scope/server, an https:// MCP URL, or a local path to an MCP entry file")
205
171
  }
206
172
  },
207
173
  ({ server_ref }) => ({
@@ -222,7 +188,7 @@ function buildServer() {
222
188
  title: "Check a server's published grade",
223
189
  description: "Read a server's already-published polygraph grade without running anything.",
224
190
  argsSchema: {
225
- server_ref: z2.string().min(1).max(512).describe("Registry-prefixed server identifier, e.g. npm/@scope/server")
191
+ server_ref: z.string().min(1).max(512).describe("Registry-prefixed server identifier, e.g. npm/@scope/server")
226
192
  }
227
193
  },
228
194
  ({ server_ref }) => ({
@@ -237,6 +203,48 @@ function buildServer() {
237
203
  ]
238
204
  })
239
205
  );
206
+ server.registerPrompt(
207
+ "grade-skill",
208
+ {
209
+ title: "Grade a Claude Code skill",
210
+ description: "Run the open static safety litmus over a skill (SKILL.md + bundle) and report its grade A/B/D/F with the evidence.",
211
+ argsSchema: {
212
+ skill_ref: z.string().min(1).max(1024).describe("Local path to a skill directory containing SKILL.md")
213
+ }
214
+ },
215
+ ({ skill_ref }) => ({
216
+ messages: [
217
+ {
218
+ role: "user",
219
+ content: {
220
+ type: "text",
221
+ text: `Run the polygraph skill litmus on ${skill_ref} using the run_skill_litmus tool. Report the letter grade, the one-line summary, any failed category with its findings, and the contentHash. State plainly that this is a static scan, not behavioral proof.`
222
+ }
223
+ }
224
+ ]
225
+ })
226
+ );
227
+ server.registerPrompt(
228
+ "check-skill",
229
+ {
230
+ title: "Check a skill's published grade",
231
+ description: "Read a skill's already-published polygraph grade without running anything.",
232
+ argsSchema: {
233
+ skill_ref: z.string().min(1).max(1024).describe("Skill identifier, e.g. github/<owner>/<repo>#<path> or marketplace/<owner>/<name>")
234
+ }
235
+ },
236
+ ({ skill_ref }) => ({
237
+ messages: [
238
+ {
239
+ role: "user",
240
+ content: {
241
+ type: "text",
242
+ text: `Use the verify_skill_attestation tool to read the published polygraph grade for ${skill_ref}. If it returns not_available, say the skill is unevaluated (neither safe nor unsafe) and offer to grade a local copy with run_skill_litmus. If a grade is returned, report it and remind the user to recompute the skill's contentHash before installing.`
243
+ }
244
+ }
245
+ ]
246
+ })
247
+ );
240
248
  return server;
241
249
  }
242
250
  async function main() {
@@ -0,0 +1,67 @@
1
+ import {
2
+ SKILL_BUNDLE_SCHEMA_VERSION,
3
+ SKILL_METHODOLOGY_VERSION,
4
+ SKILL_QUALITY_VERSION,
5
+ SkillLoadError,
6
+ assembleBundle,
7
+ canaryMatch,
8
+ classifyTool,
9
+ connectTarget,
10
+ dangerousCommand,
11
+ exfilInstruction,
12
+ fingerprintToolDefs,
13
+ gradeFromCategories,
14
+ gradeSkillCategories,
15
+ hasHighSeverity,
16
+ instructionMimicry,
17
+ internalsLeak,
18
+ invisibleUnicode,
19
+ judgeFromEnv,
20
+ judgeSkillQuality,
21
+ loadSkill,
22
+ markdownTricks,
23
+ openAICompatJudge,
24
+ overBroadTrigger,
25
+ runLitmus,
26
+ runSkillLitmus,
27
+ runSkillQuality,
28
+ runSkillQualityJudged,
29
+ skillInjection,
30
+ skillInjectionFails,
31
+ stateChangingToolNames,
32
+ stripExamples
33
+ } from "./chunk-DN2OX4RT.js";
34
+ import "./chunk-44R4ZYOE.js";
35
+ export {
36
+ SKILL_BUNDLE_SCHEMA_VERSION,
37
+ SKILL_METHODOLOGY_VERSION,
38
+ SKILL_QUALITY_VERSION,
39
+ SkillLoadError,
40
+ assembleBundle,
41
+ canaryMatch,
42
+ classifyTool,
43
+ connectTarget,
44
+ dangerousCommand,
45
+ exfilInstruction,
46
+ fingerprintToolDefs,
47
+ gradeFromCategories,
48
+ gradeSkillCategories,
49
+ hasHighSeverity,
50
+ instructionMimicry,
51
+ internalsLeak,
52
+ invisibleUnicode,
53
+ judgeFromEnv,
54
+ judgeSkillQuality,
55
+ loadSkill,
56
+ markdownTricks,
57
+ openAICompatJudge,
58
+ overBroadTrigger,
59
+ runLitmus,
60
+ runSkillLitmus,
61
+ runSkillQuality,
62
+ runSkillQualityJudged,
63
+ skillInjection,
64
+ skillInjectionFails,
65
+ stateChangingToolNames,
66
+ stripExamples
67
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polygraphso/litmus",
3
- "version": "0.8.1",
3
+ "version": "0.9.0",
4
4
  "description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
5
5
  "license": "Apache-2.0",
6
6
  "homepage": "https://polygraph.so",
@@ -40,6 +40,7 @@
40
40
  },
41
41
  "bin": {
42
42
  "polygraphso-litmus": "dist/cli.js",
43
+ "polygraphso-litmus-skill": "dist/cli-skill.js",
43
44
  "polygraphso-litmus-mcp": "dist/mcp.js"
44
45
  },
45
46
  "files": [
@@ -62,11 +63,11 @@
62
63
  "typescript": "^5.9.3",
63
64
  "vitest": "^2.1.0",
64
65
  "@polygraph/core": "0.0.0",
66
+ "@polygraph/probes": "0.0.0",
65
67
  "@polygraph/onchain": "0.0.0",
66
68
  "@polygraph/agent": "0.0.0",
67
- "@polygraph/probes": "0.0.0",
68
- "@polygraph/mcp": "0.0.0",
69
- "@polygraph/cli": "0.0.0"
69
+ "@polygraph/cli": "0.0.0",
70
+ "@polygraph/mcp": "0.0.0"
70
71
  },
71
72
  "publishConfig": {
72
73
  "access": "public"
@@ -1,250 +0,0 @@
1
- import {
2
- parseAuthFlags,
3
- resolveTarget
4
- } from "./chunk-VOPISHBU.js";
5
- import {
6
- runLitmus
7
- } from "./chunk-35UOPCBW.js";
8
- import {
9
- CATEGORY_STATUS_UINT8,
10
- METHODOLOGY_VERSION
11
- } from "./chunk-ZR6XRGMQ.js";
12
-
13
- // ../onchain/src/networks.ts
14
- var NETWORKS = {
15
- "base-sepolia": {
16
- chainId: 84532,
17
- rpc: "https://sepolia.base.org",
18
- eas: "0x4200000000000000000000000000000000000021",
19
- schemaRegistry: "0x4200000000000000000000000000000000000020",
20
- easscan: "https://base-sepolia.easscan.org"
21
- },
22
- base: {
23
- chainId: 8453,
24
- rpc: "https://mainnet.base.org",
25
- eas: "0x4200000000000000000000000000000000000021",
26
- schemaRegistry: "0x4200000000000000000000000000000000000020",
27
- easscan: "https://base.easscan.org"
28
- }
29
- };
30
- function selectedNetwork() {
31
- return process.env.NEXT_PUBLIC_POLYGRAPH_NETWORK === "base" ? "base" : "base-sepolia";
32
- }
33
- function networkConfig(net = selectedNetwork()) {
34
- return NETWORKS[net];
35
- }
36
- function rpcUrl(net = selectedNetwork()) {
37
- const override = net === "base" ? process.env.BASE_MAINNET_RPC_URL : process.env.BASE_SEPOLIA_RPC_URL;
38
- return override && override.length > 0 ? override : NETWORKS[net].rpc;
39
- }
40
-
41
- // ../onchain/src/eas.ts
42
- import { AbiCoder } from "ethers";
43
- var LITMUS_SCHEMA = "string serverRef,bytes32 toolDefsFingerprint,uint8 gradeC01,uint8 gradeC02,uint8 gradeC03,string overallGrade,string reportCID,string methodologyVersion,uint64 ranAt,string resolvedVersion";
44
- var LITMUS_ABI_TYPES = [
45
- "string",
46
- // serverRef
47
- "bytes32",
48
- // toolDefsFingerprint
49
- "uint8",
50
- // gradeC01
51
- "uint8",
52
- // gradeC02
53
- "uint8",
54
- // gradeC03
55
- "string",
56
- // overallGrade
57
- "string",
58
- // reportCID
59
- "string",
60
- // methodologyVersion
61
- "uint64",
62
- // ranAt
63
- "string"
64
- // resolvedVersion
65
- ];
66
- var LITMUS_ABI_NAMES = [
67
- "serverRef",
68
- "toolDefsFingerprint",
69
- "gradeC01",
70
- "gradeC02",
71
- "gradeC03",
72
- "overallGrade",
73
- "reportCID",
74
- "methodologyVersion",
75
- "ranAt",
76
- "resolvedVersion"
77
- ];
78
- function categoryUint8(bundle, code) {
79
- const status = bundle.categories.find((c) => c.code === code)?.status;
80
- return status ? CATEGORY_STATUS_UINT8[status] : CATEGORY_STATUS_UINT8.skipped;
81
- }
82
- function litmusFields(bundle, reportCID) {
83
- return {
84
- serverRef: bundle.serverRef,
85
- toolDefsFingerprint: bundle.toolDefsFingerprint,
86
- gradeC01: categoryUint8(bundle, "C-01"),
87
- gradeC02: categoryUint8(bundle, "C-02"),
88
- gradeC03: categoryUint8(bundle, "C-03"),
89
- overallGrade: bundle.grade,
90
- reportCID,
91
- methodologyVersion: bundle.methodologyVersion || METHODOLOGY_VERSION,
92
- ranAt: BigInt(Math.floor(Date.parse(bundle.ranAt) / 1e3)),
93
- resolvedVersion: bundle.resolvedVersion ?? ""
94
- };
95
- }
96
- function encodeLitmusAttestation(bundle, reportCID) {
97
- const f = litmusFields(bundle, reportCID);
98
- return AbiCoder.defaultAbiCoder().encode(
99
- [...LITMUS_ABI_TYPES],
100
- [
101
- f.serverRef,
102
- f.toolDefsFingerprint,
103
- f.gradeC01,
104
- f.gradeC02,
105
- f.gradeC03,
106
- f.overallGrade,
107
- f.reportCID,
108
- f.methodologyVersion,
109
- f.ranAt,
110
- f.resolvedVersion
111
- ]
112
- );
113
- }
114
- function decodeLitmusAttestation(encoded) {
115
- const values = AbiCoder.defaultAbiCoder().decode([...LITMUS_ABI_TYPES], encoded);
116
- const out = {};
117
- LITMUS_ABI_NAMES.forEach((name, i) => {
118
- out[name] = values[i];
119
- });
120
- return out;
121
- }
122
-
123
- // ../onchain/src/read.ts
124
- import { Contract, JsonRpcProvider, ZeroHash } from "ethers";
125
- var EAS_ABI = [
126
- "function getAttestation(bytes32 uid) view returns ((bytes32 uid, bytes32 schema, uint64 time, uint64 expirationTime, uint64 revocationTime, bytes32 refUID, address recipient, address attester, bool revocable, bytes data))"
127
- ];
128
- function litmusSchemaUID() {
129
- const uid = process.env.NEXT_PUBLIC_EAS_SCHEMA_UID;
130
- if (!uid) throw new Error("NEXT_PUBLIC_EAS_SCHEMA_UID is required \u2014 register the schema first.");
131
- return uid;
132
- }
133
- async function readAttestation(uid) {
134
- const cfg = networkConfig();
135
- const provider = new JsonRpcProvider(rpcUrl(), cfg.chainId);
136
- const eas = new Contract(cfg.eas, EAS_ABI, provider);
137
- const att = await eas.getAttestation(uid);
138
- if (!att || att.uid === ZeroHash) return null;
139
- if (String(att.schema).toLowerCase() !== litmusSchemaUID().toLowerCase()) return null;
140
- const d = decodeLitmusAttestation(att.data);
141
- return {
142
- uid: att.uid,
143
- serverRef: String(d.serverRef),
144
- toolDefsFingerprint: String(d.toolDefsFingerprint),
145
- overallGrade: String(d.overallGrade),
146
- reportCID: String(d.reportCID),
147
- resolvedVersion: d.resolvedVersion || null,
148
- revoked: att.revocationTime > 0n,
149
- attester: String(att.attester),
150
- expirationTime: BigInt(att.expirationTime ?? 0n)
151
- };
152
- }
153
-
154
- // src/tools/run-litmus.ts
155
- import { z } from "zod";
156
- var RUN_LITMUS_TOOL_NAME = "run_litmus";
157
- var RUN_LITMUS_TOOL_TITLE = "Run a behavioral litmus on an MCP server";
158
- var RUN_LITMUS_TOOL_DESCRIPTION = [
159
- `Grade an MCP server A\u2013F against the open behavioral litmus (${METHODOLOGY_VERSION}).`,
160
- "The harness connects the way an agent would, fingerprints the tool surface, and",
161
- "runs four checks: C-01 tool-output injection, C-02 permission/egress overreach",
162
- "(egress in a hardened default-deny Docker sandbox, plus a declared-permission",
163
- "honesty check), C-03 sensitive-data handling (planted canaries), and C-04",
164
- "adversarial-input handling (malformed/oversized and jailbreak inputs).",
165
- "",
166
- "This is ACTIVE: it launches the target server's code to exercise it (egress-",
167
- "sandboxed when Docker is available) and takes ~20\u201360s. It is not a lookup \u2014 for",
168
- "a server's already-published grade, use `verify_attestation`. No wallet or RPC",
169
- "needed.",
170
- "",
171
- "server_ref examples: npm/@modelcontextprotocol/server-filesystem \xB7",
172
- "https://example.com/mcp \xB7 ./build/index.js. For a token-gated https:// target,",
173
- "pass `bearer`. If Docker is unavailable, C-02 is skipped and the grade is capped",
174
- "at B for that run."
175
- ].join("\n");
176
- var runLitmusInputShape = {
177
- server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file."),
178
- bearer: z.string().min(1).max(8192).optional().describe("Bearer token for a token-gated https:// MCP server. Sent as `Authorization: Bearer <token>` to the target origin only. Ignored for stdio/local targets."),
179
- header: z.array(z.string()).max(20).optional().describe('Extra HTTP headers for a gated https:// target, each "Key: Value" (e.g. "X-Api-Key: \u2026"). Overrides the bearer-derived Authorization for the same key. Ignored for stdio/local targets.')
180
- };
181
- var PROGRESS_TOTAL = 5;
182
- async function handleRunLitmus({ server_ref, bearer, header }, extra) {
183
- try {
184
- const argv = [
185
- ...bearer ? ["--bearer", bearer] : [],
186
- ...(header ?? []).flatMap((h) => ["--header", h])
187
- ];
188
- const { headers } = parseAuthFlags(argv, {});
189
- const progressToken = extra._meta?.progressToken;
190
- const sendProgress = progressToken !== void 0 ? (progress, message) => void extra.sendNotification({
191
- method: "notifications/progress",
192
- params: { progressToken, progress, total: PROGRESS_TOTAL, message }
193
- }) : void 0;
194
- sendProgress?.(0, `Connecting to ${server_ref}\u2026`);
195
- const bundle = await runLitmus(resolveTarget(server_ref), {
196
- ...Object.keys(headers).length ? { headers } : {},
197
- ...sendProgress ? { onProgress: (done, _total, label) => sendProgress(done, label) } : {}
198
- });
199
- const payload = summarize(bundle);
200
- return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
201
- } catch (err) {
202
- const message = err instanceof Error ? err.message : String(err);
203
- return { isError: true, content: [{ type: "text", text: `run_litmus failed: ${message}` }] };
204
- }
205
- }
206
- var CATEGORY_LABEL = {
207
- "C-01": "tool-output injection",
208
- "C-02": "permission / egress overreach",
209
- "C-03": "sensitive-data handling",
210
- "C-04": "adversarial-input handling"
211
- };
212
- function summarize(b) {
213
- const find = (code) => b.categories.find((c) => c.code === code);
214
- const categories = ["C-01", "C-02", "C-03", "C-04"].map((code) => {
215
- const c = find(code);
216
- const findings = c?.status === "fail" ? c.probes.flatMap((p) => p.findings).filter((f) => f.severity === "high").slice(0, 5).map((f) => ({ tool: f.tool, kind: f.kind, match: truncate(f.match, 120), host: f.host, port: f.port })) : [];
217
- return { code, check: CATEGORY_LABEL[code], status: c?.status ?? "unknown", reason: c?.reason ?? null, findings };
218
- });
219
- return {
220
- grade: b.grade,
221
- summary: b.gradeRationale,
222
- serverRef: b.serverRef,
223
- resolvedVersion: b.resolvedVersion,
224
- fingerprint: b.toolDefsFingerprint,
225
- ranAt: b.ranAt,
226
- methodologyVersion: b.methodologyVersion,
227
- categories
228
- };
229
- }
230
- function truncate(s, n) {
231
- return s.length > n ? `${s.slice(0, n)}\u2026` : s;
232
- }
233
-
234
- export {
235
- NETWORKS,
236
- selectedNetwork,
237
- networkConfig,
238
- rpcUrl,
239
- LITMUS_SCHEMA,
240
- litmusFields,
241
- encodeLitmusAttestation,
242
- decodeLitmusAttestation,
243
- litmusSchemaUID,
244
- readAttestation,
245
- RUN_LITMUS_TOOL_NAME,
246
- RUN_LITMUS_TOOL_TITLE,
247
- RUN_LITMUS_TOOL_DESCRIPTION,
248
- runLitmusInputShape,
249
- handleRunLitmus
250
- };
@@ -1,31 +0,0 @@
1
- import {
2
- assembleBundle,
3
- canaryMatch,
4
- classifyTool,
5
- connectTarget,
6
- fingerprintToolDefs,
7
- gradeFromCategories,
8
- hasHighSeverity,
9
- instructionMimicry,
10
- internalsLeak,
11
- invisibleUnicode,
12
- markdownTricks,
13
- runLitmus,
14
- stateChangingToolNames
15
- } from "./chunk-35UOPCBW.js";
16
- import "./chunk-ZR6XRGMQ.js";
17
- export {
18
- assembleBundle,
19
- canaryMatch,
20
- classifyTool,
21
- connectTarget,
22
- fingerprintToolDefs,
23
- gradeFromCategories,
24
- hasHighSeverity,
25
- instructionMimicry,
26
- internalsLeak,
27
- invisibleUnicode,
28
- markdownTricks,
29
- runLitmus,
30
- stateChangingToolNames
31
- };