@polygraphso/litmus 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -0
- package/dist/{chunk-ZR6XRGMQ.js → chunk-44R4ZYOE.js} +67 -0
- package/dist/chunk-AVF3GYCS.js +692 -0
- package/dist/{chunk-35UOPCBW.js → chunk-DN2OX4RT.js} +456 -2
- package/dist/{chunk-VOPISHBU.js → chunk-M5HXKZVN.js} +2 -2
- package/dist/cli-skill.d.ts +1 -0
- package/dist/cli-skill.js +98 -0
- package/dist/cli.js +2 -2
- package/dist/index.d.ts +437 -2
- package/dist/index.js +86 -8
- package/dist/mcp.js +130 -122
- package/dist/src-TG44QXFV.js +67 -0
- package/package.json +5 -4
- package/dist/chunk-BPS4YCDL.js +0 -250
- package/dist/src-RSTPCEYU.js +0 -31
package/dist/mcp.js
CHANGED
|
@@ -3,141 +3,61 @@ import {
|
|
|
3
3
|
RUN_LITMUS_TOOL_DESCRIPTION,
|
|
4
4
|
RUN_LITMUS_TOOL_NAME,
|
|
5
5
|
RUN_LITMUS_TOOL_TITLE,
|
|
6
|
+
RUN_SKILL_LITMUS_TOOL_DESCRIPTION,
|
|
7
|
+
RUN_SKILL_LITMUS_TOOL_NAME,
|
|
8
|
+
RUN_SKILL_LITMUS_TOOL_TITLE,
|
|
9
|
+
VERIFY_SKILL_TOOL_DESCRIPTION,
|
|
10
|
+
VERIFY_SKILL_TOOL_NAME,
|
|
11
|
+
VERIFY_SKILL_TOOL_TITLE,
|
|
12
|
+
VERIFY_TOOL_DESCRIPTION,
|
|
13
|
+
VERIFY_TOOL_NAME,
|
|
14
|
+
VERIFY_TOOL_TITLE,
|
|
6
15
|
handleRunLitmus,
|
|
7
|
-
|
|
16
|
+
handleRunSkillLitmus,
|
|
17
|
+
handleVerify,
|
|
18
|
+
handleVerifySkill,
|
|
8
19
|
runLitmusInputShape,
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
20
|
+
runSkillLitmusInputShape,
|
|
21
|
+
verifyInputShape,
|
|
22
|
+
verifySkillInputShape
|
|
23
|
+
} from "./chunk-AVF3GYCS.js";
|
|
24
|
+
import "./chunk-M5HXKZVN.js";
|
|
13
25
|
import {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
26
|
+
judgeFromEnv
|
|
27
|
+
} from "./chunk-DN2OX4RT.js";
|
|
28
|
+
import "./chunk-44R4ZYOE.js";
|
|
17
29
|
|
|
18
30
|
// src/mcp.ts
|
|
19
31
|
import { realpathSync } from "fs";
|
|
20
32
|
import { fileURLToPath } from "url";
|
|
21
|
-
import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
22
|
-
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
23
|
-
import { z as z2 } from "zod";
|
|
24
|
-
|
|
25
|
-
// ../mcp/src/index.ts
|
|
26
33
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
27
|
-
|
|
28
|
-
// ../mcp/src/tools/verify-attestation.ts
|
|
34
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
29
35
|
import { z } from "zod";
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
return ref.trim().toLowerCase();
|
|
35
|
-
}
|
|
36
|
+
|
|
37
|
+
// src/sampling-judge.ts
|
|
38
|
+
function clientSupportsSampling(server) {
|
|
39
|
+
return Boolean(server.server.getClientCapabilities()?.sampling);
|
|
36
40
|
}
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
"today: that means UNEVALUATED (neither safe nor unsafe), not a failing grade \u2014 to",
|
|
51
|
-
"grade the server yourself right now, use `run_litmus`. A `lookup_failed` result",
|
|
52
|
-
"means the lookup itself failed (the index or chain was unreachable); the grade is",
|
|
53
|
-
"unknown, which is not the same as unevaluated.",
|
|
54
|
-
"",
|
|
55
|
-
"Input: server_ref \u2014 e.g. npm/@modelcontextprotocol/server-filesystem."
|
|
56
|
-
].join("\n");
|
|
57
|
-
var verifyInputShape = {
|
|
58
|
-
server_ref: z.string().min(1).max(512).describe("Registry-prefixed server identifier, e.g. npm/@scope/server.")
|
|
59
|
-
};
|
|
60
|
-
async function handleVerify({ server_ref }) {
|
|
61
|
-
const found = await resolveUid(server_ref);
|
|
62
|
-
if (found.kind === "error") {
|
|
63
|
-
return {
|
|
64
|
-
isError: true,
|
|
65
|
-
content: [
|
|
66
|
-
{
|
|
67
|
-
type: "text",
|
|
68
|
-
text: `lookup_failed \u2014 could not reach the polygraph grade index for ${server_ref} (${found.detail}). The lookup itself failed, so the grade is unknown \u2014 retry or report it as unchecked, NOT as unevaluated.`
|
|
69
|
-
}
|
|
70
|
-
]
|
|
71
|
-
};
|
|
72
|
-
}
|
|
73
|
-
let att = null;
|
|
74
|
-
if (found.kind === "found") {
|
|
75
|
-
try {
|
|
76
|
-
att = await readAttestation(found.uid);
|
|
77
|
-
} catch (err) {
|
|
78
|
-
return {
|
|
79
|
-
isError: true,
|
|
80
|
-
content: [
|
|
81
|
-
{
|
|
82
|
-
type: "text",
|
|
83
|
-
text: `lookup_failed \u2014 the onchain read failed for ${server_ref} (${err instanceof Error ? err.message : String(err)}). Treat as unchecked (the chain/RPC was unreachable), not as "no grade".`
|
|
84
|
-
}
|
|
85
|
-
]
|
|
86
|
-
};
|
|
41
|
+
function samplingJudge(server) {
|
|
42
|
+
return {
|
|
43
|
+
id: "mcp-sampling",
|
|
44
|
+
async complete(system, user) {
|
|
45
|
+
if (!clientSupportsSampling(server)) {
|
|
46
|
+
throw new Error("MCP client does not support sampling");
|
|
47
|
+
}
|
|
48
|
+
const res = await server.server.createMessage({
|
|
49
|
+
systemPrompt: system,
|
|
50
|
+
maxTokens: 1024,
|
|
51
|
+
messages: [{ role: "user", content: { type: "text", text: user } }]
|
|
52
|
+
});
|
|
53
|
+
return res.content.type === "text" ? res.content.text : "";
|
|
87
54
|
}
|
|
88
|
-
}
|
|
89
|
-
if (!att) {
|
|
90
|
-
return {
|
|
91
|
-
content: [
|
|
92
|
-
{
|
|
93
|
-
type: "text",
|
|
94
|
-
text: `not_available \u2014 no published polygraph grade for ${server_ref}. Grade publishing is still rolling out, so this is expected for most servers; it means unevaluated (neither safe nor unsafe), not a failing grade. To grade it now, use run_litmus.`
|
|
95
|
-
}
|
|
96
|
-
]
|
|
97
|
-
};
|
|
98
|
-
}
|
|
99
|
-
if (canonicalRef(att.serverRef) !== canonicalRef(server_ref)) {
|
|
100
|
-
return {
|
|
101
|
-
content: [
|
|
102
|
-
{
|
|
103
|
-
type: "text",
|
|
104
|
-
text: `not_available \u2014 the resolved attestation is for ${att.serverRef}, not ${server_ref} (discovery mismatch; treat as unevaluated)`
|
|
105
|
-
}
|
|
106
|
-
]
|
|
107
|
-
};
|
|
108
|
-
}
|
|
109
|
-
const payload = {
|
|
110
|
-
status: "attested",
|
|
111
|
-
grade: att.overallGrade,
|
|
112
|
-
attestationUid: att.uid,
|
|
113
|
-
serverRef: att.serverRef,
|
|
114
|
-
// The version the grade was run against (null for HTTP/unresolved targets).
|
|
115
|
-
// Advisory: the live fingerprint, not this string, is the trust anchor.
|
|
116
|
-
resolvedVersion: att.resolvedVersion,
|
|
117
|
-
reportCID: att.reportCID,
|
|
118
|
-
toolDefsFingerprint: att.toolDefsFingerprint,
|
|
119
|
-
revoked: att.revoked,
|
|
120
|
-
network: selectedNetwork(),
|
|
121
|
-
liveFingerprintCheckRequired: "Recompute the live tool-surface fingerprint and require it to equal toolDefsFingerprint before paying."
|
|
122
55
|
};
|
|
123
|
-
return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
|
|
124
|
-
}
|
|
125
|
-
async function resolveUid(serverRef) {
|
|
126
|
-
const base = process.env.POLYGRAPH_API_URL ?? "https://polygraph.so";
|
|
127
|
-
try {
|
|
128
|
-
const res = await fetch(`${base}/api/attestations?ref=${encodeURIComponent(serverRef)}`);
|
|
129
|
-
if (res.status === 404) return { kind: "none" };
|
|
130
|
-
if (!res.ok) return { kind: "error", detail: `grade index returned HTTP ${res.status}` };
|
|
131
|
-
const row = await res.json();
|
|
132
|
-
return row?.attestation_uid ? { kind: "found", uid: row.attestation_uid } : { kind: "none" };
|
|
133
|
-
} catch (err) {
|
|
134
|
-
return { kind: "error", detail: err instanceof Error ? err.message : String(err) };
|
|
135
|
-
}
|
|
136
56
|
}
|
|
137
57
|
|
|
138
58
|
// src/mcp.ts
|
|
139
59
|
function buildServer() {
|
|
140
|
-
const server = new McpServer2(
|
|
60
|
+
const server = new McpServer(
|
|
141
61
|
{ name: "polygraph-litmus", version: "0.1.0" },
|
|
142
62
|
{
|
|
143
63
|
instructions: [
|
|
@@ -155,7 +75,12 @@ function buildServer() {
|
|
|
155
75
|
"server, without running anything. Grade publishing is still rolling out, so",
|
|
156
76
|
"it commonly returns not_available today \u2014 that means unevaluated (neither",
|
|
157
77
|
"safe nor unsafe), not a failing grade; to grade the server yourself, use",
|
|
158
|
-
"`run_litmus`."
|
|
78
|
+
"`run_litmus`.",
|
|
79
|
+
"",
|
|
80
|
+
"Use `run_skill_litmus` to grade a Claude Code / Agent Skill (a SKILL.md +",
|
|
81
|
+
"bundle) A/B/D/F. This is a STATIC read of the skill's text and bundled files \u2014",
|
|
82
|
+
"no execution, no network \u2014 so it is fast but not behavioral proof. Pass",
|
|
83
|
+
"`skill_ref` as a local path to the skill directory."
|
|
159
84
|
].join("\n")
|
|
160
85
|
}
|
|
161
86
|
);
|
|
@@ -179,6 +104,30 @@ function buildServer() {
|
|
|
179
104
|
},
|
|
180
105
|
handleRunLitmus
|
|
181
106
|
);
|
|
107
|
+
server.registerTool(
|
|
108
|
+
RUN_SKILL_LITMUS_TOOL_NAME,
|
|
109
|
+
{
|
|
110
|
+
title: RUN_SKILL_LITMUS_TOOL_TITLE,
|
|
111
|
+
description: RUN_SKILL_LITMUS_TOOL_DESCRIPTION,
|
|
112
|
+
inputSchema: runSkillLitmusInputShape,
|
|
113
|
+
annotations: {
|
|
114
|
+
title: RUN_SKILL_LITMUS_TOOL_TITLE,
|
|
115
|
+
readOnlyHint: true,
|
|
116
|
+
// never mutates: the safety scan reads files; quality judging is host-mediated
|
|
117
|
+
destructiveHint: false,
|
|
118
|
+
idempotentHint: false,
|
|
119
|
+
// the optional LLM-judged quality axes are non-deterministic
|
|
120
|
+
openWorldHint: true
|
|
121
|
+
// the optional quality judge may use the host model (sampling) or a configured endpoint
|
|
122
|
+
}
|
|
123
|
+
},
|
|
124
|
+
// Resolve the judge per call (the client connection is known now): the host
|
|
125
|
+
// agent's model via sampling if it's offered, else an operator-set env key,
|
|
126
|
+
// else null ⇒ deterministic quality only. The litmus core never needs a key.
|
|
127
|
+
(args) => handleRunSkillLitmus(args, {
|
|
128
|
+
judge: clientSupportsSampling(server) ? samplingJudge(server) : judgeFromEnv()
|
|
129
|
+
})
|
|
130
|
+
);
|
|
182
131
|
server.registerTool(
|
|
183
132
|
VERIFY_TOOL_NAME,
|
|
184
133
|
{
|
|
@@ -195,13 +144,30 @@ function buildServer() {
|
|
|
195
144
|
},
|
|
196
145
|
handleVerify
|
|
197
146
|
);
|
|
147
|
+
server.registerTool(
|
|
148
|
+
VERIFY_SKILL_TOOL_NAME,
|
|
149
|
+
{
|
|
150
|
+
title: VERIFY_SKILL_TOOL_TITLE,
|
|
151
|
+
description: VERIFY_SKILL_TOOL_DESCRIPTION,
|
|
152
|
+
inputSchema: verifySkillInputShape,
|
|
153
|
+
annotations: {
|
|
154
|
+
title: VERIFY_SKILL_TOOL_TITLE,
|
|
155
|
+
readOnlyHint: true,
|
|
156
|
+
destructiveHint: false,
|
|
157
|
+
idempotentHint: true,
|
|
158
|
+
openWorldHint: true
|
|
159
|
+
// reads the grade index + chain
|
|
160
|
+
}
|
|
161
|
+
},
|
|
162
|
+
handleVerifySkill
|
|
163
|
+
);
|
|
198
164
|
server.registerPrompt(
|
|
199
165
|
"grade",
|
|
200
166
|
{
|
|
201
167
|
title: "Grade an MCP server",
|
|
202
168
|
description: "Run the open behavioral litmus against an MCP server and report its grade A\u2013F with the evidence.",
|
|
203
169
|
argsSchema: {
|
|
204
|
-
server_ref:
|
|
170
|
+
server_ref: z.string().min(1).max(512).describe("npm/@scope/server, an https:// MCP URL, or a local path to an MCP entry file")
|
|
205
171
|
}
|
|
206
172
|
},
|
|
207
173
|
({ server_ref }) => ({
|
|
@@ -222,7 +188,7 @@ function buildServer() {
|
|
|
222
188
|
title: "Check a server's published grade",
|
|
223
189
|
description: "Read a server's already-published polygraph grade without running anything.",
|
|
224
190
|
argsSchema: {
|
|
225
|
-
server_ref:
|
|
191
|
+
server_ref: z.string().min(1).max(512).describe("Registry-prefixed server identifier, e.g. npm/@scope/server")
|
|
226
192
|
}
|
|
227
193
|
},
|
|
228
194
|
({ server_ref }) => ({
|
|
@@ -237,6 +203,48 @@ function buildServer() {
|
|
|
237
203
|
]
|
|
238
204
|
})
|
|
239
205
|
);
|
|
206
|
+
server.registerPrompt(
|
|
207
|
+
"grade-skill",
|
|
208
|
+
{
|
|
209
|
+
title: "Grade a Claude Code skill",
|
|
210
|
+
description: "Run the open static safety litmus over a skill (SKILL.md + bundle) and report its grade A/B/D/F with the evidence.",
|
|
211
|
+
argsSchema: {
|
|
212
|
+
skill_ref: z.string().min(1).max(1024).describe("Local path to a skill directory containing SKILL.md")
|
|
213
|
+
}
|
|
214
|
+
},
|
|
215
|
+
({ skill_ref }) => ({
|
|
216
|
+
messages: [
|
|
217
|
+
{
|
|
218
|
+
role: "user",
|
|
219
|
+
content: {
|
|
220
|
+
type: "text",
|
|
221
|
+
text: `Run the polygraph skill litmus on ${skill_ref} using the run_skill_litmus tool. Report the letter grade, the one-line summary, any failed category with its findings, and the contentHash. State plainly that this is a static scan, not behavioral proof.`
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
]
|
|
225
|
+
})
|
|
226
|
+
);
|
|
227
|
+
server.registerPrompt(
|
|
228
|
+
"check-skill",
|
|
229
|
+
{
|
|
230
|
+
title: "Check a skill's published grade",
|
|
231
|
+
description: "Read a skill's already-published polygraph grade without running anything.",
|
|
232
|
+
argsSchema: {
|
|
233
|
+
skill_ref: z.string().min(1).max(1024).describe("Skill identifier, e.g. github/<owner>/<repo>#<path> or marketplace/<owner>/<name>")
|
|
234
|
+
}
|
|
235
|
+
},
|
|
236
|
+
({ skill_ref }) => ({
|
|
237
|
+
messages: [
|
|
238
|
+
{
|
|
239
|
+
role: "user",
|
|
240
|
+
content: {
|
|
241
|
+
type: "text",
|
|
242
|
+
text: `Use the verify_skill_attestation tool to read the published polygraph grade for ${skill_ref}. If it returns not_available, say the skill is unevaluated (neither safe nor unsafe) and offer to grade a local copy with run_skill_litmus. If a grade is returned, report it and remind the user to recompute the skill's contentHash before installing.`
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
]
|
|
246
|
+
})
|
|
247
|
+
);
|
|
240
248
|
return server;
|
|
241
249
|
}
|
|
242
250
|
async function main() {
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import {
|
|
2
|
+
SKILL_BUNDLE_SCHEMA_VERSION,
|
|
3
|
+
SKILL_METHODOLOGY_VERSION,
|
|
4
|
+
SKILL_QUALITY_VERSION,
|
|
5
|
+
SkillLoadError,
|
|
6
|
+
assembleBundle,
|
|
7
|
+
canaryMatch,
|
|
8
|
+
classifyTool,
|
|
9
|
+
connectTarget,
|
|
10
|
+
dangerousCommand,
|
|
11
|
+
exfilInstruction,
|
|
12
|
+
fingerprintToolDefs,
|
|
13
|
+
gradeFromCategories,
|
|
14
|
+
gradeSkillCategories,
|
|
15
|
+
hasHighSeverity,
|
|
16
|
+
instructionMimicry,
|
|
17
|
+
internalsLeak,
|
|
18
|
+
invisibleUnicode,
|
|
19
|
+
judgeFromEnv,
|
|
20
|
+
judgeSkillQuality,
|
|
21
|
+
loadSkill,
|
|
22
|
+
markdownTricks,
|
|
23
|
+
openAICompatJudge,
|
|
24
|
+
overBroadTrigger,
|
|
25
|
+
runLitmus,
|
|
26
|
+
runSkillLitmus,
|
|
27
|
+
runSkillQuality,
|
|
28
|
+
runSkillQualityJudged,
|
|
29
|
+
skillInjection,
|
|
30
|
+
skillInjectionFails,
|
|
31
|
+
stateChangingToolNames,
|
|
32
|
+
stripExamples
|
|
33
|
+
} from "./chunk-DN2OX4RT.js";
|
|
34
|
+
import "./chunk-44R4ZYOE.js";
|
|
35
|
+
export {
|
|
36
|
+
SKILL_BUNDLE_SCHEMA_VERSION,
|
|
37
|
+
SKILL_METHODOLOGY_VERSION,
|
|
38
|
+
SKILL_QUALITY_VERSION,
|
|
39
|
+
SkillLoadError,
|
|
40
|
+
assembleBundle,
|
|
41
|
+
canaryMatch,
|
|
42
|
+
classifyTool,
|
|
43
|
+
connectTarget,
|
|
44
|
+
dangerousCommand,
|
|
45
|
+
exfilInstruction,
|
|
46
|
+
fingerprintToolDefs,
|
|
47
|
+
gradeFromCategories,
|
|
48
|
+
gradeSkillCategories,
|
|
49
|
+
hasHighSeverity,
|
|
50
|
+
instructionMimicry,
|
|
51
|
+
internalsLeak,
|
|
52
|
+
invisibleUnicode,
|
|
53
|
+
judgeFromEnv,
|
|
54
|
+
judgeSkillQuality,
|
|
55
|
+
loadSkill,
|
|
56
|
+
markdownTricks,
|
|
57
|
+
openAICompatJudge,
|
|
58
|
+
overBroadTrigger,
|
|
59
|
+
runLitmus,
|
|
60
|
+
runSkillLitmus,
|
|
61
|
+
runSkillQuality,
|
|
62
|
+
runSkillQualityJudged,
|
|
63
|
+
skillInjection,
|
|
64
|
+
skillInjectionFails,
|
|
65
|
+
stateChangingToolNames,
|
|
66
|
+
stripExamples
|
|
67
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@polygraphso/litmus",
|
|
3
|
-
"version": "0.8.1",
|
|
3
|
+
"version": "0.9.0",
|
|
4
4
|
"description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"homepage": "https://polygraph.so",
|
|
@@ -40,6 +40,7 @@
|
|
|
40
40
|
},
|
|
41
41
|
"bin": {
|
|
42
42
|
"polygraphso-litmus": "dist/cli.js",
|
|
43
|
+
"polygraphso-litmus-skill": "dist/cli-skill.js",
|
|
43
44
|
"polygraphso-litmus-mcp": "dist/mcp.js"
|
|
44
45
|
},
|
|
45
46
|
"files": [
|
|
@@ -62,11 +63,11 @@
|
|
|
62
63
|
"typescript": "^5.9.3",
|
|
63
64
|
"vitest": "^2.1.0",
|
|
64
65
|
"@polygraph/core": "0.0.0",
|
|
66
|
+
"@polygraph/probes": "0.0.0",
|
|
65
67
|
"@polygraph/onchain": "0.0.0",
|
|
66
68
|
"@polygraph/agent": "0.0.0",
|
|
67
|
-
"@polygraph/
|
|
68
|
-
"@polygraph/mcp": "0.0.0"
|
|
69
|
-
"@polygraph/cli": "0.0.0"
|
|
69
|
+
"@polygraph/cli": "0.0.0",
|
|
70
|
+
"@polygraph/mcp": "0.0.0"
|
|
70
71
|
},
|
|
71
72
|
"publishConfig": {
|
|
72
73
|
"access": "public"
|
package/dist/chunk-BPS4YCDL.js
DELETED
|
@@ -1,250 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
parseAuthFlags,
|
|
3
|
-
resolveTarget
|
|
4
|
-
} from "./chunk-VOPISHBU.js";
|
|
5
|
-
import {
|
|
6
|
-
runLitmus
|
|
7
|
-
} from "./chunk-35UOPCBW.js";
|
|
8
|
-
import {
|
|
9
|
-
CATEGORY_STATUS_UINT8,
|
|
10
|
-
METHODOLOGY_VERSION
|
|
11
|
-
} from "./chunk-ZR6XRGMQ.js";
|
|
12
|
-
|
|
13
|
-
// ../onchain/src/networks.ts
|
|
14
|
-
var NETWORKS = {
|
|
15
|
-
"base-sepolia": {
|
|
16
|
-
chainId: 84532,
|
|
17
|
-
rpc: "https://sepolia.base.org",
|
|
18
|
-
eas: "0x4200000000000000000000000000000000000021",
|
|
19
|
-
schemaRegistry: "0x4200000000000000000000000000000000000020",
|
|
20
|
-
easscan: "https://base-sepolia.easscan.org"
|
|
21
|
-
},
|
|
22
|
-
base: {
|
|
23
|
-
chainId: 8453,
|
|
24
|
-
rpc: "https://mainnet.base.org",
|
|
25
|
-
eas: "0x4200000000000000000000000000000000000021",
|
|
26
|
-
schemaRegistry: "0x4200000000000000000000000000000000000020",
|
|
27
|
-
easscan: "https://base.easscan.org"
|
|
28
|
-
}
|
|
29
|
-
};
|
|
30
|
-
function selectedNetwork() {
|
|
31
|
-
return process.env.NEXT_PUBLIC_POLYGRAPH_NETWORK === "base" ? "base" : "base-sepolia";
|
|
32
|
-
}
|
|
33
|
-
function networkConfig(net = selectedNetwork()) {
|
|
34
|
-
return NETWORKS[net];
|
|
35
|
-
}
|
|
36
|
-
function rpcUrl(net = selectedNetwork()) {
|
|
37
|
-
const override = net === "base" ? process.env.BASE_MAINNET_RPC_URL : process.env.BASE_SEPOLIA_RPC_URL;
|
|
38
|
-
return override && override.length > 0 ? override : NETWORKS[net].rpc;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
// ../onchain/src/eas.ts
|
|
42
|
-
import { AbiCoder } from "ethers";
|
|
43
|
-
var LITMUS_SCHEMA = "string serverRef,bytes32 toolDefsFingerprint,uint8 gradeC01,uint8 gradeC02,uint8 gradeC03,string overallGrade,string reportCID,string methodologyVersion,uint64 ranAt,string resolvedVersion";
|
|
44
|
-
var LITMUS_ABI_TYPES = [
|
|
45
|
-
"string",
|
|
46
|
-
// serverRef
|
|
47
|
-
"bytes32",
|
|
48
|
-
// toolDefsFingerprint
|
|
49
|
-
"uint8",
|
|
50
|
-
// gradeC01
|
|
51
|
-
"uint8",
|
|
52
|
-
// gradeC02
|
|
53
|
-
"uint8",
|
|
54
|
-
// gradeC03
|
|
55
|
-
"string",
|
|
56
|
-
// overallGrade
|
|
57
|
-
"string",
|
|
58
|
-
// reportCID
|
|
59
|
-
"string",
|
|
60
|
-
// methodologyVersion
|
|
61
|
-
"uint64",
|
|
62
|
-
// ranAt
|
|
63
|
-
"string"
|
|
64
|
-
// resolvedVersion
|
|
65
|
-
];
|
|
66
|
-
var LITMUS_ABI_NAMES = [
|
|
67
|
-
"serverRef",
|
|
68
|
-
"toolDefsFingerprint",
|
|
69
|
-
"gradeC01",
|
|
70
|
-
"gradeC02",
|
|
71
|
-
"gradeC03",
|
|
72
|
-
"overallGrade",
|
|
73
|
-
"reportCID",
|
|
74
|
-
"methodologyVersion",
|
|
75
|
-
"ranAt",
|
|
76
|
-
"resolvedVersion"
|
|
77
|
-
];
|
|
78
|
-
function categoryUint8(bundle, code) {
|
|
79
|
-
const status = bundle.categories.find((c) => c.code === code)?.status;
|
|
80
|
-
return status ? CATEGORY_STATUS_UINT8[status] : CATEGORY_STATUS_UINT8.skipped;
|
|
81
|
-
}
|
|
82
|
-
function litmusFields(bundle, reportCID) {
|
|
83
|
-
return {
|
|
84
|
-
serverRef: bundle.serverRef,
|
|
85
|
-
toolDefsFingerprint: bundle.toolDefsFingerprint,
|
|
86
|
-
gradeC01: categoryUint8(bundle, "C-01"),
|
|
87
|
-
gradeC02: categoryUint8(bundle, "C-02"),
|
|
88
|
-
gradeC03: categoryUint8(bundle, "C-03"),
|
|
89
|
-
overallGrade: bundle.grade,
|
|
90
|
-
reportCID,
|
|
91
|
-
methodologyVersion: bundle.methodologyVersion || METHODOLOGY_VERSION,
|
|
92
|
-
ranAt: BigInt(Math.floor(Date.parse(bundle.ranAt) / 1e3)),
|
|
93
|
-
resolvedVersion: bundle.resolvedVersion ?? ""
|
|
94
|
-
};
|
|
95
|
-
}
|
|
96
|
-
function encodeLitmusAttestation(bundle, reportCID) {
|
|
97
|
-
const f = litmusFields(bundle, reportCID);
|
|
98
|
-
return AbiCoder.defaultAbiCoder().encode(
|
|
99
|
-
[...LITMUS_ABI_TYPES],
|
|
100
|
-
[
|
|
101
|
-
f.serverRef,
|
|
102
|
-
f.toolDefsFingerprint,
|
|
103
|
-
f.gradeC01,
|
|
104
|
-
f.gradeC02,
|
|
105
|
-
f.gradeC03,
|
|
106
|
-
f.overallGrade,
|
|
107
|
-
f.reportCID,
|
|
108
|
-
f.methodologyVersion,
|
|
109
|
-
f.ranAt,
|
|
110
|
-
f.resolvedVersion
|
|
111
|
-
]
|
|
112
|
-
);
|
|
113
|
-
}
|
|
114
|
-
function decodeLitmusAttestation(encoded) {
|
|
115
|
-
const values = AbiCoder.defaultAbiCoder().decode([...LITMUS_ABI_TYPES], encoded);
|
|
116
|
-
const out = {};
|
|
117
|
-
LITMUS_ABI_NAMES.forEach((name, i) => {
|
|
118
|
-
out[name] = values[i];
|
|
119
|
-
});
|
|
120
|
-
return out;
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
// ../onchain/src/read.ts
|
|
124
|
-
import { Contract, JsonRpcProvider, ZeroHash } from "ethers";
|
|
125
|
-
var EAS_ABI = [
|
|
126
|
-
"function getAttestation(bytes32 uid) view returns ((bytes32 uid, bytes32 schema, uint64 time, uint64 expirationTime, uint64 revocationTime, bytes32 refUID, address recipient, address attester, bool revocable, bytes data))"
|
|
127
|
-
];
|
|
128
|
-
function litmusSchemaUID() {
|
|
129
|
-
const uid = process.env.NEXT_PUBLIC_EAS_SCHEMA_UID;
|
|
130
|
-
if (!uid) throw new Error("NEXT_PUBLIC_EAS_SCHEMA_UID is required \u2014 register the schema first.");
|
|
131
|
-
return uid;
|
|
132
|
-
}
|
|
133
|
-
async function readAttestation(uid) {
|
|
134
|
-
const cfg = networkConfig();
|
|
135
|
-
const provider = new JsonRpcProvider(rpcUrl(), cfg.chainId);
|
|
136
|
-
const eas = new Contract(cfg.eas, EAS_ABI, provider);
|
|
137
|
-
const att = await eas.getAttestation(uid);
|
|
138
|
-
if (!att || att.uid === ZeroHash) return null;
|
|
139
|
-
if (String(att.schema).toLowerCase() !== litmusSchemaUID().toLowerCase()) return null;
|
|
140
|
-
const d = decodeLitmusAttestation(att.data);
|
|
141
|
-
return {
|
|
142
|
-
uid: att.uid,
|
|
143
|
-
serverRef: String(d.serverRef),
|
|
144
|
-
toolDefsFingerprint: String(d.toolDefsFingerprint),
|
|
145
|
-
overallGrade: String(d.overallGrade),
|
|
146
|
-
reportCID: String(d.reportCID),
|
|
147
|
-
resolvedVersion: d.resolvedVersion || null,
|
|
148
|
-
revoked: att.revocationTime > 0n,
|
|
149
|
-
attester: String(att.attester),
|
|
150
|
-
expirationTime: BigInt(att.expirationTime ?? 0n)
|
|
151
|
-
};
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
// src/tools/run-litmus.ts
|
|
155
|
-
import { z } from "zod";
|
|
156
|
-
var RUN_LITMUS_TOOL_NAME = "run_litmus";
|
|
157
|
-
var RUN_LITMUS_TOOL_TITLE = "Run a behavioral litmus on an MCP server";
|
|
158
|
-
var RUN_LITMUS_TOOL_DESCRIPTION = [
|
|
159
|
-
`Grade an MCP server A\u2013F against the open behavioral litmus (${METHODOLOGY_VERSION}).`,
|
|
160
|
-
"The harness connects the way an agent would, fingerprints the tool surface, and",
|
|
161
|
-
"runs four checks: C-01 tool-output injection, C-02 permission/egress overreach",
|
|
162
|
-
"(egress in a hardened default-deny Docker sandbox, plus a declared-permission",
|
|
163
|
-
"honesty check), C-03 sensitive-data handling (planted canaries), and C-04",
|
|
164
|
-
"adversarial-input handling (malformed/oversized and jailbreak inputs).",
|
|
165
|
-
"",
|
|
166
|
-
"This is ACTIVE: it launches the target server's code to exercise it (egress-",
|
|
167
|
-
"sandboxed when Docker is available) and takes ~20\u201360s. It is not a lookup \u2014 for",
|
|
168
|
-
"a server's already-published grade, use `verify_attestation`. No wallet or RPC",
|
|
169
|
-
"needed.",
|
|
170
|
-
"",
|
|
171
|
-
"server_ref examples: npm/@modelcontextprotocol/server-filesystem \xB7",
|
|
172
|
-
"https://example.com/mcp \xB7 ./build/index.js. For a token-gated https:// target,",
|
|
173
|
-
"pass `bearer`. If Docker is unavailable, C-02 is skipped and the grade is capped",
|
|
174
|
-
"at B for that run."
|
|
175
|
-
].join("\n");
|
|
176
|
-
var runLitmusInputShape = {
|
|
177
|
-
server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file."),
|
|
178
|
-
bearer: z.string().min(1).max(8192).optional().describe("Bearer token for a token-gated https:// MCP server. Sent as `Authorization: Bearer <token>` to the target origin only. Ignored for stdio/local targets."),
|
|
179
|
-
header: z.array(z.string()).max(20).optional().describe('Extra HTTP headers for a gated https:// target, each "Key: Value" (e.g. "X-Api-Key: \u2026"). Overrides the bearer-derived Authorization for the same key. Ignored for stdio/local targets.')
|
|
180
|
-
};
|
|
181
|
-
var PROGRESS_TOTAL = 5;
|
|
182
|
-
async function handleRunLitmus({ server_ref, bearer, header }, extra) {
|
|
183
|
-
try {
|
|
184
|
-
const argv = [
|
|
185
|
-
...bearer ? ["--bearer", bearer] : [],
|
|
186
|
-
...(header ?? []).flatMap((h) => ["--header", h])
|
|
187
|
-
];
|
|
188
|
-
const { headers } = parseAuthFlags(argv, {});
|
|
189
|
-
const progressToken = extra._meta?.progressToken;
|
|
190
|
-
const sendProgress = progressToken !== void 0 ? (progress, message) => void extra.sendNotification({
|
|
191
|
-
method: "notifications/progress",
|
|
192
|
-
params: { progressToken, progress, total: PROGRESS_TOTAL, message }
|
|
193
|
-
}) : void 0;
|
|
194
|
-
sendProgress?.(0, `Connecting to ${server_ref}\u2026`);
|
|
195
|
-
const bundle = await runLitmus(resolveTarget(server_ref), {
|
|
196
|
-
...Object.keys(headers).length ? { headers } : {},
|
|
197
|
-
...sendProgress ? { onProgress: (done, _total, label) => sendProgress(done, label) } : {}
|
|
198
|
-
});
|
|
199
|
-
const payload = summarize(bundle);
|
|
200
|
-
return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
|
|
201
|
-
} catch (err) {
|
|
202
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
203
|
-
return { isError: true, content: [{ type: "text", text: `run_litmus failed: ${message}` }] };
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
var CATEGORY_LABEL = {
|
|
207
|
-
"C-01": "tool-output injection",
|
|
208
|
-
"C-02": "permission / egress overreach",
|
|
209
|
-
"C-03": "sensitive-data handling",
|
|
210
|
-
"C-04": "adversarial-input handling"
|
|
211
|
-
};
|
|
212
|
-
function summarize(b) {
|
|
213
|
-
const find = (code) => b.categories.find((c) => c.code === code);
|
|
214
|
-
const categories = ["C-01", "C-02", "C-03", "C-04"].map((code) => {
|
|
215
|
-
const c = find(code);
|
|
216
|
-
const findings = c?.status === "fail" ? c.probes.flatMap((p) => p.findings).filter((f) => f.severity === "high").slice(0, 5).map((f) => ({ tool: f.tool, kind: f.kind, match: truncate(f.match, 120), host: f.host, port: f.port })) : [];
|
|
217
|
-
return { code, check: CATEGORY_LABEL[code], status: c?.status ?? "unknown", reason: c?.reason ?? null, findings };
|
|
218
|
-
});
|
|
219
|
-
return {
|
|
220
|
-
grade: b.grade,
|
|
221
|
-
summary: b.gradeRationale,
|
|
222
|
-
serverRef: b.serverRef,
|
|
223
|
-
resolvedVersion: b.resolvedVersion,
|
|
224
|
-
fingerprint: b.toolDefsFingerprint,
|
|
225
|
-
ranAt: b.ranAt,
|
|
226
|
-
methodologyVersion: b.methodologyVersion,
|
|
227
|
-
categories
|
|
228
|
-
};
|
|
229
|
-
}
|
|
230
|
-
function truncate(s, n) {
|
|
231
|
-
return s.length > n ? `${s.slice(0, n)}\u2026` : s;
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
export {
|
|
235
|
-
NETWORKS,
|
|
236
|
-
selectedNetwork,
|
|
237
|
-
networkConfig,
|
|
238
|
-
rpcUrl,
|
|
239
|
-
LITMUS_SCHEMA,
|
|
240
|
-
litmusFields,
|
|
241
|
-
encodeLitmusAttestation,
|
|
242
|
-
decodeLitmusAttestation,
|
|
243
|
-
litmusSchemaUID,
|
|
244
|
-
readAttestation,
|
|
245
|
-
RUN_LITMUS_TOOL_NAME,
|
|
246
|
-
RUN_LITMUS_TOOL_TITLE,
|
|
247
|
-
RUN_LITMUS_TOOL_DESCRIPTION,
|
|
248
|
-
runLitmusInputShape,
|
|
249
|
-
handleRunLitmus
|
|
250
|
-
};
|
package/dist/src-RSTPCEYU.js
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
assembleBundle,
|
|
3
|
-
canaryMatch,
|
|
4
|
-
classifyTool,
|
|
5
|
-
connectTarget,
|
|
6
|
-
fingerprintToolDefs,
|
|
7
|
-
gradeFromCategories,
|
|
8
|
-
hasHighSeverity,
|
|
9
|
-
instructionMimicry,
|
|
10
|
-
internalsLeak,
|
|
11
|
-
invisibleUnicode,
|
|
12
|
-
markdownTricks,
|
|
13
|
-
runLitmus,
|
|
14
|
-
stateChangingToolNames
|
|
15
|
-
} from "./chunk-35UOPCBW.js";
|
|
16
|
-
import "./chunk-ZR6XRGMQ.js";
|
|
17
|
-
export {
|
|
18
|
-
assembleBundle,
|
|
19
|
-
canaryMatch,
|
|
20
|
-
classifyTool,
|
|
21
|
-
connectTarget,
|
|
22
|
-
fingerprintToolDefs,
|
|
23
|
-
gradeFromCategories,
|
|
24
|
-
hasHighSeverity,
|
|
25
|
-
instructionMimicry,
|
|
26
|
-
internalsLeak,
|
|
27
|
-
invisibleUnicode,
|
|
28
|
-
markdownTricks,
|
|
29
|
-
runLitmus,
|
|
30
|
-
stateChangingToolNames
|
|
31
|
-
};
|