@polygraphso/litmus 0.7.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -4
- package/dist/{chunk-EWLIQPXF.js → chunk-35UOPCBW.js} +12 -8
- package/dist/{chunk-GJ7M7C46.js → chunk-BPS4YCDL.js} +106 -51
- package/dist/{chunk-RAZNXIE5.js → chunk-VOPISHBU.js} +1 -1
- package/dist/cli.js +2 -2
- package/dist/index.d.ts +20 -3
- package/dist/index.js +3 -3
- package/dist/mcp.d.ts +3 -2
- package/dist/mcp.js +112 -29
- package/dist/{src-GJ2L6B7K.js → src-RSTPCEYU.js} +1 -1
- package/package.json +4 -5
package/README.md
CHANGED
|
@@ -50,16 +50,43 @@ open and deterministic, so a re-run reproduces the grade — or refutes it.
|
|
|
50
50
|
The package ships a stdio MCP server, `polygraphso-litmus-mcp`, so it works in any
|
|
51
51
|
MCP-capable client. It exposes two tools:
|
|
52
52
|
|
|
53
|
-
- **`run_litmus`** — actively grade a server *now* (runs the harness end-to-end)
|
|
54
|
-
and return the grade and the evidence.
|
|
53
|
+
- **`run_litmus`** — actively grade a server *now* (runs the harness end-to-end)
|
|
54
|
+
and return the grade and the evidence. Optional **`bearer`** (and `header`
|
|
55
|
+
entries, each `"Key: Value"`) grade a token-gated `https://` MCP target — sent
|
|
56
|
+
to that origin only, ignored for stdio/local targets, the same plumbing as the
|
|
57
|
+
CLI's `--bearer` / `--header`.
|
|
55
58
|
- **`verify_attestation`** — passively read a server's *already-published* grade
|
|
56
59
|
before trusting or paying it.
|
|
57
60
|
|
|
61
|
+
It also registers two **prompts** that show up as slash commands — in Claude Code,
|
|
62
|
+
`/mcp__polygraph-litmus__grade <server_ref>` (run a fresh grade) and
|
|
63
|
+
`/mcp__polygraph-litmus__check <server_ref>` (read a published grade); other
|
|
64
|
+
clients surface the same prompts in their own UI. For a cleaner pair of commands
|
|
65
|
+
in Claude Code — `/polygraph:grade` and `/polygraph:check` — install the plugin
|
|
66
|
+
(below), which wires up this server and both commands in one step.
|
|
67
|
+
|
|
58
68
|
**Prerequisites:** Node ≥ 18. Docker is optional (without it, C-02 egress is
|
|
59
69
|
skipped and the grade caps at B). Set `POLYGRAPH_API_URL=https://polygraph.so` so
|
|
60
|
-
`verify_attestation` can
|
|
70
|
+
`verify_attestation` can look up published grades.
|
|
71
|
+
|
|
72
|
+
> **Heads-up:** grade *publishing* is still rolling out, so `verify_attestation`
|
|
73
|
+
> commonly returns `not_available` today — that means *unevaluated*, not a failing
|
|
74
|
+
> grade. To grade a server right now, use `run_litmus`.
|
|
75
|
+
|
|
76
|
+
### Claude Code: one-click plugin (recommended)
|
|
77
|
+
|
|
78
|
+
The plugin bundles this MCP server **and** adds the `/polygraph:grade` and
|
|
79
|
+
`/polygraph:check` commands — one install does everything:
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
/plugin marketplace add polygraphso/litmus
|
|
83
|
+
/plugin install polygraph@polygraphso
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Then just run `/polygraph:grade npm/@modelcontextprotocol/server-filesystem`.
|
|
61
87
|
|
|
62
|
-
|
|
88
|
+
Prefer to wire the server up by hand, or using another client? Add it once, then
|
|
89
|
+
just talk to your agent.
|
|
63
90
|
|
|
64
91
|
**Claude Code** — one command:
|
|
65
92
|
|
|
@@ -99,6 +126,31 @@ that's already published.
|
|
|
99
126
|
`run_litmus` launches the target server's code to exercise it (egress-sandboxed
|
|
100
127
|
when Docker is present). It needs no wallet or RPC.
|
|
101
128
|
|
|
129
|
+
### ChatGPT and other remote clients
|
|
130
|
+
|
|
131
|
+
ChatGPT's MCP support expects a remote **Streamable-HTTP** server; this package is
|
|
132
|
+
**stdio-only**, so you can't point ChatGPT at it directly. If you self-host, bridge
|
|
133
|
+
stdio over HTTP yourself — e.g.
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
npx -y supergateway --stdio "npx -y -p @polygraphso/litmus polygraphso-litmus-mcp" --port 8000
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
(or [`mcp-proxy`](https://github.com/sparfenyuk/mcp-proxy)) — then point your client
|
|
140
|
+
at that endpoint. polygraph does not host this for you; the bridge runs on your own
|
|
141
|
+
machine.
|
|
142
|
+
|
|
143
|
+
### Troubleshooting
|
|
144
|
+
|
|
145
|
+
- **Two bins / `npx`:** `npx` needs `-p @polygraphso/litmus` *plus* the bin name
|
|
146
|
+
(`polygraphso-litmus` or `polygraphso-litmus-mcp`); plain `npx @polygraphso/litmus`
|
|
147
|
+
can't choose which to run. Installed globally? Use the bin name directly, no `-p`.
|
|
148
|
+
- **Docker optional:** without Docker, C-02 (egress) is skipped and the grade caps
|
|
149
|
+
at **B** — the C-02 row reads `skipped` with reason `no sandbox (Docker
|
|
150
|
+
unavailable)`. Not a failure, just unverified.
|
|
151
|
+
- **`verify_attestation` says `lookup_failed`:** the grade index or RPC was
|
|
152
|
+
unreachable — that's *unknown*, not *no grade*. Retry; check `POLYGRAPH_API_URL`.
|
|
153
|
+
|
|
102
154
|
## Library
|
|
103
155
|
|
|
104
156
|
```ts
|
|
@@ -2003,6 +2003,7 @@ function assembleBundle(input) {
|
|
|
2003
2003
|
}
|
|
2004
2004
|
|
|
2005
2005
|
// ../probes/src/harness.ts
|
|
2006
|
+
var PROGRESS_STEPS = 5;
|
|
2006
2007
|
async function runLitmus(target, opts = {}) {
|
|
2007
2008
|
const isolation = opts.isolation ?? (process.env.LITMUS_STDIO_ISOLATION === "docker" ? "docker" : "none");
|
|
2008
2009
|
const ranAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -2025,6 +2026,7 @@ async function runLitmus(target, opts = {}) {
|
|
|
2025
2026
|
});
|
|
2026
2027
|
try {
|
|
2027
2028
|
const runProbes = async () => {
|
|
2029
|
+
const step = (done, label) => opts.onProgress?.(done, PROGRESS_STEPS, label);
|
|
2028
2030
|
const listed = await enumerateTools(conn.client);
|
|
2029
2031
|
const tools = listed.map((t) => ({
|
|
2030
2032
|
name: t.name,
|
|
@@ -2033,6 +2035,7 @@ async function runLitmus(target, opts = {}) {
|
|
|
2033
2035
|
}));
|
|
2034
2036
|
assertGradableSurface(tools);
|
|
2035
2037
|
const { fingerprint, canonical } = fingerprintToolDefs(tools);
|
|
2038
|
+
step(1, "fingerprinted tool surface");
|
|
2036
2039
|
const annotated = listed.map((t) => ({
|
|
2037
2040
|
name: t.name,
|
|
2038
2041
|
description: t.description ?? "",
|
|
@@ -2056,14 +2059,15 @@ async function runLitmus(target, opts = {}) {
|
|
|
2056
2059
|
baselineAllowlist: []
|
|
2057
2060
|
};
|
|
2058
2061
|
assertEgressRanUnderIsolation(egress, isolation, isStdio);
|
|
2059
|
-
const
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
|
|
2066
|
-
|
|
2062
|
+
const c01 = await c01Injection(ctx);
|
|
2063
|
+
step(2, "C-01 tool-output injection");
|
|
2064
|
+
const c02 = c02Permission(probe21Declaration(annotated), egress);
|
|
2065
|
+
step(3, "C-02 permission / egress");
|
|
2066
|
+
const c03 = await c03Sensitive(ctx, egress);
|
|
2067
|
+
step(4, "C-03 sensitive-data handling");
|
|
2068
|
+
const c04 = await c04Adversarial(ctx);
|
|
2069
|
+
step(5, "C-04 adversarial-input handling");
|
|
2070
|
+
const categories = [c01, c02, c03, c04];
|
|
2067
2071
|
const grade = gradeFromCategories(categories);
|
|
2068
2072
|
return assembleBundle({
|
|
2069
2073
|
serverRef: conn.serverRef,
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import {
|
|
2
|
+
parseAuthFlags,
|
|
2
3
|
resolveTarget
|
|
3
|
-
} from "./chunk-RAZNXIE5.js";
|
|
4
|
+
} from "./chunk-VOPISHBU.js";
|
|
4
5
|
import {
|
|
5
6
|
runLitmus
|
|
6
|
-
} from "./chunk-EWLIQPXF.js";
|
|
7
|
+
} from "./chunk-35UOPCBW.js";
|
|
7
8
|
import {
|
|
8
9
|
CATEGORY_STATUS_UINT8,
|
|
9
10
|
METHODOLOGY_VERSION
|
|
@@ -37,16 +38,43 @@ function rpcUrl(net = selectedNetwork()) {
|
|
|
37
38
|
return override && override.length > 0 ? override : NETWORKS[net].rpc;
|
|
38
39
|
}
|
|
39
40
|
|
|
40
|
-
// ../onchain/src/eas-sdk.ts
|
|
41
|
-
import { createRequire } from "module";
|
|
42
|
-
var require2 = createRequire(import.meta.url);
|
|
43
|
-
var sdk = require2(
|
|
44
|
-
"@ethereum-attestation-service/eas-sdk"
|
|
45
|
-
);
|
|
46
|
-
var { EAS, SchemaEncoder, SchemaRegistry } = sdk;
|
|
47
|
-
|
|
48
41
|
// ../onchain/src/eas.ts
|
|
42
|
+
import { AbiCoder } from "ethers";
|
|
49
43
|
var LITMUS_SCHEMA = "string serverRef,bytes32 toolDefsFingerprint,uint8 gradeC01,uint8 gradeC02,uint8 gradeC03,string overallGrade,string reportCID,string methodologyVersion,uint64 ranAt,string resolvedVersion";
|
|
44
|
+
var LITMUS_ABI_TYPES = [
|
|
45
|
+
"string",
|
|
46
|
+
// serverRef
|
|
47
|
+
"bytes32",
|
|
48
|
+
// toolDefsFingerprint
|
|
49
|
+
"uint8",
|
|
50
|
+
// gradeC01
|
|
51
|
+
"uint8",
|
|
52
|
+
// gradeC02
|
|
53
|
+
"uint8",
|
|
54
|
+
// gradeC03
|
|
55
|
+
"string",
|
|
56
|
+
// overallGrade
|
|
57
|
+
"string",
|
|
58
|
+
// reportCID
|
|
59
|
+
"string",
|
|
60
|
+
// methodologyVersion
|
|
61
|
+
"uint64",
|
|
62
|
+
// ranAt
|
|
63
|
+
"string"
|
|
64
|
+
// resolvedVersion
|
|
65
|
+
];
|
|
66
|
+
var LITMUS_ABI_NAMES = [
|
|
67
|
+
"serverRef",
|
|
68
|
+
"toolDefsFingerprint",
|
|
69
|
+
"gradeC01",
|
|
70
|
+
"gradeC02",
|
|
71
|
+
"gradeC03",
|
|
72
|
+
"overallGrade",
|
|
73
|
+
"reportCID",
|
|
74
|
+
"methodologyVersion",
|
|
75
|
+
"ranAt",
|
|
76
|
+
"resolvedVersion"
|
|
77
|
+
];
|
|
50
78
|
function categoryUint8(bundle, code) {
|
|
51
79
|
const status = bundle.categories.find((c) => c.code === code)?.status;
|
|
52
80
|
return status ? CATEGORY_STATUS_UINT8[status] : CATEGORY_STATUS_UINT8.skipped;
|
|
@@ -67,31 +95,36 @@ function litmusFields(bundle, reportCID) {
|
|
|
67
95
|
}
|
|
68
96
|
function encodeLitmusAttestation(bundle, reportCID) {
|
|
69
97
|
const f = litmusFields(bundle, reportCID);
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
98
|
+
return AbiCoder.defaultAbiCoder().encode(
|
|
99
|
+
[...LITMUS_ABI_TYPES],
|
|
100
|
+
[
|
|
101
|
+
f.serverRef,
|
|
102
|
+
f.toolDefsFingerprint,
|
|
103
|
+
f.gradeC01,
|
|
104
|
+
f.gradeC02,
|
|
105
|
+
f.gradeC03,
|
|
106
|
+
f.overallGrade,
|
|
107
|
+
f.reportCID,
|
|
108
|
+
f.methodologyVersion,
|
|
109
|
+
f.ranAt,
|
|
110
|
+
f.resolvedVersion
|
|
111
|
+
]
|
|
112
|
+
);
|
|
83
113
|
}
|
|
84
114
|
function decodeLitmusAttestation(encoded) {
|
|
85
|
-
const
|
|
115
|
+
const values = AbiCoder.defaultAbiCoder().decode([...LITMUS_ABI_TYPES], encoded);
|
|
86
116
|
const out = {};
|
|
87
|
-
|
|
88
|
-
out[
|
|
89
|
-
}
|
|
117
|
+
LITMUS_ABI_NAMES.forEach((name, i) => {
|
|
118
|
+
out[name] = values[i];
|
|
119
|
+
});
|
|
90
120
|
return out;
|
|
91
121
|
}
|
|
92
122
|
|
|
93
123
|
// ../onchain/src/read.ts
|
|
94
|
-
import { JsonRpcProvider, ZeroHash } from "ethers";
|
|
124
|
+
import { Contract, JsonRpcProvider, ZeroHash } from "ethers";
|
|
125
|
+
var EAS_ABI = [
|
|
126
|
+
"function getAttestation(bytes32 uid) view returns ((bytes32 uid, bytes32 schema, uint64 time, uint64 expirationTime, uint64 revocationTime, bytes32 refUID, address recipient, address attester, bool revocable, bytes data))"
|
|
127
|
+
];
|
|
95
128
|
function litmusSchemaUID() {
|
|
96
129
|
const uid = process.env.NEXT_PUBLIC_EAS_SCHEMA_UID;
|
|
97
130
|
if (!uid) throw new Error("NEXT_PUBLIC_EAS_SCHEMA_UID is required \u2014 register the schema first.");
|
|
@@ -100,8 +133,7 @@ function litmusSchemaUID() {
|
|
|
100
133
|
async function readAttestation(uid) {
|
|
101
134
|
const cfg = networkConfig();
|
|
102
135
|
const provider = new JsonRpcProvider(rpcUrl(), cfg.chainId);
|
|
103
|
-
const eas = new
|
|
104
|
-
eas.connect(provider);
|
|
136
|
+
const eas = new Contract(cfg.eas, EAS_ABI, provider);
|
|
105
137
|
const att = await eas.getAttestation(uid);
|
|
106
138
|
if (!att || att.uid === ZeroHash) return null;
|
|
107
139
|
if (String(att.schema).toLowerCase() !== litmusSchemaUID().toLowerCase()) return null;
|
|
@@ -124,27 +156,46 @@ import { z } from "zod";
|
|
|
124
156
|
var RUN_LITMUS_TOOL_NAME = "run_litmus";
|
|
125
157
|
var RUN_LITMUS_TOOL_TITLE = "Run a behavioral litmus on an MCP server";
|
|
126
158
|
var RUN_LITMUS_TOOL_DESCRIPTION = [
|
|
127
|
-
`
|
|
128
|
-
"
|
|
129
|
-
"
|
|
130
|
-
"
|
|
131
|
-
"
|
|
132
|
-
"
|
|
159
|
+
`Grade an MCP server A\u2013F against the open behavioral litmus (${METHODOLOGY_VERSION}).`,
|
|
160
|
+
"The harness connects the way an agent would, fingerprints the tool surface, and",
|
|
161
|
+
"runs four checks: C-01 tool-output injection, C-02 permission/egress overreach",
|
|
162
|
+
"(egress in a hardened default-deny Docker sandbox, plus a declared-permission",
|
|
163
|
+
"honesty check), C-03 sensitive-data handling (planted canaries), and C-04",
|
|
164
|
+
"adversarial-input handling (malformed/oversized and jailbreak inputs).",
|
|
133
165
|
"",
|
|
134
|
-
"This is ACTIVE: it launches the target server's code to exercise it (
|
|
135
|
-
"
|
|
136
|
-
"use `verify_attestation`.
|
|
166
|
+
"This is ACTIVE: it launches the target server's code to exercise it (egress-",
|
|
167
|
+
"sandboxed when Docker is available) and takes ~20\u201360s. It is not a lookup \u2014 for",
|
|
168
|
+
"a server's already-published grade, use `verify_attestation`. No wallet or RPC",
|
|
169
|
+
"needed.",
|
|
137
170
|
"",
|
|
138
|
-
"
|
|
139
|
-
"
|
|
140
|
-
"skipped and the grade is capped
|
|
171
|
+
"server_ref examples: npm/@modelcontextprotocol/server-filesystem \xB7",
|
|
172
|
+
"https://example.com/mcp \xB7 ./build/index.js. For a token-gated https:// target,",
|
|
173
|
+
"pass `bearer`. If Docker is unavailable, C-02 is skipped and the grade is capped",
|
|
174
|
+
"at B for that run."
|
|
141
175
|
].join("\n");
|
|
142
176
|
var runLitmusInputShape = {
|
|
143
|
-
server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file.")
|
|
177
|
+
server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file."),
|
|
178
|
+
bearer: z.string().min(1).max(8192).optional().describe("Bearer token for a token-gated https:// MCP server. Sent as `Authorization: Bearer <token>` to the target origin only. Ignored for stdio/local targets."),
|
|
179
|
+
header: z.array(z.string()).max(20).optional().describe('Extra HTTP headers for a gated https:// target, each "Key: Value" (e.g. "X-Api-Key: \u2026"). Overrides the bearer-derived Authorization for the same key. Ignored for stdio/local targets.')
|
|
144
180
|
};
|
|
145
|
-
|
|
181
|
+
var PROGRESS_TOTAL = 5;
|
|
182
|
+
async function handleRunLitmus({ server_ref, bearer, header }, extra) {
|
|
146
183
|
try {
|
|
147
|
-
const
|
|
184
|
+
const argv = [
|
|
185
|
+
...bearer ? ["--bearer", bearer] : [],
|
|
186
|
+
...(header ?? []).flatMap((h) => ["--header", h])
|
|
187
|
+
];
|
|
188
|
+
const { headers } = parseAuthFlags(argv, {});
|
|
189
|
+
const progressToken = extra._meta?.progressToken;
|
|
190
|
+
const sendProgress = progressToken !== void 0 ? (progress, message) => void extra.sendNotification({
|
|
191
|
+
method: "notifications/progress",
|
|
192
|
+
params: { progressToken, progress, total: PROGRESS_TOTAL, message }
|
|
193
|
+
}) : void 0;
|
|
194
|
+
sendProgress?.(0, `Connecting to ${server_ref}\u2026`);
|
|
195
|
+
const bundle = await runLitmus(resolveTarget(server_ref), {
|
|
196
|
+
...Object.keys(headers).length ? { headers } : {},
|
|
197
|
+
...sendProgress ? { onProgress: (done, _total, label) => sendProgress(done, label) } : {}
|
|
198
|
+
});
|
|
148
199
|
const payload = summarize(bundle);
|
|
149
200
|
return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
|
|
150
201
|
} catch (err) {
|
|
@@ -152,24 +203,28 @@ async function handleRunLitmus({ server_ref }) {
|
|
|
152
203
|
return { isError: true, content: [{ type: "text", text: `run_litmus failed: ${message}` }] };
|
|
153
204
|
}
|
|
154
205
|
}
|
|
206
|
+
var CATEGORY_LABEL = {
|
|
207
|
+
"C-01": "tool-output injection",
|
|
208
|
+
"C-02": "permission / egress overreach",
|
|
209
|
+
"C-03": "sensitive-data handling",
|
|
210
|
+
"C-04": "adversarial-input handling"
|
|
211
|
+
};
|
|
155
212
|
function summarize(b) {
|
|
156
213
|
const find = (code) => b.categories.find((c) => c.code === code);
|
|
157
214
|
const categories = ["C-01", "C-02", "C-03", "C-04"].map((code) => {
|
|
158
215
|
const c = find(code);
|
|
159
216
|
const findings = c?.status === "fail" ? c.probes.flatMap((p) => p.findings).filter((f) => f.severity === "high").slice(0, 5).map((f) => ({ tool: f.tool, kind: f.kind, match: truncate(f.match, 120), host: f.host, port: f.port })) : [];
|
|
160
|
-
return { code, status: c?.status ?? "unknown", reason: c?.reason ?? null, findings };
|
|
217
|
+
return { code, check: CATEGORY_LABEL[code], status: c?.status ?? "unknown", reason: c?.reason ?? null, findings };
|
|
161
218
|
});
|
|
162
|
-
const dockerSkipped = !b.harness.dockerAvailable || find("C-02")?.status === "skipped";
|
|
163
219
|
return {
|
|
164
220
|
grade: b.grade,
|
|
165
|
-
|
|
166
|
-
fingerprint: b.toolDefsFingerprint,
|
|
221
|
+
summary: b.gradeRationale,
|
|
167
222
|
serverRef: b.serverRef,
|
|
168
223
|
resolvedVersion: b.resolvedVersion,
|
|
224
|
+
fingerprint: b.toolDefsFingerprint,
|
|
169
225
|
ranAt: b.ranAt,
|
|
170
226
|
methodologyVersion: b.methodologyVersion,
|
|
171
|
-
categories
|
|
172
|
-
...dockerSkipped ? { dockerSkipped: "C-02 (egress) was not run because Docker was unavailable; the grade is capped at B for this run." } : {}
|
|
227
|
+
categories
|
|
173
228
|
};
|
|
174
229
|
}
|
|
175
230
|
function truncate(s, n) {
|
|
@@ -44,7 +44,7 @@ async function runLitmusCli(args) {
|
|
|
44
44
|
);
|
|
45
45
|
return 2;
|
|
46
46
|
}
|
|
47
|
-
const { runLitmus } = await import("./src-GJ2L6B7K.js");
|
|
47
|
+
const { runLitmus } = await import("./src-RSTPCEYU.js");
|
|
48
48
|
const input = resolveTarget(target);
|
|
49
49
|
try {
|
|
50
50
|
const bundle = await runLitmus(input, { headers, allowStateChanging });
|
package/dist/cli.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
runLitmusCli
|
|
4
|
-
} from "./chunk-RAZNXIE5.js";
|
|
4
|
+
} from "./chunk-VOPISHBU.js";
|
|
5
5
|
import {
|
|
6
6
|
parseServerRef,
|
|
7
7
|
serverKey
|
|
@@ -104,7 +104,7 @@ examples:
|
|
|
104
104
|
polygraphso-litmus litmus npm/@modelcontextprotocol/server-filesystem
|
|
105
105
|
polygraphso-litmus litmus --json npm/@modelcontextprotocol/server-filesystem
|
|
106
106
|
|
|
107
|
-
Set POLYGRAPH_API_URL
|
|
107
|
+
Set POLYGRAPH_API_URL so check/list can look up a server's published grade.
|
|
108
108
|
More at https://polygraph.so
|
|
109
109
|
`;
|
|
110
110
|
function readVersion() {
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
|
2
2
|
import { z } from 'zod';
|
|
3
|
+
import { RequestHandlerExtra } from '@modelcontextprotocol/sdk/shared/protocol.js';
|
|
4
|
+
import { ServerRequest, ServerNotification } from '@modelcontextprotocol/sdk/types.js';
|
|
3
5
|
|
|
4
6
|
/**
|
|
5
7
|
* Shared contract types for the litmus MVP. Web3-free.
|
|
@@ -272,6 +274,14 @@ interface RunLitmusOptions {
|
|
|
272
274
|
* the `finally` tears the connection down, settling any in-flight calls.
|
|
273
275
|
*/
|
|
274
276
|
timeoutMs?: number;
|
|
277
|
+
/**
|
|
278
|
+
* Optional progress callback, fired once per probe phase as the run proceeds:
|
|
279
|
+
* `(done, total, label)` are step counts plus a short human phase name. Purely
|
|
280
|
+
* observational — it never affects the grade or the bundle. The MCP server
|
|
281
|
+
* forwards these as `notifications/progress` so a ~20–60s run isn't a frozen
|
|
282
|
+
* tool call.
|
|
283
|
+
*/
|
|
284
|
+
onProgress?: (done: number, total: number, label: string) => void;
|
|
275
285
|
}
|
|
276
286
|
declare function runLitmus(target: TargetInput, opts?: RunLitmusOptions): Promise<EvidenceBundle>;
|
|
277
287
|
|
|
@@ -484,7 +494,10 @@ declare function decodeLitmusAttestation(encoded: string): Record<string, unknow
|
|
|
484
494
|
* §7). Needs an RPC + a registered schema; the agent-gate calls this, then
|
|
485
495
|
* re-checks the live fingerprint before paying.
|
|
486
496
|
*
|
|
487
|
-
*
|
|
497
|
+
* The read is a single EAS `getAttestation` view call. We hit the contract
|
|
498
|
+
* directly through a minimal ethers ABI fragment (below) rather than the
|
|
499
|
+
* eas-sdk `EAS` class — same on-chain struct, one fewer dependency (eas-sdk
|
|
500
|
+
* dragged hardhat into the production tree).
|
|
488
501
|
*/
|
|
489
502
|
/** The registered litmus schema UID for the selected network (from env). */
|
|
490
503
|
declare function litmusSchemaUID(): string;
|
|
@@ -573,10 +586,14 @@ declare const RUN_LITMUS_TOOL_TITLE = "Run a behavioral litmus on an MCP server"
|
|
|
573
586
|
declare const RUN_LITMUS_TOOL_DESCRIPTION: string;
|
|
574
587
|
declare const runLitmusInputShape: {
|
|
575
588
|
server_ref: z.ZodString;
|
|
589
|
+
bearer: z.ZodOptional<z.ZodString>;
|
|
590
|
+
header: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
576
591
|
};
|
|
577
|
-
declare function handleRunLitmus({ server_ref }: {
|
|
592
|
+
declare function handleRunLitmus({ server_ref, bearer, header }: {
|
|
578
593
|
server_ref: string;
|
|
579
|
-
|
|
594
|
+
bearer?: string;
|
|
595
|
+
header?: string[];
|
|
596
|
+
}, extra: RequestHandlerExtra<ServerRequest, ServerNotification>): Promise<{
|
|
580
597
|
content: {
|
|
581
598
|
type: "text";
|
|
582
599
|
text: string;
|
package/dist/index.js
CHANGED
|
@@ -14,11 +14,11 @@ import {
|
|
|
14
14
|
rpcUrl,
|
|
15
15
|
runLitmusInputShape,
|
|
16
16
|
selectedNetwork
|
|
17
|
-
} from "./chunk-GJ7M7C46.js";
|
|
17
|
+
} from "./chunk-BPS4YCDL.js";
|
|
18
18
|
import {
|
|
19
19
|
parseAuthFlags,
|
|
20
20
|
resolveTarget
|
|
21
|
-
} from "./chunk-RAZNXIE5.js";
|
|
21
|
+
} from "./chunk-VOPISHBU.js";
|
|
22
22
|
import {
|
|
23
23
|
assembleBundle,
|
|
24
24
|
canaryMatch,
|
|
@@ -33,7 +33,7 @@ import {
|
|
|
33
33
|
markdownTricks,
|
|
34
34
|
runLitmus,
|
|
35
35
|
stateChangingToolNames
|
|
36
|
-
} from "./chunk-EWLIQPXF.js";
|
|
36
|
+
} from "./chunk-35UOPCBW.js";
|
|
37
37
|
import {
|
|
38
38
|
BUNDLE_SCHEMA_VERSION,
|
|
39
39
|
CATEGORY_STATUS_UINT8,
|
package/dist/mcp.d.ts
CHANGED
|
@@ -3,10 +3,11 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* `polygraphso-litmus-mcp` — the polygraph litmus MCP server. Stdio transport.
|
|
6
|
-
* Exposes
|
|
6
|
+
* Exposes to any MCP client (Claude Desktop, Cursor, …):
|
|
7
7
|
*
|
|
8
|
-
* • `run_litmus` — actively grade an MCP server A–F
|
|
8
|
+
* • `run_litmus` — actively grade an MCP server A–F against the open harness.
|
|
9
9
|
* • `verify_attestation` — passively read a server's published onchain grade.
|
|
10
|
+
* • prompts `grade` / `check` — one-line slash-command entry points to the two tools.
|
|
10
11
|
*
|
|
11
12
|
* Also exported as `@polygraphso/litmus/mcp` for embedding in a custom server.
|
|
12
13
|
*/
|
package/dist/mcp.js
CHANGED
|
@@ -7,9 +7,9 @@ import {
|
|
|
7
7
|
readAttestation,
|
|
8
8
|
runLitmusInputShape,
|
|
9
9
|
selectedNetwork
|
|
10
|
-
} from "./chunk-GJ7M7C46.js";
|
|
11
|
-
import "./chunk-RAZNXIE5.js";
|
|
12
|
-
import "./chunk-EWLIQPXF.js";
|
|
10
|
+
} from "./chunk-BPS4YCDL.js";
|
|
11
|
+
import "./chunk-VOPISHBU.js";
|
|
12
|
+
import "./chunk-35UOPCBW.js";
|
|
13
13
|
import {
|
|
14
14
|
parseServerRef,
|
|
15
15
|
serverKey
|
|
@@ -20,6 +20,7 @@ import { realpathSync } from "fs";
|
|
|
20
20
|
import { fileURLToPath } from "url";
|
|
21
21
|
import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
22
22
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
23
|
+
import { z as z2 } from "zod";
|
|
23
24
|
|
|
24
25
|
// ../mcp/src/index.ts
|
|
25
26
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
@@ -36,28 +37,63 @@ function canonicalRef(ref) {
|
|
|
36
37
|
var VERIFY_TOOL_NAME = "verify_attestation";
|
|
37
38
|
var VERIFY_TOOL_TITLE = "Verify a server's polygraph attestation";
|
|
38
39
|
var VERIFY_TOOL_DESCRIPTION = [
|
|
39
|
-
"Read
|
|
40
|
-
"agent trusts
|
|
40
|
+
"Read a server's already-published polygraph (litmus) grade \u2014 without running",
|
|
41
|
+
"anything \u2014 before an agent trusts or, in agentic commerce, pays it.",
|
|
41
42
|
"",
|
|
42
|
-
"
|
|
43
|
-
"and the graded tool-surface fingerprint. The caller must
|
|
44
|
-
"LIVE fingerprint and require it to equal the attested one
|
|
45
|
-
"passing attestation can otherwise front for a tool surface the
|
|
46
|
-
"longer serves (rug pull).",
|
|
43
|
+
"When a grade is published it returns the behavioral grade (A\u2013F), the attestation",
|
|
44
|
+
"UID, the evidence CID, and the graded tool-surface fingerprint. The caller must",
|
|
45
|
+
"still recompute the LIVE fingerprint and require it to equal the attested one",
|
|
46
|
+
"before paying \u2014 a passing attestation can otherwise front for a tool surface the",
|
|
47
|
+
"server no longer serves (rug pull).",
|
|
47
48
|
"",
|
|
48
|
-
"
|
|
49
|
-
"
|
|
50
|
-
"
|
|
49
|
+
"Grade publishing is still rolling out, so this commonly returns not_available",
|
|
50
|
+
"today: that means UNEVALUATED (neither safe nor unsafe), not a failing grade \u2014 to",
|
|
51
|
+
"grade the server yourself right now, use `run_litmus`. A `lookup_failed` result",
|
|
52
|
+
"means the lookup itself failed (the index or chain was unreachable); the grade is",
|
|
53
|
+
"unknown, which is not the same as unevaluated.",
|
|
54
|
+
"",
|
|
55
|
+
"Input: server_ref \u2014 e.g. npm/@modelcontextprotocol/server-filesystem."
|
|
51
56
|
].join("\n");
|
|
52
57
|
var verifyInputShape = {
|
|
53
58
|
server_ref: z.string().min(1).max(512).describe("Registry-prefixed server identifier, e.g. npm/@scope/server.")
|
|
54
59
|
};
|
|
55
60
|
async function handleVerify({ server_ref }) {
|
|
56
|
-
const
|
|
57
|
-
|
|
61
|
+
const found = await resolveUid(server_ref);
|
|
62
|
+
if (found.kind === "error") {
|
|
63
|
+
return {
|
|
64
|
+
isError: true,
|
|
65
|
+
content: [
|
|
66
|
+
{
|
|
67
|
+
type: "text",
|
|
68
|
+
text: `lookup_failed \u2014 could not reach the polygraph grade index for ${server_ref} (${found.detail}). The lookup itself failed, so the grade is unknown \u2014 retry or report it as unchecked, NOT as unevaluated.`
|
|
69
|
+
}
|
|
70
|
+
]
|
|
71
|
+
};
|
|
72
|
+
}
|
|
73
|
+
let att = null;
|
|
74
|
+
if (found.kind === "found") {
|
|
75
|
+
try {
|
|
76
|
+
att = await readAttestation(found.uid);
|
|
77
|
+
} catch (err) {
|
|
78
|
+
return {
|
|
79
|
+
isError: true,
|
|
80
|
+
content: [
|
|
81
|
+
{
|
|
82
|
+
type: "text",
|
|
83
|
+
text: `lookup_failed \u2014 the onchain read failed for ${server_ref} (${err instanceof Error ? err.message : String(err)}). Treat as unchecked (the chain/RPC was unreachable), not as "no grade".`
|
|
84
|
+
}
|
|
85
|
+
]
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
}
|
|
58
89
|
if (!att) {
|
|
59
90
|
return {
|
|
60
|
-
content: [
|
|
91
|
+
content: [
|
|
92
|
+
{
|
|
93
|
+
type: "text",
|
|
94
|
+
text: `not_available \u2014 no published polygraph grade for ${server_ref}. Grade publishing is still rolling out, so this is expected for most servers; it means unevaluated (neither safe nor unsafe), not a failing grade. To grade it now, use run_litmus.`
|
|
95
|
+
}
|
|
96
|
+
]
|
|
61
97
|
};
|
|
62
98
|
}
|
|
63
99
|
if (canonicalRef(att.serverRef) !== canonicalRef(server_ref)) {
|
|
@@ -90,11 +126,12 @@ async function resolveUid(serverRef) {
|
|
|
90
126
|
const base = process.env.POLYGRAPH_API_URL ?? "https://polygraph.so";
|
|
91
127
|
try {
|
|
92
128
|
const res = await fetch(`${base}/api/attestations?ref=${encodeURIComponent(serverRef)}`);
|
|
93
|
-
if (
|
|
129
|
+
if (res.status === 404) return { kind: "none" };
|
|
130
|
+
if (!res.ok) return { kind: "error", detail: `grade index returned HTTP ${res.status}` };
|
|
94
131
|
const row = await res.json();
|
|
95
|
-
return row?.attestation_uid
|
|
96
|
-
} catch {
|
|
97
|
-
return
|
|
132
|
+
return row?.attestation_uid ? { kind: "found", uid: row.attestation_uid } : { kind: "none" };
|
|
133
|
+
} catch (err) {
|
|
134
|
+
return { kind: "error", detail: err instanceof Error ? err.message : String(err) };
|
|
98
135
|
}
|
|
99
136
|
}
|
|
100
137
|
|
|
@@ -104,17 +141,21 @@ function buildServer() {
|
|
|
104
141
|
{ name: "polygraph-litmus", version: "0.1.0" },
|
|
105
142
|
{
|
|
106
143
|
instructions: [
|
|
107
|
-
"polygraph
|
|
144
|
+
"polygraph runs an open behavioral test on an MCP server and reports a",
|
|
145
|
+
"letter grade A\u2013F, with the evidence behind it.",
|
|
108
146
|
"",
|
|
109
|
-
"Use `run_litmus` to grade a server now
|
|
110
|
-
"
|
|
111
|
-
"
|
|
112
|
-
"
|
|
113
|
-
"
|
|
147
|
+
"Use `run_litmus` to grade a server now. It connects the way an agent would",
|
|
148
|
+
"and exercises the target \u2014 so it runs the target's code (egress-sandboxed",
|
|
149
|
+
"when Docker is present), not a passive read; ~20\u201360s. No wallet or RPC",
|
|
150
|
+
"needed. Pass `server_ref` as an npm ref (npm/@scope/server), an https:// MCP",
|
|
151
|
+
"URL, or a local path to an MCP entry file; pass `bearer` for a token-gated",
|
|
152
|
+
"https target.",
|
|
114
153
|
"",
|
|
115
|
-
"Use `verify_attestation` to read a
|
|
116
|
-
"
|
|
117
|
-
"
|
|
154
|
+
"Use `verify_attestation` to read a grade that was already published for a",
|
|
155
|
+
"server, without running anything. Grade publishing is still rolling out, so",
|
|
156
|
+
"it commonly returns not_available today \u2014 that means unevaluated (neither",
|
|
157
|
+
"safe nor unsafe), not a failing grade; to grade the server yourself, use",
|
|
158
|
+
"`run_litmus`."
|
|
118
159
|
].join("\n")
|
|
119
160
|
}
|
|
120
161
|
);
|
|
@@ -154,6 +195,48 @@ function buildServer() {
|
|
|
154
195
|
},
|
|
155
196
|
handleVerify
|
|
156
197
|
);
|
|
198
|
+
server.registerPrompt(
|
|
199
|
+
"grade",
|
|
200
|
+
{
|
|
201
|
+
title: "Grade an MCP server",
|
|
202
|
+
description: "Run the open behavioral litmus against an MCP server and report its grade A\u2013F with the evidence.",
|
|
203
|
+
argsSchema: {
|
|
204
|
+
server_ref: z2.string().min(1).max(512).describe("npm/@scope/server, an https:// MCP URL, or a local path to an MCP entry file")
|
|
205
|
+
}
|
|
206
|
+
},
|
|
207
|
+
({ server_ref }) => ({
|
|
208
|
+
messages: [
|
|
209
|
+
{
|
|
210
|
+
role: "user",
|
|
211
|
+
content: {
|
|
212
|
+
type: "text",
|
|
213
|
+
text: `Run the polygraph litmus on ${server_ref} using the run_litmus tool. Report the letter grade, the one-line summary, and any failed category with its findings. If the grade is capped at B because Docker was unavailable, say so plainly.`
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
]
|
|
217
|
+
})
|
|
218
|
+
);
|
|
219
|
+
server.registerPrompt(
|
|
220
|
+
"check",
|
|
221
|
+
{
|
|
222
|
+
title: "Check a server's published grade",
|
|
223
|
+
description: "Read a server's already-published polygraph grade without running anything.",
|
|
224
|
+
argsSchema: {
|
|
225
|
+
server_ref: z2.string().min(1).max(512).describe("Registry-prefixed server identifier, e.g. npm/@scope/server")
|
|
226
|
+
}
|
|
227
|
+
},
|
|
228
|
+
({ server_ref }) => ({
|
|
229
|
+
messages: [
|
|
230
|
+
{
|
|
231
|
+
role: "user",
|
|
232
|
+
content: {
|
|
233
|
+
type: "text",
|
|
234
|
+
text: `Use the verify_attestation tool to read the published polygraph grade for ${server_ref}. If it returns not_available, say the server is unevaluated (neither safe nor unsafe) and offer to run a live grade with run_litmus. If it returns lookup_failed, say the lookup itself failed so the grade is unknown \u2014 do not call it unevaluated.`
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
]
|
|
238
|
+
})
|
|
239
|
+
);
|
|
157
240
|
return server;
|
|
158
241
|
}
|
|
159
242
|
async function main() {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@polygraphso/litmus",
|
|
3
|
-
"version": "0.7.0",
|
|
3
|
+
"version": "0.8.1",
|
|
4
4
|
"description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"homepage": "https://polygraph.so",
|
|
@@ -52,7 +52,6 @@
|
|
|
52
52
|
},
|
|
53
53
|
"dependencies": {
|
|
54
54
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
55
|
-
"@ethereum-attestation-service/eas-sdk": "^2.9.1",
|
|
56
55
|
"ethers": "^6.16.0",
|
|
57
56
|
"zod": "^3.23.8",
|
|
58
57
|
"tsx": "^4.19.0"
|
|
@@ -62,12 +61,12 @@
|
|
|
62
61
|
"tsup": "^8.3.0",
|
|
63
62
|
"typescript": "^5.9.3",
|
|
64
63
|
"vitest": "^2.1.0",
|
|
65
|
-
"@polygraph/probes": "0.0.0",
|
|
66
64
|
"@polygraph/core": "0.0.0",
|
|
67
65
|
"@polygraph/onchain": "0.0.0",
|
|
68
66
|
"@polygraph/agent": "0.0.0",
|
|
69
|
-
"@polygraph/
|
|
70
|
-
"@polygraph/mcp": "0.0.0"
|
|
67
|
+
"@polygraph/probes": "0.0.0",
|
|
68
|
+
"@polygraph/mcp": "0.0.0",
|
|
69
|
+
"@polygraph/cli": "0.0.0"
|
|
71
70
|
},
|
|
72
71
|
"publishConfig": {
|
|
73
72
|
"access": "public"
|