@polygraphso/litmus 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -15
- package/dist/{chunk-6QM4RK25.js → chunk-BIALP22F.js} +1 -63
- package/dist/{chunk-MQC54LFV.js → chunk-JK3UGN2G.js} +4 -27
- package/dist/cli.js +23 -2
- package/dist/index.d.ts +5 -9
- package/dist/index.js +2 -2
- package/dist/mcp.js +2 -2
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -5,9 +5,8 @@ The behavioral **litmus** harness for MCP servers, from [polygraph.so](https://p
|
|
|
5
5
|
It connects to an MCP server the way an agent would, fingerprints its exact tool
|
|
6
6
|
surface, and runs three probe categories — **C-01** tool-output injection, **C-02**
|
|
7
7
|
permission/egress (in a hardened default-deny Docker sandbox), **C-03**
|
|
8
|
-
sensitive-data handling (planted canaries) — then grades the server **A–F
|
|
9
|
-
|
|
10
|
-
browser flow where you sign an onchain EAS attestation on Base.
|
|
8
|
+
sensitive-data handling (planted canaries) — then grades the server **A–F** and
|
|
9
|
+
produces a deterministic, content-addressed evidence bundle.
|
|
11
10
|
|
|
12
11
|
A passing grade is a measurement, not a guarantee. The methodology and its
|
|
13
12
|
disclosed limits live at [polygraph.so](https://polygraph.so).
|
|
@@ -29,7 +28,6 @@ and the grade is capped at **B** for that run.
|
|
|
29
28
|
```bash
|
|
30
29
|
polygraphso-litmus litmus <registry-ref | https-url | path-to-mcp> # grade a server
|
|
31
30
|
polygraphso-litmus litmus --json <ref> # machine-readable evidence bundle
|
|
32
|
-
polygraphso-litmus challenge <attestation-uid> <ref> # re-run to dispute a published grade
|
|
33
31
|
polygraphso-litmus check <ref> # look up a published grade
|
|
34
32
|
```
|
|
35
33
|
|
|
@@ -41,7 +39,9 @@ polygraphso-litmus litmus https://example.com/mcp
|
|
|
41
39
|
```
|
|
42
40
|
|
|
43
41
|
The `litmus` command exits non-zero on a failing grade (D/F), so it scripts in CI.
|
|
44
|
-
|
|
42
|
+
|
|
43
|
+
To dispute a published grade, just re-run `litmus` against the same server: the harness is
|
|
44
|
+
open and deterministic, so a re-run reproduces the grade — or refutes it.
|
|
45
45
|
|
|
46
46
|
## Use it from an AI agent (MCP server)
|
|
47
47
|
|
|
@@ -49,13 +49,13 @@ The package ships a stdio MCP server, `polygraphso-litmus-mcp`, so it works in a
|
|
|
49
49
|
MCP-capable client. It exposes two tools:
|
|
50
50
|
|
|
51
51
|
- **`run_litmus`** — actively grade a server *now* (runs the harness end-to-end),
|
|
52
|
-
and return the grade
|
|
52
|
+
and return the grade and the evidence.
|
|
53
53
|
- **`verify_attestation`** — passively read a server's *already-published* grade
|
|
54
54
|
before trusting or paying it.
|
|
55
55
|
|
|
56
56
|
**Prerequisites:** Node ≥ 18. Docker is optional (without it, C-02 egress is
|
|
57
|
-
skipped and the grade caps at B). Set `POLYGRAPH_API_URL=https://polygraph.so`
|
|
58
|
-
|
|
57
|
+
skipped and the grade caps at B). Set `POLYGRAPH_API_URL=https://polygraph.so` so
|
|
58
|
+
`verify_attestation` can resolve a server's published grade.
|
|
59
59
|
|
|
60
60
|
Add the server once, then just talk to your agent.
|
|
61
61
|
|
|
@@ -90,15 +90,12 @@ claude mcp add polygraph-litmus -e POLYGRAPH_API_URL=https://polygraph.so \
|
|
|
90
90
|
> Run polygraph against `npm/@modelcontextprotocol/server-filesystem` and tell me the grade.
|
|
91
91
|
|
|
92
92
|
The agent calls **`run_litmus`**, which launches that server in the harness, runs
|
|
93
|
-
C-01/C-02/C-03, and returns the **grade (A–F)**, the per-category results, the
|
|
94
|
-
tool-surface fingerprint
|
|
95
|
-
|
|
96
|
-
onchain as an EAS attestation. Signing is intentionally **not** headless: the agent
|
|
97
|
-
does the work, you approve the mint. Use **`verify_attestation`** instead to read a
|
|
98
|
-
grade that's already published.
|
|
93
|
+
C-01/C-02/C-03, and returns the **grade (A–F)**, the per-category results, and the
|
|
94
|
+
tool-surface fingerprint. Use **`verify_attestation`** instead to read a grade
|
|
95
|
+
that's already published.
|
|
99
96
|
|
|
100
97
|
`run_litmus` launches the target server's code to exercise it (egress-sandboxed
|
|
101
|
-
when Docker is present). It needs no wallet or RPC
|
|
98
|
+
when Docker is present). It needs no wallet or RPC.
|
|
102
99
|
|
|
103
100
|
## Library
|
|
104
101
|
|
|
@@ -33,39 +33,6 @@ function truncate(s, n) {
|
|
|
33
33
|
return s.length > n ? `${s.slice(0, n)}\u2026` : s;
|
|
34
34
|
}
|
|
35
35
|
|
|
36
|
-
// ../cli/src/api.ts
|
|
37
|
-
var DEFAULT_BASE = "https://polygraph.so";
|
|
38
|
-
function apiBaseUrl() {
|
|
39
|
-
const override = process.env.POLYGRAPH_API_URL;
|
|
40
|
-
if (!override || override.length === 0) return DEFAULT_BASE;
|
|
41
|
-
const trimmed = override.replace(/\/+$/, "");
|
|
42
|
-
let u;
|
|
43
|
-
try {
|
|
44
|
-
u = new URL(trimmed);
|
|
45
|
-
} catch {
|
|
46
|
-
throw new Error(`POLYGRAPH_API_URL is not a valid URL: ${override}`);
|
|
47
|
-
}
|
|
48
|
-
const isLoopback = u.hostname === "localhost" || u.hostname === "127.0.0.1" || u.hostname === "::1";
|
|
49
|
-
if (u.protocol !== "https:" && !(u.protocol === "http:" && isLoopback)) {
|
|
50
|
-
throw new Error(`POLYGRAPH_API_URL must use https (http allowed only for localhost): ${override}`);
|
|
51
|
-
}
|
|
52
|
-
return trimmed;
|
|
53
|
-
}
|
|
54
|
-
function pinUrl() {
|
|
55
|
-
return `${apiBaseUrl()}/api/pin`;
|
|
56
|
-
}
|
|
57
|
-
function attestationsUrl() {
|
|
58
|
-
return `${apiBaseUrl()}/api/attestations`;
|
|
59
|
-
}
|
|
60
|
-
function mintUrl(params) {
|
|
61
|
-
const u = new URL(`${apiBaseUrl()}/mint`);
|
|
62
|
-
u.searchParams.set("cid", params.cid);
|
|
63
|
-
u.searchParams.set("ref", params.ref);
|
|
64
|
-
u.searchParams.set("fp", params.fp);
|
|
65
|
-
if (params.ver) u.searchParams.set("ver", params.ver);
|
|
66
|
-
return u.toString();
|
|
67
|
-
}
|
|
68
|
-
|
|
69
36
|
// ../cli/src/litmus.ts
|
|
70
37
|
async function runLitmusCli(args) {
|
|
71
38
|
const json = args.includes("--json");
|
|
@@ -82,7 +49,6 @@ async function runLitmusCli(args) {
|
|
|
82
49
|
try {
|
|
83
50
|
const bundle = await runLitmus(input, { headers, allowStateChanging });
|
|
84
51
|
process.stdout.write(json ? canonicalStringify(bundle) + "\n" : formatBundle(bundle));
|
|
85
|
-
await maybePin(bundle, json);
|
|
86
52
|
return bundle.grade === "D" || bundle.grade === "F" ? 1 : 0;
|
|
87
53
|
} catch (err) {
|
|
88
54
|
process.stderr.write(`\u2192 litmus failed: ${err instanceof Error ? err.message : String(err)}
|
|
@@ -144,37 +110,9 @@ function tsxCli() {
|
|
|
144
110
|
const rel = typeof bin === "string" ? bin : bin.tsx ?? "./dist/cli.mjs";
|
|
145
111
|
return path.join(dir, rel);
|
|
146
112
|
}
|
|
147
|
-
async function maybePin(bundle, json = false) {
|
|
148
|
-
if (!process.env.POLYGRAPH_API_URL) return;
|
|
149
|
-
const note = (line) => (json ? process.stderr : process.stdout).write(line);
|
|
150
|
-
try {
|
|
151
|
-
const cid = await pinBundle(bundle);
|
|
152
|
-
note(`\u2192 pinned ${cid}
|
|
153
|
-
`);
|
|
154
|
-
note(`\u2192 mint ${mintUrl({ cid, ref: bundle.serverRef, fp: bundle.toolDefsFingerprint, ver: bundle.resolvedVersion })}
|
|
155
|
-
`);
|
|
156
|
-
} catch (err) {
|
|
157
|
-
note(`\u2192 pin skipped: ${err instanceof Error ? err.message : String(err)}
|
|
158
|
-
`);
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
async function pinBundle(bundle) {
|
|
162
|
-
const res = await fetch(pinUrl(), {
|
|
163
|
-
method: "POST",
|
|
164
|
-
headers: { "content-type": "application/json" },
|
|
165
|
-
body: canonicalStringify(bundle)
|
|
166
|
-
});
|
|
167
|
-
if (!res.ok) throw new Error(`pin endpoint returned ${res.status}`);
|
|
168
|
-
const data = await res.json();
|
|
169
|
-
if (!data.cid) throw new Error("pin response missing cid");
|
|
170
|
-
return data.cid;
|
|
171
|
-
}
|
|
172
113
|
|
|
173
114
|
export {
|
|
174
|
-
attestationsUrl,
|
|
175
|
-
mintUrl,
|
|
176
115
|
runLitmusCli,
|
|
177
116
|
parseAuthFlags,
|
|
178
|
-
resolveTarget,
|
|
179
|
-
pinBundle
|
|
117
|
+
resolveTarget
|
|
180
118
|
};
|
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
|
-
mintUrl,
|
|
3
|
-
pinBundle,
|
|
4
2
|
resolveTarget
|
|
5
|
-
} from "./chunk-6QM4RK25.js";
|
|
3
|
+
} from "./chunk-BIALP22F.js";
|
|
6
4
|
import {
|
|
7
5
|
runLitmus
|
|
8
6
|
} from "./chunk-2K6T4FZX.js";
|
|
@@ -137,44 +135,23 @@ var RUN_LITMUS_TOOL_DESCRIPTION = [
|
|
|
137
135
|
"for egress when Docker is available). It is not a passive lookup \u2014 for that,",
|
|
138
136
|
"use `verify_attestation`. It needs no wallet or RPC.",
|
|
139
137
|
"",
|
|
140
|
-
"When POLYGRAPH_API_URL is configured the evidence is pinned and the result",
|
|
141
|
-
"includes a `mint` URL: open it in a browser, connect a wallet, and sign to",
|
|
142
|
-
"publish the grade onchain as an EAS attestation. Signing is intentionally not",
|
|
143
|
-
"headless.",
|
|
144
|
-
"",
|
|
145
138
|
"Input: server_ref \u2014 a registry ref (npm/@scope/server), an https:// MCP URL,",
|
|
146
139
|
"or a local path to an MCP entry file. If Docker is unavailable, C-02 is",
|
|
147
140
|
"skipped and the grade is capped at B for that run."
|
|
148
141
|
].join("\n");
|
|
149
142
|
var runLitmusInputShape = {
|
|
150
|
-
server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file."),
|
|
151
|
-
pin: z.boolean().optional().describe("When true (default) and POLYGRAPH_API_URL is set, pin the evidence and return a mint hand-off URL. Set false to grade only.")
|
|
143
|
+
server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file.")
|
|
152
144
|
};
|
|
153
|
-
async function handleRunLitmus({ server_ref, pin }) {
|
|
145
|
+
async function handleRunLitmus({ server_ref }) {
|
|
154
146
|
try {
|
|
155
147
|
const bundle = await runLitmus(resolveTarget(server_ref));
|
|
156
|
-
const payload =
|
|
148
|
+
const payload = summarize(bundle);
|
|
157
149
|
return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
|
|
158
150
|
} catch (err) {
|
|
159
151
|
const message = err instanceof Error ? err.message : String(err);
|
|
160
152
|
return { isError: true, content: [{ type: "text", text: `run_litmus failed: ${message}` }] };
|
|
161
153
|
}
|
|
162
154
|
}
|
|
163
|
-
async function mintHandoff(bundle, pin) {
|
|
164
|
-
if (pin === false || !process.env.POLYGRAPH_API_URL) {
|
|
165
|
-
return { available: false, reason: "Set POLYGRAPH_API_URL to pin the evidence and get a mint hand-off URL." };
|
|
166
|
-
}
|
|
167
|
-
try {
|
|
168
|
-
const cid = await pinBundle(bundle);
|
|
169
|
-
return {
|
|
170
|
-
url: mintUrl({ cid, ref: bundle.serverRef, fp: bundle.toolDefsFingerprint, ver: bundle.resolvedVersion }),
|
|
171
|
-
cid,
|
|
172
|
-
instruction: "Open this URL in a browser, connect your wallet, and sign to mint the onchain EAS attestation. Signing cannot be done headlessly."
|
|
173
|
-
};
|
|
174
|
-
} catch (err) {
|
|
175
|
-
return { available: false, reason: `pin failed: ${err instanceof Error ? err.message : String(err)}` };
|
|
176
|
-
}
|
|
177
|
-
}
|
|
178
155
|
function summarize(b) {
|
|
179
156
|
const find = (code) => b.categories.find((c) => c.code === code);
|
|
180
157
|
const categories = ["C-01", "C-02", "C-03"].map((code) => {
|
package/dist/cli.js
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
|
-
attestationsUrl,
|
|
4
3
|
runLitmusCli
|
|
5
|
-
} from "./chunk-6QM4RK25.js";
|
|
4
|
+
} from "./chunk-BIALP22F.js";
|
|
6
5
|
import {
|
|
7
6
|
parseServerRef,
|
|
8
7
|
serverKey
|
|
@@ -13,6 +12,28 @@ import { readFileSync } from "fs";
|
|
|
13
12
|
import { fileURLToPath } from "url";
|
|
14
13
|
import { dirname, join } from "path";
|
|
15
14
|
|
|
15
|
+
// ../cli/src/api.ts
|
|
16
|
+
var DEFAULT_BASE = "https://polygraph.so";
|
|
17
|
+
function apiBaseUrl() {
|
|
18
|
+
const override = process.env.POLYGRAPH_API_URL;
|
|
19
|
+
if (!override || override.length === 0) return DEFAULT_BASE;
|
|
20
|
+
const trimmed = override.replace(/\/+$/, "");
|
|
21
|
+
let u;
|
|
22
|
+
try {
|
|
23
|
+
u = new URL(trimmed);
|
|
24
|
+
} catch {
|
|
25
|
+
throw new Error(`POLYGRAPH_API_URL is not a valid URL: ${override}`);
|
|
26
|
+
}
|
|
27
|
+
const isLoopback = u.hostname === "localhost" || u.hostname === "127.0.0.1" || u.hostname === "::1";
|
|
28
|
+
if (u.protocol !== "https:" && !(u.protocol === "http:" && isLoopback)) {
|
|
29
|
+
throw new Error(`POLYGRAPH_API_URL must use https (http allowed only for localhost): ${override}`);
|
|
30
|
+
}
|
|
31
|
+
return trimmed;
|
|
32
|
+
}
|
|
33
|
+
function attestationsUrl() {
|
|
34
|
+
return `${apiBaseUrl()}/api/attestations`;
|
|
35
|
+
}
|
|
36
|
+
|
|
16
37
|
// ../cli/src/check.ts
|
|
17
38
|
function checkQuery(rawRef) {
|
|
18
39
|
try {
|
package/dist/index.d.ts
CHANGED
|
@@ -196,7 +196,7 @@ interface ConnectOptions {
|
|
|
196
196
|
httpHeaders?: Record<string, string>;
|
|
197
197
|
/**
|
|
198
198
|
* stdio execution mode. "none" (default) launches the target on the host;
|
|
199
|
-
* "docker" runs an npm target ONLY inside the hardened container
|
|
199
|
+
* "docker" runs an npm target ONLY inside the hardened container and
|
|
200
200
|
* throws IsolationUnsupportedError for any other stdio kind. http targets are
|
|
201
201
|
* unaffected (isolation is stdio-only).
|
|
202
202
|
*/
|
|
@@ -527,13 +527,12 @@ declare function liveFingerprint(target: TargetInput): Promise<LiveTarget>;
|
|
|
527
527
|
|
|
528
528
|
/**
|
|
529
529
|
* `run_litmus` — run the open behavioral harness end-to-end against an MCP
|
|
530
|
-
* server and return the grade
|
|
531
|
-
*
|
|
530
|
+
* server and return the grade and the evidence. Brand-voiced: plain, exact, no
|
|
531
|
+
* overclaim.
|
|
532
532
|
*
|
|
533
533
|
* Unlike `verify_attestation` (a passive onchain read), this tool LAUNCHES the
|
|
534
534
|
* target server's code to exercise it — sandboxed for egress when Docker is
|
|
535
|
-
* present. It needs no wallet or RPC
|
|
536
|
-
* browser via the returned URL) requires a wallet.
|
|
535
|
+
* present. It needs no wallet or RPC.
|
|
537
536
|
*/
|
|
538
537
|
|
|
539
538
|
declare const RUN_LITMUS_TOOL_NAME = "run_litmus";
|
|
@@ -541,11 +540,9 @@ declare const RUN_LITMUS_TOOL_TITLE = "Run a behavioral litmus on an MCP server"
|
|
|
541
540
|
declare const RUN_LITMUS_TOOL_DESCRIPTION: string;
|
|
542
541
|
declare const runLitmusInputShape: {
|
|
543
542
|
server_ref: z.ZodString;
|
|
544
|
-
pin: z.ZodOptional<z.ZodBoolean>;
|
|
545
543
|
};
|
|
546
|
-
declare function handleRunLitmus({ server_ref, pin }: {
|
|
544
|
+
declare function handleRunLitmus({ server_ref }: {
|
|
547
545
|
server_ref: string;
|
|
548
|
-
pin?: boolean;
|
|
549
546
|
}): Promise<{
|
|
550
547
|
content: {
|
|
551
548
|
type: "text";
|
|
@@ -565,7 +562,6 @@ declare function handleRunLitmus({ server_ref, pin }: {
|
|
|
565
562
|
* harness locally and print the grade. The heavy harness (`@polygraph/probes`)
|
|
566
563
|
* is loaded lazily so the zero-dep `check`/`list` fast path stays intact.
|
|
567
564
|
*/
|
|
568
|
-
|
|
569
565
|
type StdioCommand = {
|
|
570
566
|
command: string;
|
|
571
567
|
args: string[];
|
package/dist/index.js
CHANGED
|
@@ -14,11 +14,11 @@ import {
|
|
|
14
14
|
rpcUrl,
|
|
15
15
|
runLitmusInputShape,
|
|
16
16
|
selectedNetwork
|
|
17
|
-
} from "./chunk-MQC54LFV.js";
|
|
17
|
+
} from "./chunk-JK3UGN2G.js";
|
|
18
18
|
import {
|
|
19
19
|
parseAuthFlags,
|
|
20
20
|
resolveTarget
|
|
21
|
-
} from "./chunk-6QM4RK25.js";
|
|
21
|
+
} from "./chunk-BIALP22F.js";
|
|
22
22
|
import {
|
|
23
23
|
assembleBundle,
|
|
24
24
|
canaryMatch,
|
package/dist/mcp.js
CHANGED
|
@@ -7,8 +7,8 @@ import {
|
|
|
7
7
|
readAttestation,
|
|
8
8
|
runLitmusInputShape,
|
|
9
9
|
selectedNetwork
|
|
10
|
-
} from "./chunk-MQC54LFV.js";
|
|
11
|
-
import "./chunk-6QM4RK25.js";
|
|
10
|
+
} from "./chunk-JK3UGN2G.js";
|
|
11
|
+
import "./chunk-BIALP22F.js";
|
|
12
12
|
import "./chunk-2K6T4FZX.js";
|
|
13
13
|
import {
|
|
14
14
|
parseServerRef,
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@polygraphso/litmus",
|
|
3
|
-
"version": "0.2.1",
|
|
4
|
-
"description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data)
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"homepage": "https://polygraph.so",
|
|
7
7
|
"repository": {
|
|
@@ -58,9 +58,9 @@
|
|
|
58
58
|
"typescript": "^5.9.3",
|
|
59
59
|
"vitest": "^2.1.0",
|
|
60
60
|
"@polygraph/core": "0.0.0",
|
|
61
|
+
"@polygraph/onchain": "0.0.0",
|
|
61
62
|
"@polygraph/probes": "0.0.0",
|
|
62
63
|
"@polygraph/agent": "0.0.0",
|
|
63
|
-
"@polygraph/onchain": "0.0.0",
|
|
64
64
|
"@polygraph/mcp": "0.0.0",
|
|
65
65
|
"@polygraph/cli": "0.0.0"
|
|
66
66
|
},
|