@polygraphso/litmus 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -15
- package/dist/{chunk-SAZKXB35.js → chunk-K7UEK2BA.js} +2 -2
- package/dist/{chunk-2K6T4FZX.js → chunk-MB5EPL2V.js} +319 -115
- package/dist/{chunk-MQC54LFV.js → chunk-UA4BIHP4.js} +6 -29
- package/dist/{chunk-6QM4RK25.js → chunk-WBXHDYIV.js} +3 -65
- package/dist/cli.js +24 -3
- package/dist/index.d.ts +23 -16
- package/dist/index.js +4 -4
- package/dist/mcp.js +4 -4
- package/dist/{src-XIEFSTXC.js → src-PTK3WEGQ.js} +2 -2
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -5,9 +5,8 @@ The behavioral **litmus** harness for MCP servers, from [polygraph.so](https://p
|
|
|
5
5
|
It connects to an MCP server the way an agent would, fingerprints its exact tool
|
|
6
6
|
surface, and runs three probe categories — **C-01** tool-output injection, **C-02**
|
|
7
7
|
permission/egress (in a hardened default-deny Docker sandbox), **C-03**
|
|
8
|
-
sensitive-data handling (planted canaries) — then grades the server **A–F
|
|
9
|
-
|
|
10
|
-
browser flow where you sign an onchain EAS attestation on Base.
|
|
8
|
+
sensitive-data handling (planted canaries) — then grades the server **A–F** and
|
|
9
|
+
produces a deterministic, content-addressed evidence bundle.
|
|
11
10
|
|
|
12
11
|
A passing grade is a measurement, not a guarantee. The methodology and its
|
|
13
12
|
disclosed limits live at [polygraph.so](https://polygraph.so).
|
|
@@ -29,7 +28,6 @@ and the grade is capped at **B** for that run.
|
|
|
29
28
|
```bash
|
|
30
29
|
polygraphso-litmus litmus <registry-ref | https-url | path-to-mcp> # grade a server
|
|
31
30
|
polygraphso-litmus litmus --json <ref> # machine-readable evidence bundle
|
|
32
|
-
polygraphso-litmus challenge <attestation-uid> <ref> # re-run to dispute a published grade
|
|
33
31
|
polygraphso-litmus check <ref> # look up a published grade
|
|
34
32
|
```
|
|
35
33
|
|
|
@@ -41,7 +39,9 @@ polygraphso-litmus litmus https://example.com/mcp
|
|
|
41
39
|
```
|
|
42
40
|
|
|
43
41
|
The `litmus` command exits non-zero on a failing grade (D/F), so it scripts in CI.
|
|
44
|
-
|
|
42
|
+
|
|
43
|
+
To dispute a published grade, just re-run `litmus` against the same server: the harness is
|
|
44
|
+
open and deterministic, so a re-run reproduces the grade — or refutes it.
|
|
45
45
|
|
|
46
46
|
## Use it from an AI agent (MCP server)
|
|
47
47
|
|
|
@@ -49,13 +49,13 @@ The package ships a stdio MCP server, `polygraphso-litmus-mcp`, so it works in a
|
|
|
49
49
|
MCP-capable client. It exposes two tools:
|
|
50
50
|
|
|
51
51
|
- **`run_litmus`** — actively grade a server *now* (runs the harness end-to-end),
|
|
52
|
-
and return the grade
|
|
52
|
+
and return the grade and the evidence.
|
|
53
53
|
- **`verify_attestation`** — passively read a server's *already-published* grade
|
|
54
54
|
before trusting or paying it.
|
|
55
55
|
|
|
56
56
|
**Prerequisites:** Node ≥ 18. Docker is optional (without it, C-02 egress is
|
|
57
|
-
skipped and the grade caps at B). Set `POLYGRAPH_API_URL=https://polygraph.so`
|
|
58
|
-
|
|
57
|
+
skipped and the grade caps at B). Set `POLYGRAPH_API_URL=https://polygraph.so` so
|
|
58
|
+
`verify_attestation` can resolve a server's published grade.
|
|
59
59
|
|
|
60
60
|
Add the server once, then just talk to your agent.
|
|
61
61
|
|
|
@@ -90,15 +90,12 @@ claude mcp add polygraph-litmus -e POLYGRAPH_API_URL=https://polygraph.so \
|
|
|
90
90
|
> Run polygraph against `npm/@modelcontextprotocol/server-filesystem` and tell me the grade.
|
|
91
91
|
|
|
92
92
|
The agent calls **`run_litmus`**, which launches that server in the harness, runs
|
|
93
|
-
C-01/C-02/C-03, and returns the **grade (A–F)**, the per-category results, the
|
|
94
|
-
tool-surface fingerprint
|
|
95
|
-
|
|
96
|
-
onchain as an EAS attestation. Signing is intentionally **not** headless: the agent
|
|
97
|
-
does the work, you approve the mint. Use **`verify_attestation`** instead to read a
|
|
98
|
-
grade that's already published.
|
|
93
|
+
C-01/C-02/C-03, and returns the **grade (A–F)**, the per-category results, and the
|
|
94
|
+
tool-surface fingerprint. Use **`verify_attestation`** instead to read a grade
|
|
95
|
+
that's already published.
|
|
99
96
|
|
|
100
97
|
`run_litmus` launches the target server's code to exercise it (egress-sandboxed
|
|
101
|
-
when Docker is present). It needs no wallet or RPC
|
|
98
|
+
when Docker is present). It needs no wallet or RPC.
|
|
102
99
|
|
|
103
100
|
## Library
|
|
104
101
|
|
|
@@ -3,10 +3,10 @@ import {
|
|
|
3
3
|
METHODOLOGY_VERSION,
|
|
4
4
|
parseServerRef,
|
|
5
5
|
serverKey
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-K7UEK2BA.js";
|
|
7
7
|
|
|
8
8
|
// ../probes/src/harness.ts
|
|
9
|
-
import { execFile as
|
|
9
|
+
import { execFile as execFile3 } from "child_process";
|
|
10
10
|
|
|
11
11
|
// ../probes/src/connect/index.ts
|
|
12
12
|
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
|
|
@@ -138,7 +138,7 @@ function resolveDockerDir() {
|
|
|
138
138
|
}
|
|
139
139
|
return fileURLToPath(new URL("../../docker", import.meta.url));
|
|
140
140
|
}
|
|
141
|
-
var RESOLVER_SCRIPT = `const p=require("path");const d="/stage/node_modules/"+
|
|
141
|
+
var RESOLVER_SCRIPT = `const p=require("path");const n=process.argv[1];const d="/stage/node_modules/"+n;let j;try{j=require(d+'/package.json')}catch{}let bins={};if(j){const b=j.bin;if(typeof b==="string"){bins[n.replace(/^@[^/]+\\//,"")]=p.join(d,b);}else if(b){for(const k in b){bins[k]=p.join(d,b[k]);}}}const version=j&&j.version?j.version:null;let declaredEgress=[];if(j&&j.polygraph&&Array.isArray(j.polygraph.egress)){declaredEgress=j.polygraph.egress.filter(function(x){return typeof x==="string"});}process.stdout.write(JSON.stringify({bins,version,declaredEgress}));`;
|
|
142
142
|
/**
 * Build the `--label KEY=value` argument pair for a run label.
 *
 * @param {string | undefined} runLabel - Label value; any falsy value means "no label".
 * @returns {string[]} `["--label", "<LABEL_KEY>=<runLabel>"]`, or an empty array when no label is given.
 */
function labelFlags(runLabel) {
  if (!runLabel) {
    return [];
  }
  return ["--label", `${LABEL_KEY}=${runLabel}`];
}
|
|
@@ -211,12 +211,16 @@ function resolverRunArgs(vol, image, pkgName, runLabel, runtime) {
|
|
|
211
211
|
/**
 * Parse the JSON record printed by the in-container resolver script.
 *
 * Only well-typed fields survive: `bins` keeps string-valued entries of a
 * plain (non-array) object, `version` must be a string, and `declaredEgress`
 * keeps the string members of an array. Anything that fails to parse, or that
 * throws while being inspected (e.g. the JSON value `null`), yields the empty
 * record.
 *
 * @param {string} output - Raw stdout of the resolver run.
 * @returns {{ bins: Record<string, string>, version: string | null, declaredEgress: string[] }}
 */
function parseResolverOutput(output) {
  const isString = (value) => typeof value === "string";
  try {
    const record = JSON.parse(output);
    const rawBins = record.bins;
    const bins = {};
    if (rawBins && typeof rawBins === "object" && !Array.isArray(rawBins)) {
      // Plain assignment (not Object.fromEntries) so special keys behave exactly as before.
      Object.keys(rawBins).forEach((binName) => {
        const target = rawBins[binName];
        if (isString(target)) bins[binName] = target;
      });
    }
    let declaredEgress = [];
    if (Array.isArray(record.declaredEgress)) {
      declaredEgress = record.declaredEgress.filter((x) => isString(x));
    }
    const version = isString(record.version) ? record.version : null;
    return { bins, version, declaredEgress };
  } catch {
    return { bins: {}, version: null, declaredEgress: [] };
  }
}
|
|
222
226
|
function buildImageArgs(pull) {
|
|
@@ -250,13 +254,13 @@ async function stageInto(vol, image, spec, pkgName, opts) {
|
|
|
250
254
|
try {
|
|
251
255
|
await docker(stageInstallArgs(vol, image, spec, opts.runLabel, runtime), 18e4);
|
|
252
256
|
const resolved = parseResolverOutput((await docker(resolverRunArgs(vol, image, pkgName, opts.runLabel, runtime))).trim());
|
|
253
|
-
if (
|
|
257
|
+
if (Object.keys(resolved.bins).length === 0) {
|
|
254
258
|
await cleanup();
|
|
255
259
|
throw new Error(
|
|
256
260
|
`target package ${pkgName} exposes no launchable bin under the sandbox policy (install scripts are skipped)`
|
|
257
261
|
);
|
|
258
262
|
}
|
|
259
|
-
return { volume: vol,
|
|
263
|
+
return { volume: vol, bins: resolved.bins, resolvedVersion: resolved.version, declaredEgress: resolved.declaredEgress, cleanup };
|
|
260
264
|
} catch (err) {
|
|
261
265
|
await cleanup();
|
|
262
266
|
throw err;
|
|
@@ -397,125 +401,228 @@ function resolveStagedVersion(requested, staged) {
|
|
|
397
401
|
return staged;
|
|
398
402
|
}
|
|
399
403
|
|
|
404
|
+
// ../probes/src/connect/bin-candidates.ts
|
|
405
|
+
// Case-insensitive marker for bin names that mention "mcp".
var MCP_NAME = /mcp/i;

/**
 * Order a package's bin names for probing, without duplicates.
 *
 * Priority tiers, each preserving the input order:
 *   1. names matching /mcp/i,
 *   2. a name equal to `pkgName`,
 *   3. every remaining name.
 *
 * @param {string[]} binNames - Bin names exposed by the package.
 * @param {string} pkgName - Package name used for the exact-match tier.
 * @returns {string[]} De-duplicated names in probing order.
 */
function orderBinCandidates(binNames, pkgName) {
  const tiers = [
    (name) => MCP_NAME.test(name),
    (name) => name === pkgName,
    () => true
  ];
  // A Set keeps first-insertion order, so re-adding a name in a later tier is a no-op.
  const ordered = /* @__PURE__ */ new Set();
  for (const accepts of tiers) {
    for (const name of binNames) {
      if (accepts(name)) ordered.add(name);
    }
  }
  return [...ordered];
}
|
|
420
|
+
/**
 * Extract bin names from the JSON printed for a package's `bin` field.
 *
 * - blank or unparseable output -> `[]`
 * - a JSON string (single-bin shorthand) -> `[pkgName]`
 * - a JSON object -> its keys
 * - anything else (array, number, null, ...) -> `[]`
 *
 * @param {string} stdout - Raw command output.
 * @param {string} pkgName - Name to report when `bin` is a bare string.
 * @returns {string[]} Bin names, possibly empty.
 */
function parseNpmBins(stdout, pkgName) {
  const text = stdout.trim();
  if (text.length === 0) return [];
  let binField;
  try {
    binField = JSON.parse(text);
  } catch {
    return [];
  }
  if (typeof binField === "string") return [pkgName];
  const isRecord = binField !== null && typeof binField === "object" && !Array.isArray(binField);
  return isRecord ? Object.keys(binField) : [];
}
|
|
433
|
+
/**
 * Raised when none of a package's bins could be connected to as an MCP server.
 * The message lists the bins that were tried (or notes that there were none).
 */
var NoMcpBinError = class extends Error {
  /**
   * @param {string} ref - The server reference being graded.
   * @param {string[]} tried - Bin names that were attempted, in order.
   */
  constructor(ref, tried) {
    const attempted = tried.length ? tried.join(", ") : "no launchable bins";
    super(
      `${ref}: no bin spoke MCP \u2014 tried ${attempted}. The target must be an MCP server; a CLI-only package can't be graded.`
    );
    this.name = "NoMcpBinError";
  }
};
|
|
441
|
+
/**
 * Try each candidate bin in order and return the first one that connects.
 *
 * Attempts run strictly one at a time; probing stops at the first attempt
 * that resolves to anything other than `null`.
 *
 * @param {string} ref - Server reference, used only for the error message.
 * @param {string[]} candidates - Bin names in probing order.
 * @param {(bin: string) => Promise<unknown>} attempt - Resolves to `null` when the bin did not work.
 * @returns {Promise<{ bin: string, result: unknown }>} The winning bin and its attempt result.
 * @throws {NoMcpBinError} When every candidate resolved to `null` (or there were none).
 */
async function probeForMcpBin(ref, candidates, attempt) {
  for (const bin of candidates) {
    const outcome = await attempt(bin);
    if (outcome === null) continue;
    return { bin, result: outcome };
  }
  throw new NoMcpBinError(ref, candidates);
}
|
|
448
|
+
|
|
400
449
|
// ../probes/src/connect/index.ts
|
|
450
|
+
import { execFile as execFile2 } from "child_process";
|
|
451
|
+
import { promisify } from "util";
|
|
401
452
|
import { randomUUID as randomUUID3 } from "crypto";
|
|
453
|
+
var execFileP = promisify(execFile2);
|
|
402
454
|
var CLIENT_INFO = { name: "polygraph-litmus", version: "0.0.0" };
|
|
403
455
|
/**
 * Connect an MCP client to a target and return the connection result.
 *
 * Dispatch, in order:
 *   1. explicit stdio command object (non-string `input`) — never containerized;
 *   2. `http(s)://` URL — streamable HTTP transport after a public-URL check;
 *   3. registry ref with `opts.isolation === "docker"` — npm refs only;
 *   4. npm registry ref on the host;
 *   5. any other registry ref via `launchForRef`.
 *
 * @param {string | { command: string, args?: string[], env?: object, cwd?: string, serverRef?: string }} input
 * @param {object} [opts] - Reads `isolation`, `seedEnv`, `seedCwd`, `httpHeaders` (and is passed on to the npm helpers).
 * @returns {Promise<object>} Whatever `makeResult` builds for the established connection.
 * @throws {IsolationUnsupportedError} When docker isolation is requested for a target that is not an npm ref.
 */
async function connectTarget(input, opts = {}) {
  const wantsDocker = opts.isolation === "docker";
  const seedEnv = opts.seedEnv ?? {};

  // 1. Explicit stdio command.
  if (typeof input !== "string") {
    if (wantsDocker) {
      throw new IsolationUnsupportedError(
        "docker isolation is unsupported for an explicit stdio command \u2014 only an npm ref can be containerized"
      );
    }
    const stdioArgs = input.args ?? [];
    const workDir = input.cwd ?? opts.seedCwd;
    const stdioTransport = new StdioClientTransport({
      command: input.command,
      args: stdioArgs,
      // Later spreads win: caller-supplied env overrides the seed, which overrides defaults.
      env: { ...getDefaultEnvironment(), ...seedEnv, ...input.env ?? {} },
      ...workDir ? { cwd: workDir } : {}
    });
    const cmdline = [input.command, ...stdioArgs].join(" ");
    const stdioClient = await connectOrThrow(stdioTransport);
    return makeResult(
      stdioClient,
      "stdio",
      { kind: "stdio", command: cmdline, url: null },
      input.serverRef ?? cmdline,
      null,
      []
    );
  }

  // 2. Remote HTTP(S) endpoint.
  if (/^https?:\/\//i.test(input)) {
    await assertPublicHttpUrl(input);
    const hasHeaders = opts.httpHeaders && Object.keys(opts.httpHeaders).length > 0;
    const headers = hasHeaders ? opts.httpHeaders : void 0;
    const httpOptions = headers ? { requestInit: { headers }, fetch: sameOriginAuthFetch(input, headers) } : void 0;
    const httpTransport = new StreamableHTTPClientTransport(new URL(input), httpOptions);
    const httpClient = await connectOrThrow(httpTransport);
    return makeResult(httpClient, "http", { kind: "http", command: null, url: input }, input, null, []);
  }

  // 3-5. Registry reference.
  const parsed = parseServerRef(input);
  const isNpm = parsed.registry === "npm";
  if (wantsDocker) {
    if (!isNpm) {
      throw new IsolationUnsupportedError(
        `docker isolation is unsupported for ${parsed.registry} refs \u2014 only npm refs can be containerized`
      );
    }
    return connectIsolatedNpm(input, parsed, opts);
  }
  if (isNpm) {
    return connectHostNpm(input, parsed, opts);
  }

  const launch = launchForRef(parsed);
  const transport = new StdioClientTransport({
    command: launch.command,
    args: launch.args,
    env: { ...getDefaultEnvironment(), ...seedEnv },
    ...opts.seedCwd ? { cwd: opts.seedCwd } : {}
  });
  const client = await connectOrThrow(transport);
  const descriptor = { kind: "stdio", command: [launch.command, ...launch.args].join(" "), url: null };
  return makeResult(client, "stdio", descriptor, serverKey(parsed), parsed.version ?? null, []);
}
|
|
512
|
+
/**
 * Connect to an npm-published MCP server by launching it on the host via `npx`.
 *
 * The package's bin names are looked up first. When none can be determined the
 * package is launched as `npx -y <spec>` and a connection failure throws.
 * Otherwise each bin is tried in `orderBinCandidates` order as
 * `npx -y -p <spec> <bin>` until one connects.
 *
 * @param {string} ref - Original server reference (used in the no-bin error message).
 * @param {{ owner?: string, name: string, version?: string }} parsed - Parsed reference.
 * @param {object} opts - Reads `seedEnv` and `seedCwd`.
 * @returns {Promise<object>} The `makeResult` value for the connected client.
 * @throws {NoMcpBinError} Via `probeForMcpBin`, when no listed bin connects.
 */
async function connectHostNpm(ref, parsed, opts) {
  const pkgId = parsed.owner ? `${parsed.owner}/${parsed.name}` : parsed.name;
  const versionSuffix = parsed.version ? `@${parsed.version}` : "";
  const spec = `${pkgId}${versionSuffix}`;
  const serverRefVal = serverKey(parsed);
  const resolvedVersion = parsed.version ?? null;
  const env = { ...getDefaultEnvironment(), ...opts.seedEnv ?? {} };
  const cwd = opts.seedCwd ? { cwd: opts.seedCwd } : {};

  const npxTransport = (args) => new StdioClientTransport({ command: "npx", args, env, ...cwd });
  const npxDescriptor = (args) => ({ kind: "stdio", command: ["npx", ...args].join(" "), url: null });

  const binNames = await fetchNpmBins(spec, parsed.name);
  const haveBins = Boolean(binNames) && binNames.length > 0;
  if (!haveBins) {
    // No bin listing available: let npx pick the package's default bin.
    const defaultArgs = ["-y", spec];
    const client = await connectOrThrow(npxTransport(defaultArgs));
    return makeResult(client, "stdio", npxDescriptor(defaultArgs), serverRefVal, resolvedVersion, []);
  }

  const candidates = orderBinCandidates(binNames, parsed.name);
  const { result } = await probeForMcpBin(ref, candidates, async (bin) => {
    const binArgs = ["-y", "-p", spec, bin];
    const client = await tryConnect(npxTransport(binArgs));
    if (!client) return null;
    return { client, descriptor: npxDescriptor(binArgs) };
  });
  return makeResult(result.client, "stdio", result.descriptor, serverRefVal, resolvedVersion, []);
}
|
|
534
|
+
/**
 * Connect to an npm-published MCP server running inside a Docker container.
 *
 * Stages the package into a volume, prepares a seed volume from `opts.seedCwd`,
 * then tries each staged bin (in `orderBinCandidates` order) in its own named
 * container until one connects. The returned result carries teardown steps
 * that remove the winning container and clean up both volumes.
 *
 * Changes from the previous revision:
 *   - `opts.seedCwd` is validated up front, so a missing seed directory fails
 *     before the image is ensured and the package is staged.
 *   - On failure, both cleanups are always attempted; a cleanup that rejects
 *     no longer skips the other one or replaces the original error.
 *
 * @param {string} ref - Original server reference (used in the no-bin error message).
 * @param {{ owner?: string, name: string, version?: string }} parsed - Parsed reference.
 * @param {object} opts - Reads `seedCwd` (required), `seedEnv`, `runLabel`.
 * @returns {Promise<object>} The `makeResult` value for the connected client.
 * @throws {Error} When `opts.seedCwd` is missing, or when staging/probing fails.
 */
async function connectIsolatedNpm(ref, parsed, opts) {
  // Fail fast on a caller error before doing any Docker work.
  if (!opts.seedCwd) {
    throw new Error("docker isolation requires a canary seed directory (seedCwd)");
  }
  const spec = (parsed.owner ? `${parsed.owner}/${parsed.name}` : parsed.name) + (parsed.version ? `@${parsed.version}` : "");
  const stageOpts = opts.runLabel ? { runLabel: opts.runLabel } : {};
  await ensureImage();
  let staged = null;
  let seed = null;
  try {
    staged = await stageNpmPackage(spec, stageOpts);
    seed = await prepareSeedVolume(opts.seedCwd, stageOpts);
    const resolvedVersion = resolveStagedVersion(parsed.version, staged.resolvedVersion);
    const stagedPkg = staged;
    const seedVol = seed;
    const candidates = orderBinCandidates(Object.keys(stagedPkg.bins), parsed.name);
    const { result } = await probeForMcpBin(ref, candidates, async (binName) => {
      const launch = containerLaunch({
        entry: stagedPkg.bins[binName],
        stageVolume: stagedPkg.volume,
        seedVolume: seedVol.volume,
        // Canaries travel INTO the container via -e, NOT via the docker CLI's own env.
        canaryEnv: opts.seedEnv ?? {},
        ...opts.runLabel ? { runLabel: opts.runLabel } : {},
        ...process.env.LITMUS_DOCKER_RUNTIME ? { runtime: process.env.LITMUS_DOCKER_RUNTIME } : {}
      });
      // Name the container so it can be force-removed later. The name is
      // spliced in right after the first launch argument (presumably the
      // docker subcommand — confirm against containerLaunch).
      const containerName = `pg-connect-${randomUUID3().slice(0, 8)}`;
      const namedArgs = [launch.args[0], "--name", containerName, ...launch.args.slice(1)];
      const transport = new StdioClientTransport({
        command: launch.command,
        args: namedArgs,
        env: getDefaultEnvironment()
        // default env only: no host secrets, no canaries
      });
      const client = await tryConnect(transport);
      if (!client) {
        // This bin did not connect: drop its container before trying the next one.
        await docker(["rm", "-f", containerName]).then(() => {
        }).catch(() => {
        });
        return null;
      }
      const descriptor = {
        kind: "stdio",
        command: recordedContainerCommand(launch.command, launch.args, {
          stageVolume: stagedPkg.volume,
          seedVolume: seedVol.volume
        }),
        url: null
      };
      return { client, descriptor, containerName };
    });
    const teardownExtra = [
      () => docker(["rm", "-f", result.containerName]).then(() => {
      }).catch(() => {
      }),
      staged.cleanup,
      seed.cleanup
    ];
    return makeResult(result.client, "stdio", result.descriptor, serverKey(parsed), resolvedVersion, teardownExtra);
  } catch (err) {
    // Best-effort cleanup, seed first then stage (same order as before). A
    // failing cleanup must neither skip the other nor hide the root cause.
    for (const resource of [seed, staged]) {
      if (!resource) continue;
      try {
        await resource.cleanup();
      } catch {
        // Intentionally ignored: `err` below is the error the caller needs.
      }
    }
    throw err;
  }
}
|
|
599
|
+
/**
 * Look up a package's bin names by running `npm view <spec> bin --json`
 * (20 s timeout).
 *
 * @param {string} spec - npm package spec, optionally with `@version`.
 * @param {string} pkgName - Name handed to `parseNpmBins` for the bare-string `bin` case.
 * @returns {Promise<string[] | null>} Parsed bin names, or `null` when the command or parsing throws.
 */
async function fetchNpmBins(spec, pkgName) {
  const viewArgs = ["view", spec, "bin", "--json"];
  const execOptions = { timeout: 2e4 };
  try {
    const viewed = await execFileP("npm", viewArgs, execOptions);
    return parseNpmBins(viewed.stdout, pkgName);
  } catch {
    // Any failure is reported as "unknown" rather than thrown.
    return null;
  }
}
|
|
607
|
+
/**
 * Attempt an MCP connection over `transport` without throwing.
 *
 * @param {object} transport - Transport handed to `Client#connect` and to `withConnectTimeout`.
 * @returns {Promise<object | null>} The connected client, or `null` if connecting failed
 *   (in which case the client is closed on a best-effort basis).
 */
async function tryConnect(transport) {
  const candidate = new Client(CLIENT_INFO, { capabilities: {} });
  let connected = false;
  try {
    await withConnectTimeout(candidate.connect(transport), transport);
    connected = true;
  } catch {
    try {
      await candidate.close();
    } catch {
      // Closing a client that never connected may itself fail; nothing to do.
    }
  }
  return connected ? candidate : null;
}
|
|
620
|
+
/**
 * Connect an MCP client over `transport`, propagating any failure.
 *
 * Unlike the previous revision, a failed connect now closes the client
 * (best-effort) before rethrowing, matching what `tryConnect` does, so a
 * failed attempt does not leave the client open.
 *
 * @param {object} transport - Transport handed to `Client#connect` and to `withConnectTimeout`.
 * @returns {Promise<object>} The connected client.
 * @throws Whatever `withConnectTimeout` / `Client#connect` rejects with.
 */
async function connectOrThrow(transport) {
  const client = new Client(CLIENT_INFO, { capabilities: {} });
  try {
    await withConnectTimeout(client.connect(transport), transport);
  } catch (err) {
    try {
      await client.close();
    } catch {
      // Best-effort: the connect error below is the one worth reporting.
    }
    throw err;
  }
  return client;
}
|
|
625
|
+
function makeResult(client, kind, descriptor, serverRef, resolvedVersion, teardownExtra) {
|
|
519
626
|
return {
|
|
520
627
|
client,
|
|
521
628
|
kind,
|
|
@@ -549,10 +656,6 @@ async function withConnectTimeout(connecting, transport) {
|
|
|
549
656
|
}
|
|
550
657
|
}
|
|
551
658
|
function launchForRef(p) {
|
|
552
|
-
if (p.registry === "npm") {
|
|
553
|
-
const spec = (p.owner ? `${p.owner}/${p.name}` : p.name) + (p.version ? `@${p.version}` : "");
|
|
554
|
-
return { command: "npx", args: ["-y", spec] };
|
|
555
|
-
}
|
|
556
659
|
if (p.registry === "pypi") {
|
|
557
660
|
return { command: "uvx", args: [p.version ? `${p.name}@${p.version}` : p.name] };
|
|
558
661
|
}
|
|
@@ -929,9 +1032,29 @@ async function c01Injection(ctx) {
|
|
|
929
1032
|
|
|
930
1033
|
// ../probes/src/docker/egress-runner.ts
|
|
931
1034
|
import { randomUUID as randomUUID4 } from "crypto";
|
|
1035
|
+
|
|
1036
|
+
// ../probes/src/probes/host-match.ts
|
|
1037
|
+
/**
 * Canonicalize a host string for comparison: trim, lowercase, drop a
 * `:port` suffix and a single trailing dot.
 *
 * IPv6 literals are kept intact. The previous revision cut at the first
 * colon, turning `[::1]:443` into `[` and `2001:db8::1` into `2001`. Now:
 *   - a bracketed literal (`[addr]` or `[addr]:port`) yields `addr`;
 *   - a value with more than one colon is treated as a bare IPv6 address
 *     and no port is stripped;
 *   - a value with exactly one colon is `host:port`, as before.
 *
 * @param {string} h - Host, optionally with a port.
 * @returns {string} Normalized host.
 */
function normalizeHost(h) {
  let s = h.trim().toLowerCase();
  if (s.startsWith("[")) {
    const close = s.indexOf("]");
    if (close !== -1) return s.slice(1, close);
  }
  const firstColon = s.indexOf(":");
  // Exactly one colon => host:port. Several colons => bare IPv6, leave as is.
  if (firstColon !== -1 && firstColon === s.lastIndexOf(":")) {
    s = s.slice(0, firstColon);
  }
  if (s.endsWith(".")) s = s.slice(0, -1);
  return s;
}

/**
 * Test a host against one allowlist pattern.
 *
 * A pattern starting with `*.` matches any strict subdomain of the rest
 * (`*.example.com` matches `api.example.com` but not `example.com` itself);
 * any other pattern must equal the normalized host exactly. The pattern is
 * trimmed and lowercased; the host goes through `normalizeHost`.
 *
 * @param {string} host - Observed host, optionally with a port.
 * @param {string} pattern - Exact host or `*.suffix` wildcard.
 * @returns {boolean} Whether the host is covered by the pattern.
 */
function hostMatchesPattern(host, pattern) {
  const h = normalizeHost(host);
  const p = pattern.trim().toLowerCase();
  if (p.startsWith("*.")) {
    // Keep the leading dot so "evilexample.com" cannot match "*.example.com".
    const suffix = p.slice(1);
    return h.endsWith(suffix) && h.length > suffix.length;
  }
  return h === p;
}
|
|
1053
|
+
|
|
1054
|
+
// ../probes/src/docker/egress-runner.ts
|
|
932
1055
|
var IMAGE_TAG3 = "polygraph-egress-sniff:latest";
|
|
933
1056
|
/**
 * Build the egress-probe result for a run that did not execute.
 *
 * @param {string} reason - Why the probe was not run.
 * @returns {{ ran: false, reason: string, attempts: [], declaredEgress: [], baselineAllowlist: [] }}
 *   A fresh record with empty attempt and allowlist arrays.
 */
function notRan(reason) {
  const skipped = {
    ran: false,
    reason,
    attempts: [],
    declaredEgress: [],
    baselineAllowlist: []
  };
  return skipped;
}
|
|
936
1059
|
function parseSinkholeOutput(output) {
|
|
937
1060
|
const attempts = [];
|
|
@@ -963,6 +1086,40 @@ function egressToFindings(attempts) {
|
|
|
963
1086
|
...a.firstBytes !== void 0 ? { firstBytes: a.firstBytes } : {}
|
|
964
1087
|
}));
|
|
965
1088
|
}
|
|
1089
|
+
/**
 * Annotate each egress attempt with where its host came from.
 *
 * Attempts are processed in order. DNS attempts that carry a host are queued;
 * a later non-DNS attempt without a host takes the oldest queued DNS host
 * (first in, first out) and is marked `"dns-correlation"`. Attempts that
 * already carry a host are marked `"given"`; everything else is `"none"`.
 *
 * @param {Iterable<object>} attempts - Attempts with `kind` and optional `host`.
 * @returns {object[]} Copies of the attempts with `hostSource` (and possibly `host`) set.
 */
function correlateEgress(attempts) {
  const unclaimedDnsHosts = [];
  const annotate = (attempt) => {
    if (attempt.kind === "dns") {
      if (!attempt.host) return { ...attempt, hostSource: "none" };
      unclaimedDnsHosts.push(attempt.host);
      return { ...attempt, hostSource: "given" };
    }
    if (attempt.host) return { ...attempt, hostSource: "given" };
    const inferred = unclaimedDnsHosts.shift();
    if (!inferred) return { ...attempt, hostSource: "none" };
    return { ...attempt, host: inferred, hostSource: "dns-correlation" };
  };
  return Array.from(attempts, (attempt) => annotate(attempt));
}
|
|
1105
|
+
/**
 * Mark each correlated attempt as allowed or not against an allowlist.
 *
 * An attempt is allowed only when it has a host and some pattern matches it
 * via `hostMatchesPattern`; the first matching pattern is recorded as
 * `matchedPattern`. Attempts without a host are never allowed.
 *
 * @param {object[]} correlated - Attempts as returned by `correlateEgress`.
 * @param {string[]} allowlist - Host patterns, checked in order.
 * @returns {object[]} Copies with `allowed` (and `matchedPattern` when allowed).
 */
function classifyEgress(correlated, allowlist) {
  const classify = (entry) => {
    let hit;
    if (entry.host !== void 0) {
      hit = allowlist.find((pattern) => hostMatchesPattern(entry.host, pattern));
    }
    if (hit) {
      return { ...entry, allowed: true, matchedPattern: hit };
    }
    return { ...entry, allowed: false };
  };
  return correlated.map((entry) => classify(entry));
}
|
|
1114
|
+
/**
 * Turn the allowed egress attempts into low-severity `"egress-allowed"` findings.
 *
 * `match` reads `<host>[:<port>] (allowed: <pattern>)`, with `?` standing in
 * for a missing host or pattern; the port is shown only when truthy. `host`
 * and `port` are copied onto the finding whenever they are defined.
 *
 * @param {object[]} classified - Attempts as returned by `classifyEgress`.
 * @returns {object[]} One finding per attempt whose `allowed` is truthy.
 */
function egressAllowedFindings(classified) {
  const findings = [];
  for (const entry of classified) {
    if (!entry.allowed) continue;
    const hostLabel = entry.host ?? "?";
    const portLabel = entry.port ? `:${entry.port}` : "";
    const patternLabel = entry.matchedPattern ?? "?";
    const finding = {
      kind: "egress-allowed",
      severity: "low",
      match: `${hostLabel}${portLabel} (allowed: ${patternLabel})`
    };
    if (entry.host !== void 0) finding.host = entry.host;
    if (entry.port !== void 0) finding.port = entry.port;
    findings.push(finding);
  }
  return findings;
}
|
|
966
1123
|
function egressCanaryFindings(attempts, canaries) {
|
|
967
1124
|
const findings = [];
|
|
968
1125
|
for (const a of attempts) {
|
|
@@ -1041,7 +1198,8 @@ async function runEgressProbe(ref, opts) {
|
|
|
1041
1198
|
if (msg.includes("exposes no launchable bin")) return notRan(msg);
|
|
1042
1199
|
throw err;
|
|
1043
1200
|
}
|
|
1044
|
-
const
|
|
1201
|
+
const vol = staged.volume;
|
|
1202
|
+
const entry = staged.bins[orderBinCandidates(Object.keys(staged.bins), parsed.name)[0]];
|
|
1045
1203
|
await docker(["network", "create", "--internal", ...label, net]);
|
|
1046
1204
|
await docker([
|
|
1047
1205
|
"run",
|
|
@@ -1081,7 +1239,13 @@ async function runEgressProbe(ref, opts) {
|
|
|
1081
1239
|
await conn.teardown();
|
|
1082
1240
|
}
|
|
1083
1241
|
const logs = await docker(["logs", sink]);
|
|
1084
|
-
return {
|
|
1242
|
+
return {
|
|
1243
|
+
ran: true,
|
|
1244
|
+
reason: null,
|
|
1245
|
+
attempts: parseSinkholeOutput(logs),
|
|
1246
|
+
declaredEgress: staged.declaredEgress,
|
|
1247
|
+
baselineAllowlist: opts.baselineAllowlist ?? []
|
|
1248
|
+
};
|
|
1085
1249
|
} catch (err) {
|
|
1086
1250
|
return notRan(`egress sandbox unavailable: ${err instanceof Error ? err.message : String(err)}`);
|
|
1087
1251
|
} finally {
|
|
@@ -1095,6 +1259,28 @@ async function runEgressProbe(ref, opts) {
|
|
|
1095
1259
|
}
|
|
1096
1260
|
}
|
|
1097
1261
|
|
|
1262
|
+
// ../probes/src/probes/egress-allowlist.ts
|
|
1263
|
+
var DEFAULT_EGRESS_BASELINE = [];
|
|
1264
|
+
/**
 * Canonicalize an allowlist pattern: strip surrounding whitespace, then lowercase.
 *
 * @param {string} p - Raw pattern.
 * @returns {string} Normalized pattern (may be empty).
 */
function normalizePattern(p) {
  const trimmed = p.trim();
  return trimmed.toLowerCase();
}
|
|
1267
|
+
/**
 * Parse a comma-separated allowlist string into normalized patterns.
 *
 * @param {string | undefined} raw - Comma-separated patterns; falsy means "none".
 * @returns {string[]} Normalized, non-empty patterns in input order (duplicates kept).
 */
function parseAllowlistEnv(raw) {
  if (!raw) return [];
  const patterns = [];
  for (const piece of raw.split(",")) {
    const pattern = normalizePattern(piece);
    if (pattern.length > 0) patterns.push(pattern);
  }
  return patterns;
}
|
|
1271
|
+
/**
 * Merge the baseline and server-declared allowlists into one list.
 *
 * Patterns are normalized, empty ones dropped, and duplicates removed while
 * keeping first-seen order (baseline entries come before declared ones).
 *
 * @param {string[]} baseline - Baseline patterns.
 * @param {string[]} declared - Patterns declared by the target package.
 * @returns {string[]} De-duplicated normalized patterns.
 */
function effectiveAllowlist(baseline, declared) {
  // Set iteration follows insertion order, which gives the first-seen ordering.
  const unique = /* @__PURE__ */ new Set();
  for (const raw of [...baseline, ...declared]) {
    const pattern = normalizePattern(raw);
    if (pattern.length > 0) unique.add(pattern);
  }
  return [...unique];
}
|
|
1283
|
+
|
|
1098
1284
|
// ../probes/src/probes/c02-egress.ts
|
|
1099
1285
|
function probe21Declaration(tools) {
|
|
1100
1286
|
const findings = [];
|
|
@@ -1113,8 +1299,18 @@ function probe21Declaration(tools) {
|
|
|
1113
1299
|
}
|
|
1114
1300
|
/**
 * Probe 2.2: judge observed egress against the effective allowlist.
 *
 * - Probe did not run -> `skipped`, carrying the probe's reason.
 * - Any attempt outside the allowlist -> `fail`.
 * - Otherwise -> `pass`; `reason` summarizes permitted attempts, or is `null`
 *   when there were none.
 *
 * Findings list overreach entries first, then the permitted ones.
 *
 * @param {{ ran: boolean, reason: string | null, attempts: object[], declaredEgress: string[], baselineAllowlist: string[] }} egress
 * @returns {{ id: "2.2", status: string, findings: object[], reason?: string | null }}
 */
function probe22Egress(egress) {
  const id = "2.2";
  if (!egress.ran) {
    return { id, status: "skipped", findings: [], reason: egress.reason };
  }
  const allowlist = effectiveAllowlist(egress.baselineAllowlist, egress.declaredEgress);
  const classified = classifyEgress(correlateEgress(egress.attempts), allowlist);
  const overreach = [];
  const allowed = [];
  for (const entry of classified) {
    (entry.allowed ? allowed : overreach).push(entry);
  }
  const findings = [...egressToFindings(overreach), ...egressAllowedFindings(allowed)];
  if (overreach.length > 0) {
    return { id, status: "fail", findings };
  }
  const reason = allowed.length > 0 ? `${allowed.length} declared/baseline egress attempt(s) permitted; 0 overreach` : null;
  return { id, status: "pass", findings, reason };
}
|
|
1119
1315
|
function c02Permission(declaration, egress) {
|
|
1120
1316
|
const egressProbe = probe22Egress(egress);
|
|
@@ -1245,11 +1441,14 @@ function gradeFromCategories(categories) {
|
|
|
1245
1441
|
if (c02?.status === "fail") {
|
|
1246
1442
|
return {
|
|
1247
1443
|
grade: "D",
|
|
1248
|
-
rationale: "
|
|
1444
|
+
rationale: "Egress overreach (C-02 failed): reached a host outside its declared/baseline allowlist (or mislabeled a tool). No injection or data leak, so the grade caps at D."
|
|
1249
1445
|
};
|
|
1250
1446
|
}
|
|
1251
1447
|
if (c01?.status === "pass" && c02?.status === "pass" && c03?.status === "pass") {
|
|
1252
|
-
return {
|
|
1448
|
+
return {
|
|
1449
|
+
grade: "A",
|
|
1450
|
+
rationale: "All three categories passed. No injection, no data leak, and no egress overreach \u2014 declared/baseline egress, if any, was permitted (A means no overreach, not no network)."
|
|
1451
|
+
};
|
|
1253
1452
|
}
|
|
1254
1453
|
if (c01?.status === "pass") {
|
|
1255
1454
|
const note = skipped.length ? ` Not verified: ${skipped.join(", ")} (${skipped.map((c) => byCode(c)?.reason).filter(Boolean).join("; ")}).` : "";
|
|
@@ -1304,6 +1503,7 @@ function assembleBundle(input) {
|
|
|
1304
1503
|
async function runLitmus(target, opts = {}) {
|
|
1305
1504
|
const isolation = opts.isolation ?? (process.env.LITMUS_STDIO_ISOLATION === "docker" ? "docker" : "none");
|
|
1306
1505
|
const ranAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1506
|
+
const baselineAllowlist = [...DEFAULT_EGRESS_BASELINE, ...parseAllowlistEnv(process.env.LITMUS_EGRESS_ALLOWLIST)];
|
|
1307
1507
|
const dockerAvailable = await checkDocker();
|
|
1308
1508
|
const canaries = mintCanaries();
|
|
1309
1509
|
const seedEnv = canaryEnv(canaries);
|
|
@@ -1344,10 +1544,12 @@ async function runLitmus(target, opts = {}) {
|
|
|
1344
1544
|
stateChangingTools,
|
|
1345
1545
|
allowStateChanging: opts.allowStateChanging ?? false
|
|
1346
1546
|
};
|
|
1347
|
-
const egress = dockerAvailable && typeof target === "string" && !/^https?:\/\//i.test(target) ? await runEgressProbe(target, { canaryEnv: seedEnv, ...opts.runLabel ? { runLabel: opts.runLabel } : {} }) : {
|
|
1547
|
+
const egress = dockerAvailable && typeof target === "string" && !/^https?:\/\//i.test(target) ? await runEgressProbe(target, { canaryEnv: seedEnv, baselineAllowlist, ...opts.runLabel ? { runLabel: opts.runLabel } : {} }) : {
|
|
1348
1548
|
ran: false,
|
|
1349
1549
|
reason: dockerAvailable ? "egress not run for this target" : "no sandbox (Docker unavailable)",
|
|
1350
|
-
attempts: []
|
|
1550
|
+
attempts: [],
|
|
1551
|
+
declaredEgress: [],
|
|
1552
|
+
baselineAllowlist: []
|
|
1351
1553
|
};
|
|
1352
1554
|
assertEgressRanUnderIsolation(egress, isolation, isStdio);
|
|
1353
1555
|
const categories = [
|
|
@@ -1359,7 +1561,9 @@ async function runLitmus(target, opts = {}) {
|
|
|
1359
1561
|
return assembleBundle({
|
|
1360
1562
|
serverRef: conn.serverRef,
|
|
1361
1563
|
resolvedVersion: conn.resolvedVersion,
|
|
1362
|
-
|
|
1564
|
+
// Surface the server's declared egress in the bundle (disclosure: a
|
|
1565
|
+
// declaration is not exoneration — the consumer/agent-gate can judge).
|
|
1566
|
+
target: egress.declaredEgress.length ? { ...conn.descriptor, declaredEgress: egress.declaredEgress } : conn.descriptor,
|
|
1363
1567
|
toolDefsFingerprint: fingerprint,
|
|
1364
1568
|
toolDefs: canonical,
|
|
1365
1569
|
categories,
|
|
@@ -1437,7 +1641,7 @@ function withTimeout(p, ms, label) {
|
|
|
1437
1641
|
}
|
|
1438
1642
|
/**
 * Check whether Docker is usable by running `docker info` (4 s timeout).
 *
 * @returns {Promise<boolean>} `true` when the command completes without error;
 *   `false` when it fails or the child process emits an `error` event. Never rejects.
 */
function checkDocker() {
  return new Promise((resolve) => {
    const onExit = (err) => resolve(!err);
    const probe = execFile3("docker", ["info"], { timeout: 4e3 }, onExit);
    // A spawn failure surfaces as an "error" event on the child.
    probe.on("error", () => resolve(false));
  });
}
|
|
@@ -1,15 +1,13 @@
|
|
|
1
1
|
import {
|
|
2
|
-
mintUrl,
|
|
3
|
-
pinBundle,
|
|
4
2
|
resolveTarget
|
|
5
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-WBXHDYIV.js";
|
|
6
4
|
import {
|
|
7
5
|
runLitmus
|
|
8
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-MB5EPL2V.js";
|
|
9
7
|
import {
|
|
10
8
|
CATEGORY_STATUS_UINT8,
|
|
11
9
|
METHODOLOGY_VERSION
|
|
12
|
-
} from "./chunk-
|
|
10
|
+
} from "./chunk-K7UEK2BA.js";
|
|
13
11
|
|
|
14
12
|
// ../onchain/src/networks.ts
|
|
15
13
|
var NETWORKS = {
|
|
@@ -137,44 +135,23 @@ var RUN_LITMUS_TOOL_DESCRIPTION = [
|
|
|
137
135
|
"for egress when Docker is available). It is not a passive lookup \u2014 for that,",
|
|
138
136
|
"use `verify_attestation`. It needs no wallet or RPC.",
|
|
139
137
|
"",
|
|
140
|
-
"When POLYGRAPH_API_URL is configured the evidence is pinned and the result",
|
|
141
|
-
"includes a `mint` URL: open it in a browser, connect a wallet, and sign to",
|
|
142
|
-
"publish the grade onchain as an EAS attestation. Signing is intentionally not",
|
|
143
|
-
"headless.",
|
|
144
|
-
"",
|
|
145
138
|
"Input: server_ref \u2014 a registry ref (npm/@scope/server), an https:// MCP URL,",
|
|
146
139
|
"or a local path to an MCP entry file. If Docker is unavailable, C-02 is",
|
|
147
140
|
"skipped and the grade is capped at B for that run."
|
|
148
141
|
].join("\n");
|
|
149
142
|
var runLitmusInputShape = {
|
|
150
|
-
server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file."),
|
|
151
|
-
pin: z.boolean().optional().describe("When true (default) and POLYGRAPH_API_URL is set, pin the evidence and return a mint hand-off URL. Set false to grade only.")
|
|
143
|
+
server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file.")
|
|
152
144
|
};
|
|
153
|
-
async function handleRunLitmus({ server_ref, pin }) {
|
|
145
|
+
async function handleRunLitmus({ server_ref }) {
|
|
154
146
|
try {
|
|
155
147
|
const bundle = await runLitmus(resolveTarget(server_ref));
|
|
156
|
-
const payload =
|
|
148
|
+
const payload = summarize(bundle);
|
|
157
149
|
return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
|
|
158
150
|
} catch (err) {
|
|
159
151
|
const message = err instanceof Error ? err.message : String(err);
|
|
160
152
|
return { isError: true, content: [{ type: "text", text: `run_litmus failed: ${message}` }] };
|
|
161
153
|
}
|
|
162
154
|
}
|
|
163
|
-
async function mintHandoff(bundle, pin) {
|
|
164
|
-
if (pin === false || !process.env.POLYGRAPH_API_URL) {
|
|
165
|
-
return { available: false, reason: "Set POLYGRAPH_API_URL to pin the evidence and get a mint hand-off URL." };
|
|
166
|
-
}
|
|
167
|
-
try {
|
|
168
|
-
const cid = await pinBundle(bundle);
|
|
169
|
-
return {
|
|
170
|
-
url: mintUrl({ cid, ref: bundle.serverRef, fp: bundle.toolDefsFingerprint, ver: bundle.resolvedVersion }),
|
|
171
|
-
cid,
|
|
172
|
-
instruction: "Open this URL in a browser, connect your wallet, and sign to mint the onchain EAS attestation. Signing cannot be done headlessly."
|
|
173
|
-
};
|
|
174
|
-
} catch (err) {
|
|
175
|
-
return { available: false, reason: `pin failed: ${err instanceof Error ? err.message : String(err)}` };
|
|
176
|
-
}
|
|
177
|
-
}
|
|
178
155
|
function summarize(b) {
|
|
179
156
|
const find = (code) => b.categories.find((c) => c.code === code);
|
|
180
157
|
const categories = ["C-01", "C-02", "C-03"].map((code) => {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
canonicalStringify
|
|
3
|
-
} from "./chunk-SAZKXB35.js";
|
|
3
|
+
} from "./chunk-K7UEK2BA.js";
|
|
4
4
|
|
|
5
5
|
// ../cli/src/litmus.ts
|
|
6
6
|
import { existsSync } from "fs";
|
|
@@ -33,39 +33,6 @@ function truncate(s, n) {
|
|
|
33
33
|
return s.length > n ? `${s.slice(0, n)}\u2026` : s;
|
|
34
34
|
}
|
|
35
35
|
|
|
36
|
-
// ../cli/src/api.ts
|
|
37
|
-
var DEFAULT_BASE = "https://polygraph.so";
|
|
38
|
-
function apiBaseUrl() {
|
|
39
|
-
const override = process.env.POLYGRAPH_API_URL;
|
|
40
|
-
if (!override || override.length === 0) return DEFAULT_BASE;
|
|
41
|
-
const trimmed = override.replace(/\/+$/, "");
|
|
42
|
-
let u;
|
|
43
|
-
try {
|
|
44
|
-
u = new URL(trimmed);
|
|
45
|
-
} catch {
|
|
46
|
-
throw new Error(`POLYGRAPH_API_URL is not a valid URL: ${override}`);
|
|
47
|
-
}
|
|
48
|
-
const isLoopback = u.hostname === "localhost" || u.hostname === "127.0.0.1" || u.hostname === "::1";
|
|
49
|
-
if (u.protocol !== "https:" && !(u.protocol === "http:" && isLoopback)) {
|
|
50
|
-
throw new Error(`POLYGRAPH_API_URL must use https (http allowed only for localhost): ${override}`);
|
|
51
|
-
}
|
|
52
|
-
return trimmed;
|
|
53
|
-
}
|
|
54
|
-
function pinUrl() {
|
|
55
|
-
return `${apiBaseUrl()}/api/pin`;
|
|
56
|
-
}
|
|
57
|
-
function attestationsUrl() {
|
|
58
|
-
return `${apiBaseUrl()}/api/attestations`;
|
|
59
|
-
}
|
|
60
|
-
function mintUrl(params) {
|
|
61
|
-
const u = new URL(`${apiBaseUrl()}/mint`);
|
|
62
|
-
u.searchParams.set("cid", params.cid);
|
|
63
|
-
u.searchParams.set("ref", params.ref);
|
|
64
|
-
u.searchParams.set("fp", params.fp);
|
|
65
|
-
if (params.ver) u.searchParams.set("ver", params.ver);
|
|
66
|
-
return u.toString();
|
|
67
|
-
}
|
|
68
|
-
|
|
69
36
|
// ../cli/src/litmus.ts
|
|
70
37
|
async function runLitmusCli(args) {
|
|
71
38
|
const json = args.includes("--json");
|
|
@@ -77,12 +44,11 @@ async function runLitmusCli(args) {
|
|
|
77
44
|
);
|
|
78
45
|
return 2;
|
|
79
46
|
}
|
|
80
|
-
const { runLitmus } = await import("./src-XIEFSTXC.js");
|
|
47
|
+
const { runLitmus } = await import("./src-PTK3WEGQ.js");
|
|
81
48
|
const input = resolveTarget(target);
|
|
82
49
|
try {
|
|
83
50
|
const bundle = await runLitmus(input, { headers, allowStateChanging });
|
|
84
51
|
process.stdout.write(json ? canonicalStringify(bundle) + "\n" : formatBundle(bundle));
|
|
85
|
-
await maybePin(bundle, json);
|
|
86
52
|
return bundle.grade === "D" || bundle.grade === "F" ? 1 : 0;
|
|
87
53
|
} catch (err) {
|
|
88
54
|
process.stderr.write(`\u2192 litmus failed: ${err instanceof Error ? err.message : String(err)}
|
|
@@ -144,37 +110,9 @@ function tsxCli() {
|
|
|
144
110
|
const rel = typeof bin === "string" ? bin : bin.tsx ?? "./dist/cli.mjs";
|
|
145
111
|
return path.join(dir, rel);
|
|
146
112
|
}
|
|
147
|
-
async function maybePin(bundle, json = false) {
|
|
148
|
-
if (!process.env.POLYGRAPH_API_URL) return;
|
|
149
|
-
const note = (line) => (json ? process.stderr : process.stdout).write(line);
|
|
150
|
-
try {
|
|
151
|
-
const cid = await pinBundle(bundle);
|
|
152
|
-
note(`\u2192 pinned ${cid}
|
|
153
|
-
`);
|
|
154
|
-
note(`\u2192 mint ${mintUrl({ cid, ref: bundle.serverRef, fp: bundle.toolDefsFingerprint, ver: bundle.resolvedVersion })}
|
|
155
|
-
`);
|
|
156
|
-
} catch (err) {
|
|
157
|
-
note(`\u2192 pin skipped: ${err instanceof Error ? err.message : String(err)}
|
|
158
|
-
`);
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
async function pinBundle(bundle) {
|
|
162
|
-
const res = await fetch(pinUrl(), {
|
|
163
|
-
method: "POST",
|
|
164
|
-
headers: { "content-type": "application/json" },
|
|
165
|
-
body: canonicalStringify(bundle)
|
|
166
|
-
});
|
|
167
|
-
if (!res.ok) throw new Error(`pin endpoint returned ${res.status}`);
|
|
168
|
-
const data = await res.json();
|
|
169
|
-
if (!data.cid) throw new Error("pin response missing cid");
|
|
170
|
-
return data.cid;
|
|
171
|
-
}
|
|
172
113
|
|
|
173
114
|
export {
|
|
174
|
-
attestationsUrl,
|
|
175
|
-
mintUrl,
|
|
176
115
|
runLitmusCli,
|
|
177
116
|
parseAuthFlags,
|
|
178
|
-
resolveTarget,
|
|
179
|
-
pinBundle
|
|
117
|
+
resolveTarget
|
|
180
118
|
};
|
package/dist/cli.js
CHANGED
|
@@ -1,18 +1,39 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
|
-
attestationsUrl,
|
|
4
3
|
runLitmusCli
|
|
5
|
-
} from "./chunk-6QM4RK25.js";
|
|
4
|
+
} from "./chunk-WBXHDYIV.js";
|
|
6
5
|
import {
|
|
7
6
|
parseServerRef,
|
|
8
7
|
serverKey
|
|
9
|
-
} from "./chunk-SAZKXB35.js";
|
|
8
|
+
} from "./chunk-K7UEK2BA.js";
|
|
10
9
|
|
|
11
10
|
// src/cli.ts
|
|
12
11
|
import { readFileSync } from "fs";
|
|
13
12
|
import { fileURLToPath } from "url";
|
|
14
13
|
import { dirname, join } from "path";
|
|
15
14
|
|
|
15
|
+
// ../cli/src/api.ts
|
|
16
|
+
var DEFAULT_BASE = "https://polygraph.so";
|
|
17
|
+
function apiBaseUrl() {
|
|
18
|
+
const override = process.env.POLYGRAPH_API_URL;
|
|
19
|
+
if (!override || override.length === 0) return DEFAULT_BASE;
|
|
20
|
+
const trimmed = override.replace(/\/+$/, "");
|
|
21
|
+
let u;
|
|
22
|
+
try {
|
|
23
|
+
u = new URL(trimmed);
|
|
24
|
+
} catch {
|
|
25
|
+
throw new Error(`POLYGRAPH_API_URL is not a valid URL: ${override}`);
|
|
26
|
+
}
|
|
27
|
+
const isLoopback = u.hostname === "localhost" || u.hostname === "127.0.0.1" || u.hostname === "::1";
|
|
28
|
+
if (u.protocol !== "https:" && !(u.protocol === "http:" && isLoopback)) {
|
|
29
|
+
throw new Error(`POLYGRAPH_API_URL must use https (http allowed only for localhost): ${override}`);
|
|
30
|
+
}
|
|
31
|
+
return trimmed;
|
|
32
|
+
}
|
|
33
|
+
function attestationsUrl() {
|
|
34
|
+
return `${apiBaseUrl()}/api/attestations`;
|
|
35
|
+
}
|
|
36
|
+
|
|
16
37
|
// ../cli/src/check.ts
|
|
17
38
|
function checkQuery(rawRef) {
|
|
18
39
|
try {
|
package/dist/index.d.ts
CHANGED
|
@@ -11,13 +11,16 @@ import { z } from 'zod';
|
|
|
11
11
|
/** Package registries a server ref can name. */
|
|
12
12
|
type Registry = "npm" | "pypi" | "github";
|
|
13
13
|
/** The methodology this build implements; embedded in every bundle + attestation.
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
|
|
14
|
+
* v3 reframes C-02 probe 2.2 from default-deny (any egress fails) to OVERREACH:
|
|
15
|
+
* egress to a host the server declared (`polygraph.egress`) or on the operator
|
|
16
|
+
* baseline allowlist is permitted; only egress beyond that union fails. A
|
|
17
|
+
* pass/fail-semantics change → version bumps per litmus-test §8. NOTE: under v3,
|
|
18
|
+
* grade "A" means "no overreach", NOT "no network". (v2 added probe 2.1.) */
|
|
19
|
+
declare const METHODOLOGY_VERSION: "litmus-v3";
|
|
17
20
|
/** Evidence-bundle format version (owned by onchain-proof-spec §2).
|
|
18
|
-
* 1.
|
|
19
|
-
*
|
|
20
|
-
declare const BUNDLE_SCHEMA_VERSION: "1.
|
|
21
|
+
* 1.2.0 adds the optional `target.declaredEgress` field and the `egress-allowed`
|
|
22
|
+
* finding kind (litmus-v3); 1.1.0 adds `harness.stdioIsolation`; older remain valid. */
|
|
23
|
+
declare const BUNDLE_SCHEMA_VERSION: "1.2.0";
|
|
21
24
|
type CategoryCode = "C-01" | "C-02" | "C-03" | "C-04";
|
|
22
25
|
/** Probe IDs carry their family number (1=injection, 2=permission, 4=sensitive). */
|
|
23
26
|
type ProbeId = "1.1" | "1.2" | "2.1" | "2.2" | "4.1" | "4.2";
|
|
@@ -27,7 +30,7 @@ type LitmusGrade = "A" | "B" | "C" | "D" | "F";
|
|
|
27
30
|
type Severity = "low" | "medium" | "high";
|
|
28
31
|
/** uint8 encoding for per-category verdicts on the attestation (onchain-proof-spec §5). */
|
|
29
32
|
declare const CATEGORY_STATUS_UINT8: Record<CategoryStatus, number>;
|
|
30
|
-
type FindingKind = "invisible-unicode" | "instruction-mimicry" | "markdown-trick" | "canary" | "egress" | "permission-mislabel";
|
|
33
|
+
type FindingKind = "invisible-unicode" | "instruction-mimicry" | "markdown-trick" | "canary" | "egress" | "egress-allowed" | "permission-mislabel";
|
|
31
34
|
interface Finding {
|
|
32
35
|
kind: FindingKind;
|
|
33
36
|
severity: Severity;
|
|
@@ -61,6 +64,9 @@ interface TargetDescriptor {
|
|
|
61
64
|
command?: string | null;
|
|
62
65
|
/** http: the remote MCP URL. */
|
|
63
66
|
url?: string | null;
|
|
67
|
+
/** The server's declared egress host patterns (`polygraph.egress`, C-02
|
|
68
|
+
* litmus-v3). Present only when non-empty. Disclosure, not exoneration. */
|
|
69
|
+
declaredEgress?: string[];
|
|
64
70
|
}
|
|
65
71
|
/** The canonicalized fields of a tool that the fingerprint hashes. */
|
|
66
72
|
interface ToolDef {
|
|
@@ -159,6 +165,11 @@ declare function canonicalStringify(value: unknown): string;
|
|
|
159
165
|
* - an explicit `{command,args}` (for in-repo demo servers and tests) launches
|
|
160
166
|
* over stdio directly.
|
|
161
167
|
*
|
|
168
|
+
* For an npm ref the package may ship several bins (e.g. a CLI plus a `*-mcp`
|
|
169
|
+
* server) or a default bin that isn't an MCP server. We enumerate the bins and
|
|
170
|
+
* PROBE them in order (mcp-named first), keeping the first that completes the MCP
|
|
171
|
+
* handshake — so a CLI-first or multi-bin package still grades.
|
|
172
|
+
*
|
|
162
173
|
* Returns the connected `Client`, a descriptor for the evidence bundle, and a
|
|
163
174
|
* teardown. The normal MCP handshake (`initialize`) happens inside `connect()`.
|
|
164
175
|
*/
|
|
@@ -196,7 +207,7 @@ interface ConnectOptions {
|
|
|
196
207
|
httpHeaders?: Record<string, string>;
|
|
197
208
|
/**
|
|
198
209
|
* stdio execution mode. "none" (default) launches the target on the host;
|
|
199
|
-
* "docker" runs an npm target ONLY inside the hardened container
|
|
210
|
+
* "docker" runs an npm target ONLY inside the hardened container and
|
|
200
211
|
* throws IsolationUnsupportedError for any other stdio kind. http targets are
|
|
201
212
|
* unaffected (isolation is stdio-only).
|
|
202
213
|
*/
|
|
@@ -527,13 +538,12 @@ declare function liveFingerprint(target: TargetInput): Promise<LiveTarget>;
|
|
|
527
538
|
|
|
528
539
|
/**
|
|
529
540
|
* `run_litmus` — run the open behavioral harness end-to-end against an MCP
|
|
530
|
-
* server and return the grade
|
|
531
|
-
*
|
|
541
|
+
* server and return the grade and the evidence. Brand-voiced: plain, exact, no
|
|
542
|
+
* overclaim.
|
|
532
543
|
*
|
|
533
544
|
* Unlike `verify_attestation` (a passive onchain read), this tool LAUNCHES the
|
|
534
545
|
* target server's code to exercise it — sandboxed for egress when Docker is
|
|
535
|
-
* present. It needs no wallet or RPC
|
|
536
|
-
* browser via the returned URL) requires a wallet.
|
|
546
|
+
* present. It needs no wallet or RPC.
|
|
537
547
|
*/
|
|
538
548
|
|
|
539
549
|
declare const RUN_LITMUS_TOOL_NAME = "run_litmus";
|
|
@@ -541,11 +551,9 @@ declare const RUN_LITMUS_TOOL_TITLE = "Run a behavioral litmus on an MCP server"
|
|
|
541
551
|
declare const RUN_LITMUS_TOOL_DESCRIPTION: string;
|
|
542
552
|
declare const runLitmusInputShape: {
|
|
543
553
|
server_ref: z.ZodString;
|
|
544
|
-
pin: z.ZodOptional<z.ZodBoolean>;
|
|
545
554
|
};
|
|
546
|
-
declare function handleRunLitmus({ server_ref, pin }: {
|
|
555
|
+
declare function handleRunLitmus({ server_ref }: {
|
|
547
556
|
server_ref: string;
|
|
548
|
-
pin?: boolean;
|
|
549
557
|
}): Promise<{
|
|
550
558
|
content: {
|
|
551
559
|
type: "text";
|
|
@@ -565,7 +573,6 @@ declare function handleRunLitmus({ server_ref, pin }: {
|
|
|
565
573
|
* harness locally and print the grade. The heavy harness (`@polygraph/probes`)
|
|
566
574
|
* is loaded lazily so the zero-dep `check`/`list` fast path stays intact.
|
|
567
575
|
*/
|
|
568
|
-
|
|
569
576
|
type StdioCommand = {
|
|
570
577
|
command: string;
|
|
571
578
|
args: string[];
|
package/dist/index.js
CHANGED
|
@@ -14,11 +14,11 @@ import {
|
|
|
14
14
|
rpcUrl,
|
|
15
15
|
runLitmusInputShape,
|
|
16
16
|
selectedNetwork
|
|
17
|
-
} from "./chunk-MQC54LFV.js";
|
|
17
|
+
} from "./chunk-UA4BIHP4.js";
|
|
18
18
|
import {
|
|
19
19
|
parseAuthFlags,
|
|
20
20
|
resolveTarget
|
|
21
|
-
} from "./chunk-6QM4RK25.js";
|
|
21
|
+
} from "./chunk-WBXHDYIV.js";
|
|
22
22
|
import {
|
|
23
23
|
assembleBundle,
|
|
24
24
|
canaryMatch,
|
|
@@ -32,7 +32,7 @@ import {
|
|
|
32
32
|
markdownTricks,
|
|
33
33
|
runLitmus,
|
|
34
34
|
stateChangingToolNames
|
|
35
|
-
} from "./chunk-2K6T4FZX.js";
|
|
35
|
+
} from "./chunk-MB5EPL2V.js";
|
|
36
36
|
import {
|
|
37
37
|
BUNDLE_SCHEMA_VERSION,
|
|
38
38
|
CATEGORY_STATUS_UINT8,
|
|
@@ -42,7 +42,7 @@ import {
|
|
|
42
42
|
formatServerRef,
|
|
43
43
|
parseServerRef,
|
|
44
44
|
serverKey
|
|
45
|
-
} from "./chunk-SAZKXB35.js";
|
|
45
|
+
} from "./chunk-K7UEK2BA.js";
|
|
46
46
|
|
|
47
47
|
// ../agent/src/gate.ts
|
|
48
48
|
function sameServer(a, b) {
|
package/dist/mcp.js
CHANGED
|
@@ -7,13 +7,13 @@ import {
|
|
|
7
7
|
readAttestation,
|
|
8
8
|
runLitmusInputShape,
|
|
9
9
|
selectedNetwork
|
|
10
|
-
} from "./chunk-MQC54LFV.js";
|
|
11
|
-
import "./chunk-6QM4RK25.js";
|
|
12
|
-
import "./chunk-2K6T4FZX.js";
|
|
10
|
+
} from "./chunk-UA4BIHP4.js";
|
|
11
|
+
import "./chunk-WBXHDYIV.js";
|
|
12
|
+
import "./chunk-MB5EPL2V.js";
|
|
13
13
|
import {
|
|
14
14
|
parseServerRef,
|
|
15
15
|
serverKey
|
|
16
|
-
} from "./chunk-SAZKXB35.js";
|
|
16
|
+
} from "./chunk-K7UEK2BA.js";
|
|
17
17
|
|
|
18
18
|
// src/mcp.ts
|
|
19
19
|
import { realpathSync } from "fs";
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@polygraphso/litmus",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data)
|
|
3
|
+
"version": "0.4.0",
|
|
4
|
+
"description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"homepage": "https://polygraph.so",
|
|
7
7
|
"repository": {
|
|
@@ -59,9 +59,9 @@
|
|
|
59
59
|
"vitest": "^2.1.0",
|
|
60
60
|
"@polygraph/core": "0.0.0",
|
|
61
61
|
"@polygraph/probes": "0.0.0",
|
|
62
|
-
"@polygraph/agent": "0.0.0",
|
|
63
62
|
"@polygraph/onchain": "0.0.0",
|
|
64
63
|
"@polygraph/mcp": "0.0.0",
|
|
64
|
+
"@polygraph/agent": "0.0.0",
|
|
65
65
|
"@polygraph/cli": "0.0.0"
|
|
66
66
|
},
|
|
67
67
|
"publishConfig": {
|