@polygraphso/litmus 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  // ../core/src/types.ts
2
- var METHODOLOGY_VERSION = "litmus-v2";
3
- var BUNDLE_SCHEMA_VERSION = "1.1.0";
2
+ var METHODOLOGY_VERSION = "litmus-v3";
3
+ var BUNDLE_SCHEMA_VERSION = "1.2.0";
4
4
  var CATEGORY_STATUS_UINT8 = {
5
5
  pass: 0,
6
6
  fail: 1,
@@ -3,10 +3,10 @@ import {
3
3
  METHODOLOGY_VERSION,
4
4
  parseServerRef,
5
5
  serverKey
6
- } from "./chunk-SAZKXB35.js";
6
+ } from "./chunk-K7UEK2BA.js";
7
7
 
8
8
  // ../probes/src/harness.ts
9
- import { execFile as execFile2 } from "child_process";
9
+ import { execFile as execFile3 } from "child_process";
10
10
 
11
11
  // ../probes/src/connect/index.ts
12
12
  import { Client } from "@modelcontextprotocol/sdk/client/index.js";
@@ -138,7 +138,7 @@ function resolveDockerDir() {
138
138
  }
139
139
  return fileURLToPath(new URL("../../docker", import.meta.url));
140
140
  }
141
- var RESOLVER_SCRIPT = `const p=require("path");const d="/stage/node_modules/"+process.argv[1];let j;try{j=require(d+'/package.json')}catch{}let entry=null;if(j){const b=j.bin;const r=typeof b==="string"?b:(b&&Object.values(b)[0]);if(r)entry=p.join(d,r);}const version=j&&j.version?j.version:null;process.stdout.write(JSON.stringify({entry,version}));`;
141
+ var RESOLVER_SCRIPT = `const p=require("path");const n=process.argv[1];const d="/stage/node_modules/"+n;let j;try{j=require(d+'/package.json')}catch{}let bins={};if(j){const b=j.bin;if(typeof b==="string"){bins[n.replace(/^@[^/]+\\//,"")]=p.join(d,b);}else if(b){for(const k in b){bins[k]=p.join(d,b[k]);}}}const version=j&&j.version?j.version:null;let declaredEgress=[];if(j&&j.polygraph&&Array.isArray(j.polygraph.egress)){declaredEgress=j.polygraph.egress.filter(function(x){return typeof x==="string"});}process.stdout.write(JSON.stringify({bins,version,declaredEgress}));`;
142
142
  function labelFlags(runLabel) {
143
143
  return runLabel ? ["--label", `${LABEL_KEY}=${runLabel}`] : [];
144
144
  }
@@ -211,12 +211,16 @@ function resolverRunArgs(vol, image, pkgName, runLabel, runtime) {
211
211
  function parseResolverOutput(output) {
212
212
  try {
213
213
  const rec = JSON.parse(output);
214
- return {
215
- entry: typeof rec.entry === "string" ? rec.entry : null,
216
- version: typeof rec.version === "string" ? rec.version : null
217
- };
214
+ const bins = {};
215
+ if (rec.bins && typeof rec.bins === "object" && !Array.isArray(rec.bins)) {
216
+ for (const [k, v] of Object.entries(rec.bins)) {
217
+ if (typeof v === "string") bins[k] = v;
218
+ }
219
+ }
220
+ const declaredEgress = Array.isArray(rec.declaredEgress) ? rec.declaredEgress.filter((x) => typeof x === "string") : [];
221
+ return { bins, version: typeof rec.version === "string" ? rec.version : null, declaredEgress };
218
222
  } catch {
219
- return { entry: null, version: null };
223
+ return { bins: {}, version: null, declaredEgress: [] };
220
224
  }
221
225
  }
222
226
  function buildImageArgs(pull) {
@@ -250,13 +254,13 @@ async function stageInto(vol, image, spec, pkgName, opts) {
250
254
  try {
251
255
  await docker(stageInstallArgs(vol, image, spec, opts.runLabel, runtime), 18e4);
252
256
  const resolved = parseResolverOutput((await docker(resolverRunArgs(vol, image, pkgName, opts.runLabel, runtime))).trim());
253
- if (!resolved.entry) {
257
+ if (Object.keys(resolved.bins).length === 0) {
254
258
  await cleanup();
255
259
  throw new Error(
256
260
  `target package ${pkgName} exposes no launchable bin under the sandbox policy (install scripts are skipped)`
257
261
  );
258
262
  }
259
- return { volume: vol, entry: resolved.entry, resolvedVersion: resolved.version, cleanup };
263
+ return { volume: vol, bins: resolved.bins, resolvedVersion: resolved.version, declaredEgress: resolved.declaredEgress, cleanup };
260
264
  } catch (err) {
261
265
  await cleanup();
262
266
  throw err;
@@ -397,125 +401,228 @@ function resolveStagedVersion(requested, staged) {
397
401
  return staged;
398
402
  }
399
403
 
404
+ // ../probes/src/connect/bin-candidates.ts
405
// Matches any bin name that mentions "mcp", case-insensitively.
var MCP_NAME = /mcp/i;

/**
 * Order a package's bin names for probing.
 *
 * Priority: names containing "mcp" first, then a name equal to the package
 * name, then everything else in its original order. Duplicates are dropped,
 * keeping the earliest (highest-priority) occurrence.
 *
 * @param {string[]} binNames - Bin names as declared by the package.
 * @param {string} pkgName - The package name.
 * @returns {string[]} De-duplicated bin names in probe order.
 */
function orderBinCandidates(binNames, pkgName) {
  const mcpNamed = binNames.filter((name) => MCP_NAME.test(name));
  const pkgNamed = binNames.filter((name) => name === pkgName);
  // A Set keeps first-insertion order, so earlier (higher-priority) tiers win.
  return [...new Set([...mcpNamed, ...pkgNamed, ...binNames])];
}
420
/**
 * Extract bin names from the JSON text of a package's `bin` field.
 *
 * @param {string} stdout - Raw JSON text (surrounding whitespace is ignored).
 * @param {string} pkgName - Name to report when `bin` is a single string.
 * @returns {string[]} `[pkgName]` for a string value, the keys for a plain
 *   object, and `[]` for empty input, invalid JSON, arrays, null, or any
 *   other JSON value.
 */
function parseNpmBins(stdout, pkgName) {
  const text = stdout.trim();
  if (text === "") return [];

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    return [];
  }

  switch (typeof parsed) {
    case "string":
      return [pkgName];
    case "object":
      // typeof null is "object"; arrays are not a bin map either.
      return parsed === null || Array.isArray(parsed) ? [] : Object.keys(parsed);
    default:
      return [];
  }
}
433
/**
 * Raised when none of a package's bins completed the probe.
 * The message lists the bins that were tried, or notes that there were none.
 */
var NoMcpBinError = class extends Error {
  /**
   * @param {string} ref - The target reference being graded.
   * @param {string[]} tried - Bin names that were attempted, in order.
   */
  constructor(ref, tried) {
    const attempted = tried.length ? tried.join(", ") : "no launchable bins";
    super(
      `${ref}: no bin spoke MCP \u2014 tried ${attempted}. The target must be an MCP server; a CLI-only package can't be graded.`
    );
    this.name = "NoMcpBinError";
  }
};
441
/**
 * Try each candidate bin in order and return the first one whose attempt
 * yields a non-null result. Attempts run strictly one after another.
 *
 * @param {string} ref - Target reference, used only in the failure message.
 * @param {string[]} candidates - Bin names in probe order.
 * @param {(bin: string) => Promise<unknown>} attempt - Resolves to `null`
 *   when the bin did not work, or to any other value on success.
 * @returns {Promise<{bin: string, result: unknown}>} The winning bin and its result.
 * @throws {NoMcpBinError} When every attempt resolves to `null`.
 */
async function probeForMcpBin(ref, candidates, attempt) {
  for (let i = 0; i < candidates.length; i += 1) {
    const bin = candidates[i];
    const result = await attempt(bin);
    if (result === null) continue;
    return { bin, result };
  }
  throw new NoMcpBinError(ref, candidates);
}
448
+
400
449
  // ../probes/src/connect/index.ts
450
+ import { execFile as execFile2 } from "child_process";
451
+ import { promisify } from "util";
401
452
  import { randomUUID as randomUUID3 } from "crypto";
453
+ var execFileP = promisify(execFile2);
402
454
  var CLIENT_INFO = { name: "polygraph-litmus", version: "0.0.0" };
403
455
  async function connectTarget(input, opts = {}) {
404
456
  const isolated = opts.isolation === "docker";
405
- let kind;
406
- let descriptor;
407
- let serverRef;
408
- let resolvedVersion = null;
409
- let transport;
410
- const teardownExtra = [];
411
457
  if (typeof input !== "string") {
412
458
  if (isolated) {
413
459
  throw new IsolationUnsupportedError(
414
460
  "docker isolation is unsupported for an explicit stdio command \u2014 only an npm ref can be containerized"
415
461
  );
416
462
  }
417
- kind = "stdio";
418
- transport = new StdioClientTransport({
463
+ const transport2 = new StdioClientTransport({
419
464
  command: input.command,
420
465
  args: input.args ?? [],
421
466
  env: { ...getDefaultEnvironment(), ...opts.seedEnv ?? {}, ...input.env ?? {} },
422
467
  ...input.cwd ?? opts.seedCwd ? { cwd: input.cwd ?? opts.seedCwd } : {}
423
468
  });
424
469
  const cmdline = [input.command, ...input.args ?? []].join(" ");
425
- descriptor = { kind, command: cmdline, url: null };
426
- serverRef = input.serverRef ?? cmdline;
427
- } else if (/^https?:\/\//i.test(input)) {
428
- kind = "http";
470
+ const client2 = await connectOrThrow(transport2);
471
+ return makeResult(client2, "stdio", { kind: "stdio", command: cmdline, url: null }, input.serverRef ?? cmdline, null, []);
472
+ }
473
+ if (/^https?:\/\//i.test(input)) {
429
474
  await assertPublicHttpUrl(input);
430
475
  const headers = opts.httpHeaders && Object.keys(opts.httpHeaders).length > 0 ? opts.httpHeaders : void 0;
431
- transport = new StreamableHTTPClientTransport(
476
+ const transport2 = new StreamableHTTPClientTransport(
432
477
  new URL(input),
433
478
  headers ? { requestInit: { headers }, fetch: sameOriginAuthFetch(input, headers) } : void 0
434
479
  );
435
- descriptor = { kind, command: null, url: input };
436
- serverRef = input;
437
- } else {
438
- const parsed = parseServerRef(input);
439
- kind = "stdio";
440
- if (isolated) {
441
- if (parsed.registry !== "npm") {
442
- throw new IsolationUnsupportedError(
443
- `docker isolation is unsupported for ${parsed.registry} refs \u2014 only npm refs can be containerized`
444
- );
445
- }
446
- const spec = (parsed.owner ? `${parsed.owner}/${parsed.name}` : parsed.name) + (parsed.version ? `@${parsed.version}` : "");
447
- const stageOpts = opts.runLabel ? { runLabel: opts.runLabel } : {};
448
- await ensureImage();
449
- let staged = null;
450
- let seed = null;
451
- try {
452
- staged = await stageNpmPackage(spec, stageOpts);
453
- if (!opts.seedCwd) {
454
- throw new Error("docker isolation requires a canary seed directory (seedCwd)");
455
- }
456
- seed = await prepareSeedVolume(opts.seedCwd, stageOpts);
457
- const launch = containerLaunch({
458
- entry: staged.entry,
459
- stageVolume: staged.volume,
460
- seedVolume: seed.volume,
461
- // Canaries travel INTO the container via -e, NOT via the docker CLI's
462
- // own env (the CLI runs on the secrets-bearing host).
463
- canaryEnv: opts.seedEnv ?? {},
464
- ...opts.runLabel ? { runLabel: opts.runLabel } : {},
465
- ...process.env.LITMUS_DOCKER_RUNTIME ? { runtime: process.env.LITMUS_DOCKER_RUNTIME } : {}
466
- });
467
- const containerName = `pg-connect-${randomUUID3().slice(0, 8)}`;
468
- const namedArgs = [launch.args[0], "--name", containerName, ...launch.args.slice(1)];
469
- transport = new StdioClientTransport({
470
- command: launch.command,
471
- args: namedArgs,
472
- // Default env only: no host secrets, no canaries (those are -e args).
473
- env: getDefaultEnvironment()
474
- });
475
- descriptor = {
476
- kind,
477
- command: recordedContainerCommand(launch.command, launch.args, {
478
- stageVolume: staged.volume,
479
- seedVolume: seed.volume
480
- }),
481
- url: null
482
- };
483
- resolvedVersion = resolveStagedVersion(parsed.version, staged.resolvedVersion);
484
- const stagedCleanup = staged.cleanup;
485
- const seedCleanup = seed.cleanup;
486
- teardownExtra.push(
487
- () => docker(["rm", "-f", containerName]).then(() => {
488
- }).catch(() => {
489
- }),
490
- stagedCleanup,
491
- seedCleanup
492
- );
493
- } catch (err) {
494
- if (seed) await seed.cleanup();
495
- if (staged) await staged.cleanup();
496
- throw err;
497
- }
498
- serverRef = serverKey(parsed);
499
- } else {
500
- const launch = launchForRef(parsed);
501
- resolvedVersion = parsed.version ?? null;
502
- transport = new StdioClientTransport({
480
+ const client2 = await connectOrThrow(transport2);
481
+ return makeResult(client2, "http", { kind: "http", command: null, url: input }, input, null, []);
482
+ }
483
+ const parsed = parseServerRef(input);
484
+ if (isolated) {
485
+ if (parsed.registry !== "npm") {
486
+ throw new IsolationUnsupportedError(
487
+ `docker isolation is unsupported for ${parsed.registry} refs \u2014 only npm refs can be containerized`
488
+ );
489
+ }
490
+ return connectIsolatedNpm(input, parsed, opts);
491
+ }
492
+ if (parsed.registry === "npm") {
493
+ return connectHostNpm(input, parsed, opts);
494
+ }
495
+ const launch = launchForRef(parsed);
496
+ const transport = new StdioClientTransport({
497
+ command: launch.command,
498
+ args: launch.args,
499
+ env: { ...getDefaultEnvironment(), ...opts.seedEnv ?? {} },
500
+ ...opts.seedCwd ? { cwd: opts.seedCwd } : {}
501
+ });
502
+ const client = await connectOrThrow(transport);
503
+ return makeResult(
504
+ client,
505
+ "stdio",
506
+ { kind: "stdio", command: [launch.command, ...launch.args].join(" "), url: null },
507
+ serverKey(parsed),
508
+ parsed.version ?? null,
509
+ []
510
+ );
511
+ }
512
/**
 * Connect to an npm-published server on the host via `npx`.
 *
 * The package's bin names are looked up first. If none can be determined,
 * the package is launched through its default bin (`npx -y <spec>`) and a
 * connection failure propagates. Otherwise each bin is probed in priority
 * order (`npx -y -p <spec> <bin>`) and the first that connects is used.
 *
 * @param {string} ref - The original target reference (used in error text).
 * @param {object} parsed - Parsed server ref; `owner`, `name` and `version` are read here.
 * @param {object} opts - Connection options; `seedEnv` and `seedCwd` are read here.
 * @returns {Promise<object>} The connection result built by `makeResult`.
 */
async function connectHostNpm(ref, parsed, opts) {
  const qualifiedName = parsed.owner ? `${parsed.owner}/${parsed.name}` : parsed.name;
  const spec = qualifiedName + (parsed.version ? `@${parsed.version}` : "");
  const serverRefVal = serverKey(parsed);
  const resolvedVersion = parsed.version ?? null;
  const env = { ...getDefaultEnvironment(), ...opts.seedEnv ?? {} };
  const cwd = opts.seedCwd ? { cwd: opts.seedCwd } : {};

  // Build the stdio transport and the matching evidence descriptor for one npx invocation.
  const npxLaunch = (args) => ({
    transport: new StdioClientTransport({ command: "npx", args, env, ...cwd }),
    descriptor: { kind: "stdio", command: ["npx", ...args].join(" "), url: null }
  });
  const finish = (client, descriptor) =>
    makeResult(client, "stdio", descriptor, serverRefVal, resolvedVersion, []);

  const binNames = await fetchNpmBins(spec, parsed.name);
  if (!binNames || binNames.length === 0) {
    // Bin list unavailable or empty: fall back to the package's default bin.
    const { transport, descriptor } = npxLaunch(["-y", spec]);
    return finish(await connectOrThrow(transport), descriptor);
  }

  const candidates = orderBinCandidates(binNames, parsed.name);
  const { result } = await probeForMcpBin(ref, candidates, async (bin) => {
    const { transport, descriptor } = npxLaunch(["-y", "-p", spec, bin]);
    const client = await tryConnect(transport);
    return client ? { client, descriptor } : null;
  });
  return finish(result.client, result.descriptor);
}
534
+ async function connectIsolatedNpm(ref, parsed, opts) {
535
+ const spec = (parsed.owner ? `${parsed.owner}/${parsed.name}` : parsed.name) + (parsed.version ? `@${parsed.version}` : "");
536
+ const stageOpts = opts.runLabel ? { runLabel: opts.runLabel } : {};
537
+ await ensureImage();
538
+ let staged = null;
539
+ let seed = null;
540
+ try {
541
+ staged = await stageNpmPackage(spec, stageOpts);
542
+ if (!opts.seedCwd) {
543
+ throw new Error("docker isolation requires a canary seed directory (seedCwd)");
544
+ }
545
+ seed = await prepareSeedVolume(opts.seedCwd, stageOpts);
546
+ const resolvedVersion = resolveStagedVersion(parsed.version, staged.resolvedVersion);
547
+ const stagedPkg = staged;
548
+ const seedVol = seed;
549
+ const candidates = orderBinCandidates(Object.keys(stagedPkg.bins), parsed.name);
550
+ const { result } = await probeForMcpBin(ref, candidates, async (binName) => {
551
+ const launch = containerLaunch({
552
+ entry: stagedPkg.bins[binName],
553
+ stageVolume: stagedPkg.volume,
554
+ seedVolume: seedVol.volume,
555
+ // Canaries travel INTO the container via -e, NOT via the docker CLI's own env.
556
+ canaryEnv: opts.seedEnv ?? {},
557
+ ...opts.runLabel ? { runLabel: opts.runLabel } : {},
558
+ ...process.env.LITMUS_DOCKER_RUNTIME ? { runtime: process.env.LITMUS_DOCKER_RUNTIME } : {}
559
+ });
560
+ const containerName = `pg-connect-${randomUUID3().slice(0, 8)}`;
561
+ const namedArgs = [launch.args[0], "--name", containerName, ...launch.args.slice(1)];
562
+ const transport = new StdioClientTransport({
503
563
  command: launch.command,
504
- args: launch.args,
505
- env: { ...getDefaultEnvironment(), ...opts.seedEnv ?? {} },
506
- ...opts.seedCwd ? { cwd: opts.seedCwd } : {}
564
+ args: namedArgs,
565
+ env: getDefaultEnvironment()
566
+ // default env only: no host secrets, no canaries
507
567
  });
508
- descriptor = { kind, command: [launch.command, ...launch.args].join(" "), url: null };
509
- serverRef = serverKey(parsed);
510
- }
568
+ const client = await tryConnect(transport);
569
+ if (!client) {
570
+ await docker(["rm", "-f", containerName]).then(() => {
571
+ }).catch(() => {
572
+ });
573
+ return null;
574
+ }
575
+ const descriptor = {
576
+ kind: "stdio",
577
+ command: recordedContainerCommand(launch.command, launch.args, {
578
+ stageVolume: stagedPkg.volume,
579
+ seedVolume: seedVol.volume
580
+ }),
581
+ url: null
582
+ };
583
+ return { client, descriptor, containerName };
584
+ });
585
+ const teardownExtra = [
586
+ () => docker(["rm", "-f", result.containerName]).then(() => {
587
+ }).catch(() => {
588
+ }),
589
+ staged.cleanup,
590
+ seed.cleanup
591
+ ];
592
+ return makeResult(result.client, "stdio", result.descriptor, serverKey(parsed), resolvedVersion, teardownExtra);
593
+ } catch (err) {
594
+ if (seed) await seed.cleanup();
595
+ if (staged) await staged.cleanup();
596
+ throw err;
597
+ }
598
+ }
599
/**
 * Ask `npm view <spec> bin --json` for the package's bin names.
 *
 * @param {string} spec - Package spec, optionally with a version.
 * @param {string} pkgName - Name reported when `bin` is a single string.
 * @returns {Promise<string[] | null>} The bin names, or `null` when the
 *   lookup or parsing fails for any reason (including the 20 s timeout).
 */
async function fetchNpmBins(spec, pkgName) {
  const viewArgs = ["view", spec, "bin", "--json"];
  try {
    const viewed = await execFileP("npm", viewArgs, { timeout: 2e4 });
    return parseNpmBins(viewed.stdout, pkgName);
  } catch {
    // Best-effort lookup: the caller treats null as "bin list unknown".
    return null;
  }
}
607
+ async function tryConnect(transport) {
512
608
  const client = new Client(CLIENT_INFO, { capabilities: {} });
513
609
  try {
514
610
  await withConnectTimeout(client.connect(transport), transport);
515
- } catch (err) {
516
- for (const c of teardownExtra) await c();
517
- throw err;
611
+ return client;
612
+ } catch {
613
+ try {
614
+ await client.close();
615
+ } catch {
616
+ }
617
+ return null;
518
618
  }
619
+ }
620
/**
 * Create a client, connect it over `transport`, and return it.
 *
 * Unlike `tryConnect`, a failed connection is reported to the caller by
 * rethrowing the original error. Before rethrowing, the client is closed on
 * a best-effort basis so a half-opened transport is not left behind.
 *
 * @param {object} transport - The transport to connect over.
 * @returns {Promise<Client>} The connected client.
 * @throws Whatever the connection attempt (or its timeout wrapper) rejects with.
 */
async function connectOrThrow(transport) {
  const client = new Client(CLIENT_INFO, { capabilities: {} });
  try {
    await withConnectTimeout(client.connect(transport), transport);
  } catch (err) {
    try {
      await client.close();
    } catch {
      // Cleanup is best-effort; the connection error below is what matters.
    }
    throw err;
  }
  return client;
}
625
+ function makeResult(client, kind, descriptor, serverRef, resolvedVersion, teardownExtra) {
519
626
  return {
520
627
  client,
521
628
  kind,
@@ -549,10 +656,6 @@ async function withConnectTimeout(connecting, transport) {
549
656
  }
550
657
  }
551
658
  function launchForRef(p) {
552
- if (p.registry === "npm") {
553
- const spec = (p.owner ? `${p.owner}/${p.name}` : p.name) + (p.version ? `@${p.version}` : "");
554
- return { command: "npx", args: ["-y", spec] };
555
- }
556
659
  if (p.registry === "pypi") {
557
660
  return { command: "uvx", args: [p.version ? `${p.name}@${p.version}` : p.name] };
558
661
  }
@@ -929,9 +1032,29 @@ async function c01Injection(ctx) {
929
1032
 
930
1033
  // ../probes/src/docker/egress-runner.ts
931
1034
  import { randomUUID as randomUUID4 } from "crypto";
1035
+
1036
+ // ../probes/src/probes/host-match.ts
1037
/**
 * Normalize a host for comparison: trim, lowercase, drop a `:port` suffix
 * and a trailing root dot.
 *
 * IPv6 literals are kept intact. A bracketed form (`[::1]:443`) yields the
 * address inside the brackets, and an unbracketed value containing more than
 * one colon is treated as a bare IPv6 address rather than `host:port`.
 *
 * @param {string} h - Host, optionally with a port.
 * @returns {string} The normalized host.
 */
function normalizeHost(h) {
  let s = h.trim().toLowerCase();
  if (s.startsWith("[")) {
    const close = s.indexOf("]");
    // Bracketed IPv6 literal: anything after "]" is the port.
    if (close !== -1) return s.slice(1, close);
  }
  const firstColon = s.indexOf(":");
  // Exactly one colon means host:port; several colons mean an IPv6 address.
  if (firstColon !== -1 && firstColon === s.lastIndexOf(":")) {
    s = s.slice(0, firstColon);
  }
  if (s.endsWith(".")) s = s.slice(0, -1);
  return s;
}
1044
/**
 * Test a host against one allowlist pattern.
 *
 * The host is normalized with `normalizeHost`; the pattern is only trimmed
 * and lowercased. A pattern starting with `*.` matches any strict subdomain
 * of the rest (never the bare domain itself). Any other pattern must equal
 * the host exactly.
 *
 * @param {string} host - Observed host.
 * @param {string} pattern - Exact host or `*.domain` wildcard.
 * @returns {boolean} Whether the host is covered by the pattern.
 */
function hostMatchesPattern(host, pattern) {
  const candidate = normalizeHost(host);
  const rule = pattern.trim().toLowerCase();
  if (!rule.startsWith("*.")) return candidate === rule;
  // Keep the leading dot so "evil-example.com" cannot match "*.example.com".
  const suffix = rule.slice(1);
  return candidate.length > suffix.length && candidate.endsWith(suffix);
}
1053
+
1054
+ // ../probes/src/docker/egress-runner.ts
932
1055
  var IMAGE_TAG3 = "polygraph-egress-sniff:latest";
933
1056
  function notRan(reason) {
934
- return { ran: false, reason, attempts: [] };
1057
+ return { ran: false, reason, attempts: [], declaredEgress: [], baselineAllowlist: [] };
935
1058
  }
936
1059
  function parseSinkholeOutput(output) {
937
1060
  const attempts = [];
@@ -963,6 +1086,40 @@ function egressToFindings(attempts) {
963
1086
  ...a.firstBytes !== void 0 ? { firstBytes: a.firstBytes } : {}
964
1087
  }));
965
1088
  }
1089
/**
 * Attach a host, and the source of that host, to every egress attempt.
 *
 * DNS attempts with a host are queued. A later non-DNS attempt that has no
 * host of its own takes the oldest queued DNS host (first in, first out).
 *
 * `hostSource` is:
 *  - "given"           when the attempt already carried a host,
 *  - "dns-correlation" when the host was taken from the DNS queue,
 *  - "none"            when no host is known.
 *
 * @param {object[]} attempts - Egress attempts in observed order.
 * @returns {object[]} Copies of the attempts with `hostSource` (and possibly `host`) set.
 */
function correlateEgress(attempts) {
  const dnsQueue = [];
  const correlated = [];
  for (const attempt of attempts) {
    const isDns = attempt.kind === "dns";
    const hasHost = Boolean(attempt.host);

    if (!isDns && !hasHost) {
      const inferred = dnsQueue.shift();
      correlated.push(
        inferred
          ? { ...attempt, host: inferred, hostSource: "dns-correlation" }
          : { ...attempt, hostSource: "none" }
      );
      continue;
    }

    correlated.push({ ...attempt, hostSource: hasHost ? "given" : "none" });
    if (isDns && hasHost) dnsQueue.push(attempt.host);
  }
  return correlated;
}
1105
/**
 * Mark each correlated attempt as allowed or not.
 *
 * An attempt is allowed only when it has a host and that host matches at
 * least one allowlist pattern; the first matching pattern is recorded as
 * `matchedPattern`. Attempts without a host are never allowed.
 *
 * @param {object[]} correlated - Output of `correlateEgress`.
 * @param {string[]} allowlist - Host patterns to match against.
 * @returns {object[]} Copies of the attempts with `allowed` (and `matchedPattern` when allowed).
 */
function classifyEgress(correlated, allowlist) {
  const deny = (entry) => ({ ...entry, allowed: false });
  return correlated.map((entry) => {
    if (entry.host === void 0) return deny(entry);
    const matchedPattern = allowlist.find((pattern) => hostMatchesPattern(entry.host, pattern));
    if (!matchedPattern) return deny(entry);
    return { ...entry, allowed: true, matchedPattern };
  });
}
1114
/**
 * Turn the allowed attempts into low-severity "egress-allowed" findings.
 *
 * Attempts that are not allowed are ignored. `host` and `port` are copied
 * onto the finding only when they are defined on the attempt.
 *
 * @param {object[]} classified - Output of `classifyEgress`.
 * @returns {object[]} One finding per allowed attempt, in input order.
 */
function egressAllowedFindings(classified) {
  const findings = [];
  for (const entry of classified) {
    if (!entry.allowed) continue;

    const hostLabel = entry.host ?? "?";
    const portLabel = entry.port ? `:${entry.port}` : "";
    const patternLabel = entry.matchedPattern ?? "?";

    const finding = {
      kind: "egress-allowed",
      severity: "low",
      match: `${hostLabel}${portLabel} (allowed: ${patternLabel})`
    };
    if (entry.host !== void 0) finding.host = entry.host;
    if (entry.port !== void 0) finding.port = entry.port;
    findings.push(finding);
  }
  return findings;
}
966
1123
  function egressCanaryFindings(attempts, canaries) {
967
1124
  const findings = [];
968
1125
  for (const a of attempts) {
@@ -1041,7 +1198,8 @@ async function runEgressProbe(ref, opts) {
1041
1198
  if (msg.includes("exposes no launchable bin")) return notRan(msg);
1042
1199
  throw err;
1043
1200
  }
1044
- const { volume: vol, entry } = staged;
1201
+ const vol = staged.volume;
1202
+ const entry = staged.bins[orderBinCandidates(Object.keys(staged.bins), parsed.name)[0]];
1045
1203
  await docker(["network", "create", "--internal", ...label, net]);
1046
1204
  await docker([
1047
1205
  "run",
@@ -1081,7 +1239,13 @@ async function runEgressProbe(ref, opts) {
1081
1239
  await conn.teardown();
1082
1240
  }
1083
1241
  const logs = await docker(["logs", sink]);
1084
- return { ran: true, reason: null, attempts: parseSinkholeOutput(logs) };
1242
+ return {
1243
+ ran: true,
1244
+ reason: null,
1245
+ attempts: parseSinkholeOutput(logs),
1246
+ declaredEgress: staged.declaredEgress,
1247
+ baselineAllowlist: opts.baselineAllowlist ?? []
1248
+ };
1085
1249
  } catch (err) {
1086
1250
  return notRan(`egress sandbox unavailable: ${err instanceof Error ? err.message : String(err)}`);
1087
1251
  } finally {
@@ -1095,6 +1259,28 @@ async function runEgressProbe(ref, opts) {
1095
1259
  }
1096
1260
  }
1097
1261
 
1262
+ // ../probes/src/probes/egress-allowlist.ts
1263
+ var DEFAULT_EGRESS_BASELINE = [];
1264
/**
 * Canonical form of an allowlist pattern: surrounding whitespace removed,
 * then lowercased.
 *
 * @param {string} p - Raw pattern.
 * @returns {string} The normalized pattern (possibly empty).
 */
function normalizePattern(p) {
  const trimmed = p.trim();
  return trimmed.toLowerCase();
}
1267
/**
 * Parse a comma-separated allowlist string into normalized patterns.
 *
 * @param {string | undefined | null} raw - The raw value; a falsy value yields `[]`.
 * @returns {string[]} Normalized, non-empty patterns in input order
 *   (duplicates are kept).
 */
function parseAllowlistEnv(raw) {
  if (!raw) return [];
  const patterns = [];
  for (const piece of raw.split(",")) {
    const pattern = normalizePattern(piece);
    // Skip blanks produced by stray or trailing commas.
    if (pattern.length > 0) patterns.push(pattern);
  }
  return patterns;
}
1271
/**
 * Merge the baseline and declared patterns into a single allowlist.
 *
 * Every pattern is normalized; empty results are dropped and duplicates are
 * removed, keeping the first occurrence (baseline entries come first).
 *
 * @param {string[]} baseline - Baseline patterns.
 * @param {string[]} declared - Patterns declared by the server.
 * @returns {string[]} The de-duplicated union in first-seen order.
 */
function effectiveAllowlist(baseline, declared) {
  const unique = new Set();
  for (const raw of [...baseline, ...declared]) {
    const pattern = normalizePattern(raw);
    // A Set keeps first-insertion order, so re-adding a pattern is a no-op.
    if (pattern.length > 0) unique.add(pattern);
  }
  return [...unique];
}
1283
+
1098
1284
  // ../probes/src/probes/c02-egress.ts
1099
1285
  function probe21Declaration(tools) {
1100
1286
  const findings = [];
@@ -1113,8 +1299,18 @@ function probe21Declaration(tools) {
1113
1299
  }
1114
1300
  function probe22Egress(egress) {
1115
1301
  if (!egress.ran) return { id: "2.2", status: "skipped", findings: [], reason: egress.reason };
1116
- const findings = egressToFindings(egress.attempts);
1117
- return { id: "2.2", status: findings.length > 0 ? "fail" : "pass", findings };
1302
+ const allowlist = effectiveAllowlist(egress.baselineAllowlist, egress.declaredEgress);
1303
+ const classified = classifyEgress(correlateEgress(egress.attempts), allowlist);
1304
+ const overreach = classified.filter((c) => !c.allowed);
1305
+ const allowed = classified.filter((c) => c.allowed);
1306
+ const findings = [...egressToFindings(overreach), ...egressAllowedFindings(allowed)];
1307
+ if (overreach.length > 0) return { id: "2.2", status: "fail", findings };
1308
+ return {
1309
+ id: "2.2",
1310
+ status: "pass",
1311
+ findings,
1312
+ reason: allowed.length > 0 ? `${allowed.length} declared/baseline egress attempt(s) permitted; 0 overreach` : null
1313
+ };
1118
1314
  }
1119
1315
  function c02Permission(declaration, egress) {
1120
1316
  const egressProbe = probe22Egress(egress);
@@ -1245,11 +1441,14 @@ function gradeFromCategories(categories) {
1245
1441
  if (c02?.status === "fail") {
1246
1442
  return {
1247
1443
  grade: "D",
1248
- rationale: "Unexpected egress (C-02 failed); no injection or data leak. Egress is serious but not proven exfiltration, so the grade caps at D."
1444
+ rationale: "Egress overreach (C-02 failed): reached a host outside its declared/baseline allowlist (or mislabeled a tool). No injection or data leak, so the grade caps at D."
1249
1445
  };
1250
1446
  }
1251
1447
  if (c01?.status === "pass" && c02?.status === "pass" && c03?.status === "pass") {
1252
- return { grade: "A", rationale: "All three categories passed. No injection, no unexpected egress, no data leak." };
1448
+ return {
1449
+ grade: "A",
1450
+ rationale: "All three categories passed. No injection, no data leak, and no egress overreach \u2014 declared/baseline egress, if any, was permitted (A means no overreach, not no network)."
1451
+ };
1253
1452
  }
1254
1453
  if (c01?.status === "pass") {
1255
1454
  const note = skipped.length ? ` Not verified: ${skipped.join(", ")} (${skipped.map((c) => byCode(c)?.reason).filter(Boolean).join("; ")}).` : "";
@@ -1304,6 +1503,7 @@ function assembleBundle(input) {
1304
1503
  async function runLitmus(target, opts = {}) {
1305
1504
  const isolation = opts.isolation ?? (process.env.LITMUS_STDIO_ISOLATION === "docker" ? "docker" : "none");
1306
1505
  const ranAt = (/* @__PURE__ */ new Date()).toISOString();
1506
+ const baselineAllowlist = [...DEFAULT_EGRESS_BASELINE, ...parseAllowlistEnv(process.env.LITMUS_EGRESS_ALLOWLIST)];
1307
1507
  const dockerAvailable = await checkDocker();
1308
1508
  const canaries = mintCanaries();
1309
1509
  const seedEnv = canaryEnv(canaries);
@@ -1344,10 +1544,12 @@ async function runLitmus(target, opts = {}) {
1344
1544
  stateChangingTools,
1345
1545
  allowStateChanging: opts.allowStateChanging ?? false
1346
1546
  };
1347
- const egress = dockerAvailable && typeof target === "string" && !/^https?:\/\//i.test(target) ? await runEgressProbe(target, { canaryEnv: seedEnv, ...opts.runLabel ? { runLabel: opts.runLabel } : {} }) : {
1547
+ const egress = dockerAvailable && typeof target === "string" && !/^https?:\/\//i.test(target) ? await runEgressProbe(target, { canaryEnv: seedEnv, baselineAllowlist, ...opts.runLabel ? { runLabel: opts.runLabel } : {} }) : {
1348
1548
  ran: false,
1349
1549
  reason: dockerAvailable ? "egress not run for this target" : "no sandbox (Docker unavailable)",
1350
- attempts: []
1550
+ attempts: [],
1551
+ declaredEgress: [],
1552
+ baselineAllowlist: []
1351
1553
  };
1352
1554
  assertEgressRanUnderIsolation(egress, isolation, isStdio);
1353
1555
  const categories = [
@@ -1359,7 +1561,9 @@ async function runLitmus(target, opts = {}) {
1359
1561
  return assembleBundle({
1360
1562
  serverRef: conn.serverRef,
1361
1563
  resolvedVersion: conn.resolvedVersion,
1362
- target: conn.descriptor,
1564
+ // Surface the server's declared egress in the bundle (disclosure: a
1565
+ // declaration is not exoneration — the consumer/agent-gate can judge).
1566
+ target: egress.declaredEgress.length ? { ...conn.descriptor, declaredEgress: egress.declaredEgress } : conn.descriptor,
1363
1567
  toolDefsFingerprint: fingerprint,
1364
1568
  toolDefs: canonical,
1365
1569
  categories,
@@ -1437,7 +1641,7 @@ function withTimeout(p, ms, label) {
1437
1641
  }
1438
1642
  function checkDocker() {
1439
1643
  return new Promise((resolve) => {
1440
- const child = execFile2("docker", ["info"], { timeout: 4e3 }, (err) => resolve(!err));
1644
+ const child = execFile3("docker", ["info"], { timeout: 4e3 }, (err) => resolve(!err));
1441
1645
  child.on("error", () => resolve(false));
1442
1646
  });
1443
1647
  }
@@ -1,13 +1,13 @@
1
1
  import {
2
2
  resolveTarget
3
- } from "./chunk-BIALP22F.js";
3
+ } from "./chunk-WBXHDYIV.js";
4
4
  import {
5
5
  runLitmus
6
- } from "./chunk-2K6T4FZX.js";
6
+ } from "./chunk-MB5EPL2V.js";
7
7
  import {
8
8
  CATEGORY_STATUS_UINT8,
9
9
  METHODOLOGY_VERSION
10
- } from "./chunk-SAZKXB35.js";
10
+ } from "./chunk-K7UEK2BA.js";
11
11
 
12
12
  // ../onchain/src/networks.ts
13
13
  var NETWORKS = {
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  canonicalStringify
3
- } from "./chunk-SAZKXB35.js";
3
+ } from "./chunk-K7UEK2BA.js";
4
4
 
5
5
  // ../cli/src/litmus.ts
6
6
  import { existsSync } from "fs";
@@ -44,7 +44,7 @@ async function runLitmusCli(args) {
44
44
  );
45
45
  return 2;
46
46
  }
47
- const { runLitmus } = await import("./src-XIEFSTXC.js");
47
+ const { runLitmus } = await import("./src-PTK3WEGQ.js");
48
48
  const input = resolveTarget(target);
49
49
  try {
50
50
  const bundle = await runLitmus(input, { headers, allowStateChanging });
package/dist/cli.js CHANGED
@@ -1,11 +1,11 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  runLitmusCli
4
- } from "./chunk-BIALP22F.js";
4
+ } from "./chunk-WBXHDYIV.js";
5
5
  import {
6
6
  parseServerRef,
7
7
  serverKey
8
- } from "./chunk-SAZKXB35.js";
8
+ } from "./chunk-K7UEK2BA.js";
9
9
 
10
10
  // src/cli.ts
11
11
  import { readFileSync } from "fs";
package/dist/index.d.ts CHANGED
@@ -11,13 +11,16 @@ import { z } from 'zod';
11
11
  /** Package registries a server ref can name. */
12
12
  type Registry = "npm" | "pypi" | "github";
13
13
  /** The methodology this build implements; embedded in every bundle + attestation.
14
- * v2 adds C-02 probe 2.1 (declared-permission honesty), a new fail condition —
15
- * a pass/fail-semantics change, so the version bumps per litmus-test §8. */
16
- declare const METHODOLOGY_VERSION: "litmus-v2";
14
+ * v3 reframes C-02 probe 2.2 from default-deny (any egress fails) to OVERREACH:
15
+ * egress to a host the server declared (`polygraph.egress`) or on the operator
16
+ * baseline allowlist is permitted; only egress beyond that union fails. A
17
+ * pass/fail-semantics change → version bumps per litmus-test §8. NOTE: under v3,
18
+ * grade "A" means "no overreach", NOT "no network". (v2 added probe 2.1.) */
19
+ declare const METHODOLOGY_VERSION: "litmus-v3";
17
20
  /** Evidence-bundle format version (owned by onchain-proof-spec §2).
18
- * 1.1.0 adds the optional `harness.stdioIsolation` field and permits the
19
- * disclaimer to vary by run mode; 1.0.0 bundles remain valid. */
20
- declare const BUNDLE_SCHEMA_VERSION: "1.1.0";
21
+ * 1.2.0 adds the optional `target.declaredEgress` field and the `egress-allowed`
22
+ * finding kind (litmus-v3); 1.1.0 adds `harness.stdioIsolation`; older remain valid. */
23
+ declare const BUNDLE_SCHEMA_VERSION: "1.2.0";
21
24
  type CategoryCode = "C-01" | "C-02" | "C-03" | "C-04";
22
25
  /** Probe IDs carry their family number (1=injection, 2=permission, 4=sensitive). */
23
26
  type ProbeId = "1.1" | "1.2" | "2.1" | "2.2" | "4.1" | "4.2";
@@ -27,7 +30,7 @@ type LitmusGrade = "A" | "B" | "C" | "D" | "F";
27
30
  type Severity = "low" | "medium" | "high";
28
31
  /** uint8 encoding for per-category verdicts on the attestation (onchain-proof-spec §5). */
29
32
  declare const CATEGORY_STATUS_UINT8: Record<CategoryStatus, number>;
30
- type FindingKind = "invisible-unicode" | "instruction-mimicry" | "markdown-trick" | "canary" | "egress" | "permission-mislabel";
33
+ type FindingKind = "invisible-unicode" | "instruction-mimicry" | "markdown-trick" | "canary" | "egress" | "egress-allowed" | "permission-mislabel";
31
34
  interface Finding {
32
35
  kind: FindingKind;
33
36
  severity: Severity;
@@ -61,6 +64,9 @@ interface TargetDescriptor {
61
64
  command?: string | null;
62
65
  /** http: the remote MCP URL. */
63
66
  url?: string | null;
67
+ /** The server's declared egress host patterns (`polygraph.egress`, C-02
68
+ * litmus-v3). Present only when non-empty. Disclosure, not exoneration. */
69
+ declaredEgress?: string[];
64
70
  }
65
71
  /** The canonicalized fields of a tool that the fingerprint hashes. */
66
72
  interface ToolDef {
@@ -159,6 +165,11 @@ declare function canonicalStringify(value: unknown): string;
159
165
  * - an explicit `{command,args}` (for in-repo demo servers and tests) launches
160
166
  * over stdio directly.
161
167
  *
168
+ * For an npm ref the package may ship several bins (e.g. a CLI plus a `*-mcp`
169
+ * server) or a default bin that isn't an MCP server. We enumerate the bins and
170
+ * PROBE them in order (mcp-named first), keeping the first that completes the MCP
171
+ * handshake — so a CLI-first or multi-bin package still grades.
172
+ *
162
173
  * Returns the connected `Client`, a descriptor for the evidence bundle, and a
163
174
  * teardown. The normal MCP handshake (`initialize`) happens inside `connect()`.
164
175
  */
package/dist/index.js CHANGED
@@ -14,11 +14,11 @@ import {
14
14
  rpcUrl,
15
15
  runLitmusInputShape,
16
16
  selectedNetwork
17
- } from "./chunk-JK3UGN2G.js";
17
+ } from "./chunk-UA4BIHP4.js";
18
18
  import {
19
19
  parseAuthFlags,
20
20
  resolveTarget
21
- } from "./chunk-BIALP22F.js";
21
+ } from "./chunk-WBXHDYIV.js";
22
22
  import {
23
23
  assembleBundle,
24
24
  canaryMatch,
@@ -32,7 +32,7 @@ import {
32
32
  markdownTricks,
33
33
  runLitmus,
34
34
  stateChangingToolNames
35
- } from "./chunk-2K6T4FZX.js";
35
+ } from "./chunk-MB5EPL2V.js";
36
36
  import {
37
37
  BUNDLE_SCHEMA_VERSION,
38
38
  CATEGORY_STATUS_UINT8,
@@ -42,7 +42,7 @@ import {
42
42
  formatServerRef,
43
43
  parseServerRef,
44
44
  serverKey
45
- } from "./chunk-SAZKXB35.js";
45
+ } from "./chunk-K7UEK2BA.js";
46
46
 
47
47
  // ../agent/src/gate.ts
48
48
  function sameServer(a, b) {
package/dist/mcp.js CHANGED
@@ -7,13 +7,13 @@ import {
7
7
  readAttestation,
8
8
  runLitmusInputShape,
9
9
  selectedNetwork
10
- } from "./chunk-JK3UGN2G.js";
11
- import "./chunk-BIALP22F.js";
12
- import "./chunk-2K6T4FZX.js";
10
+ } from "./chunk-UA4BIHP4.js";
11
+ import "./chunk-WBXHDYIV.js";
12
+ import "./chunk-MB5EPL2V.js";
13
13
  import {
14
14
  parseServerRef,
15
15
  serverKey
16
- } from "./chunk-SAZKXB35.js";
16
+ } from "./chunk-K7UEK2BA.js";
17
17
 
18
18
  // src/mcp.ts
19
19
  import { realpathSync } from "fs";
@@ -11,8 +11,8 @@ import {
11
11
  markdownTricks,
12
12
  runLitmus,
13
13
  stateChangingToolNames
14
- } from "./chunk-2K6T4FZX.js";
15
- import "./chunk-SAZKXB35.js";
14
+ } from "./chunk-MB5EPL2V.js";
15
+ import "./chunk-K7UEK2BA.js";
16
16
  export {
17
17
  assembleBundle,
18
18
  canaryMatch,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polygraphso/litmus",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
5
5
  "license": "Apache-2.0",
6
6
  "homepage": "https://polygraph.so",
@@ -58,10 +58,10 @@
58
58
  "typescript": "^5.9.3",
59
59
  "vitest": "^2.1.0",
60
60
  "@polygraph/core": "0.0.0",
61
- "@polygraph/onchain": "0.0.0",
62
61
  "@polygraph/probes": "0.0.0",
63
- "@polygraph/agent": "0.0.0",
62
+ "@polygraph/onchain": "0.0.0",
64
63
  "@polygraph/mcp": "0.0.0",
64
+ "@polygraph/agent": "0.0.0",
65
65
  "@polygraph/cli": "0.0.0"
66
66
  },
67
67
  "publishConfig": {