@polygraphso/litmus 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,7 +44,7 @@ async function runLitmusCli(args) {
44
44
  );
45
45
  return 2;
46
46
  }
47
- const { runLitmus } = await import("./src-E5F7GEFI.js");
47
+ const { runLitmus } = await import("./src-AKEARKCO.js");
48
48
  const input = resolveTarget(target);
49
49
  try {
50
50
  const bundle = await runLitmus(input, { headers, allowStateChanging });
@@ -1,9 +1,9 @@
1
1
  import {
2
2
  resolveTarget
3
- } from "./chunk-HVBVNMLR.js";
3
+ } from "./chunk-6OTL43QM.js";
4
4
  import {
5
5
  runLitmus
6
- } from "./chunk-7PIRSQJR.js";
6
+ } from "./chunk-SVFIME2A.js";
7
7
  import {
8
8
  CATEGORY_STATUS_UINT8,
9
9
  METHODOLOGY_VERSION
@@ -1218,45 +1218,6 @@ function egressTargetArgs(opts) {
1218
1218
  opts.entry
1219
1219
  ];
1220
1220
  }
1221
- function egressSleeperArgs(opts) {
1222
- const runtimeFlags = opts.runtime ? ["--runtime", opts.runtime] : [];
1223
- return [
1224
- "run",
1225
- "-d",
1226
- "--name",
1227
- opts.targetName,
1228
- "--network",
1229
- opts.net,
1230
- "--dns",
1231
- opts.sinkIp,
1232
- "-v",
1233
- `${opts.vol}:/stage:ro`,
1234
- "--user",
1235
- "node",
1236
- "--read-only",
1237
- "--tmpfs",
1238
- "/tmp:rw,size=64m,mode=1777",
1239
- "--cap-drop=ALL",
1240
- "--sysctl",
1241
- "net.ipv6.conf.all.disable_ipv6=1",
1242
- "--sysctl",
1243
- "net.ipv6.conf.default.disable_ipv6=1",
1244
- "--cpus",
1245
- "1",
1246
- "--security-opt",
1247
- "no-new-privileges",
1248
- "--pids-limit",
1249
- "256",
1250
- "--memory",
1251
- "512m",
1252
- ...opts.label,
1253
- ...runtimeFlags,
1254
- "--entrypoint",
1255
- "sleep",
1256
- IMAGE_TAG3,
1257
- "3600"
1258
- ];
1259
- }
1260
1221
  async function runEgressProbe(ref, opts) {
1261
1222
  let parsed;
1262
1223
  try {
@@ -1319,8 +1280,12 @@ async function runGatewayCapture(common) {
1319
1280
  const net = `pg-egw-${randomUUID4().slice(0, 8)}`;
1320
1281
  const sink = `pg-sink-${randomUUID4().slice(0, 8)}`;
1321
1282
  const targetName = `pg-target-${randomUUID4().slice(0, 8)}`;
1283
+ let rules = null;
1322
1284
  try {
1323
1285
  await docker(["network", "create", "-o", "com.docker.network.bridge.enable_ip_masquerade=false", ...common.label, net]);
1286
+ const netId = (await docker(["network", "inspect", "-f", "{{.Id}}", net])).trim();
1287
+ const bridge = `br-${netId.slice(0, 12)}`;
1288
+ const subnet = (await docker(["network", "inspect", "-f", "{{(index .IPAM.Config 0).Subnet}}", net])).trim();
1324
1289
  await docker([
1325
1290
  "run",
1326
1291
  "-d",
@@ -1341,26 +1306,23 @@ async function runGatewayCapture(common) {
1341
1306
  IMAGE_TAG3
1342
1307
  ]);
1343
1308
  const sinkIp = (await docker(["inspect", "-f", `{{(index .NetworkSettings.Networks "${net}").IPAddress}}`, sink])).trim();
1344
- if (!sinkIp) return null;
1345
- await docker(
1346
- egressSleeperArgs({ targetName, net, sinkIp, vol: common.vol, label: common.label, ...common.runtime ? { runtime: common.runtime } : {} })
1347
- );
1348
- if (!await applyAndVerifySinkRoute(targetName, sinkIp, common.runtime, common.label)) {
1349
- return null;
1350
- }
1351
- const execArgs = [
1352
- "exec",
1353
- "-i",
1354
- "--user",
1355
- "node",
1356
- ...Object.entries(common.canaryEnv).flatMap(([k, v]) => ["-e", `${k}=${v}`]),
1309
+ if (!sinkIp || !bridge || !subnet) return null;
1310
+ const scope = { bridge, subnet, sinkIp };
1311
+ if (!await applyHostDnat(scope, common.label)) return null;
1312
+ rules = scope;
1313
+ const targetArgs = egressTargetArgs({
1357
1314
  targetName,
1358
- "node",
1359
- common.entry
1360
- ];
1315
+ net,
1316
+ sinkIp,
1317
+ vol: common.vol,
1318
+ entry: common.entry,
1319
+ canaryEnv: common.canaryEnv,
1320
+ label: common.label,
1321
+ ...common.runtime ? { runtime: common.runtime } : {}
1322
+ });
1361
1323
  let conn;
1362
1324
  try {
1363
- conn = await connectTarget({ command: "docker", args: execArgs, serverRef: `npm/${common.pkgSpec}` });
1325
+ conn = await connectTarget({ command: "docker", args: targetArgs, serverRef: `npm/${common.pkgSpec}` });
1364
1326
  } catch {
1365
1327
  return null;
1366
1328
  }
@@ -1370,12 +1332,51 @@ async function runGatewayCapture(common) {
1370
1332
  } finally {
1371
1333
  await docker(["rm", "-f", targetName]).catch(() => {
1372
1334
  });
1335
+ if (rules) await removeHostDnat(rules, common.label).catch(() => {
1336
+ });
1373
1337
  await docker(["rm", "-f", sink]).catch(() => {
1374
1338
  });
1375
1339
  await docker(["network", "rm", net]).catch(() => {
1376
1340
  });
1377
1341
  }
1378
1342
  }
1343
+ function hostDnatCommands(op, s) {
1344
+ const at = op === "I" ? "-I" : "-D";
1345
+ const pos = op === "I" ? " 1" : "";
1346
+ return [
1347
+ `iptables -t nat ${at} PREROUTING${pos} -i ${s.bridge} -p tcp ! -d ${s.subnet} -j DNAT --to-destination ${s.sinkIp}:8443`,
1348
+ `iptables -t nat ${at} POSTROUTING${pos} -o ${s.bridge} -p tcp -d ${s.sinkIp} --dport 8443 -j MASQUERADE`,
1349
+ `iptables ${at} FORWARD${pos} -i ${s.bridge} -o ${s.bridge} -j ACCEPT`
1350
+ ];
1351
+ }
1352
+ function hostDnatHelperArgs(op, s, label) {
1353
+ return [
1354
+ "run",
1355
+ "--rm",
1356
+ "--network",
1357
+ "host",
1358
+ "--cap-add=NET_ADMIN",
1359
+ "--cap-drop=ALL",
1360
+ ...label,
1361
+ "--entrypoint",
1362
+ "sh",
1363
+ IMAGE_TAG3,
1364
+ "-c",
1365
+ hostDnatCommands(op, s).join("; ")
1366
+ ];
1367
+ }
1368
+ async function applyHostDnat(s, label) {
1369
+ try {
1370
+ await docker(hostDnatHelperArgs("I", s, label));
1371
+ return true;
1372
+ } catch {
1373
+ return false;
1374
+ }
1375
+ }
1376
+ async function removeHostDnat(s, label) {
1377
+ await docker(hostDnatHelperArgs("D", s, label)).catch(() => {
1378
+ });
1379
+ }
1379
1380
  async function runInternalCapture(common) {
1380
1381
  const net = `pg-egress-${randomUUID4().slice(0, 8)}`;
1381
1382
  const sink = `pg-sink-${randomUUID4().slice(0, 8)}`;
@@ -1421,47 +1422,6 @@ async function runInternalCapture(common) {
1421
1422
  });
1422
1423
  }
1423
1424
  }
1424
- function egressDelay(ms) {
1425
- return new Promise((resolve) => {
1426
- const t = setTimeout(resolve, ms);
1427
- t.unref?.();
1428
- });
1429
- }
1430
- async function waitForContainerRunning(name, timeoutMs) {
1431
- const deadline = Date.now() + timeoutMs;
1432
- while (Date.now() < deadline) {
1433
- const state = (await docker(["inspect", "-f", "{{.State.Running}}", name]).catch(() => "")).trim();
1434
- if (state === "true") return true;
1435
- await egressDelay(100);
1436
- }
1437
- return false;
1438
- }
1439
- async function applyAndVerifySinkRoute(targetName, sinkIp, runtime, label) {
1440
- if (!await waitForContainerRunning(targetName, 15e3)) return false;
1441
- const runtimeFlags = runtime ? ["--runtime", runtime] : [];
1442
- await docker([
1443
- "run",
1444
- "--rm",
1445
- "--network",
1446
- `container:${targetName}`,
1447
- "--cap-add=NET_ADMIN",
1448
- ...runtimeFlags,
1449
- ...label,
1450
- "--entrypoint",
1451
- "sh",
1452
- IMAGE_TAG3,
1453
- "-c",
1454
- `ip route del default 2>/dev/null; ip route add default via ${sinkIp}`
1455
- ]).catch(() => {
1456
- });
1457
- const wanted = `default via ${sinkIp} `;
1458
- for (let i = 0; i < 20; i++) {
1459
- const routes = await docker(["exec", targetName, "ip", "route"]).catch(() => "");
1460
- if (routes.split("\n").some((l) => (l + " ").startsWith(wanted))) return true;
1461
- await egressDelay(100);
1462
- }
1463
- return false;
1464
- }
1465
1425
 
1466
1426
  // ../probes/src/probes/egress-allowlist.ts
1467
1427
  var DEFAULT_EGRESS_BASELINE = [];
package/dist/cli.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  runLitmusCli
4
- } from "./chunk-HVBVNMLR.js";
4
+ } from "./chunk-6OTL43QM.js";
5
5
  import {
6
6
  parseServerRef,
7
7
  serverKey
@@ -6,16 +6,16 @@
6
6
  * (any port) to our listener, where we log `{host, port, firstBytes}` and drop
7
7
  * the connection — never completing it. One `EGRESS {json}` line per attempt.
8
8
  *
9
- * CAPTURE MODES (egress-runner.ts): in litmus-v4 GATEWAY mode (default) the sink
10
- * is the target's default route on a regular bridge (host masquerade off), so the
11
- * iptables REDIRECT funnels EVERY outbound TCP including a hard-coded IP literal
12
- * or DoH/DoT to a fixed resolver to this listener, regardless of DNS. The legacy
13
- * `--internal` FALLBACK (when the default-route swap can't be applied, e.g. gVisor)
14
- * is DNS-ROUTED only: an IP-literal connection issues no sinkholed lookup and is
15
- * dropped at routing, so C-02 reads a false "no egress" pass there — the real data
16
- * still never leaves the box (`--internal` blocks all egress). Residual either way:
17
- * non-TCP egress (UDP/QUIC) is not captured by the TCP listener. See
18
- * docs/litmus-test-v1.md §7.
9
+ * CAPTURE MODES (egress-runner.ts): in litmus-v4 GATEWAY mode (default) a HOST
10
+ * iptables DNAT redirects the target's off-subnet egress to this sink capturing
11
+ * EVERY outbound TCP, including a hard-coded IP literal or DoH/DoT to a fixed
12
+ * resolver, regardless of DNS. Because it intercepts below the container runtime it
13
+ * works identically under runc and gVisor. The legacy `--internal` FALLBACK (when
14
+ * the host rules can't be applied) is DNS-ROUTED only: an IP-literal connection
15
+ * issues no sinkholed lookup and is dropped at routing, so C-02 reads a false "no
16
+ * egress" pass there — the real data still never leaves the box (`--internal` blocks
17
+ * all egress). Residual either way: non-TCP egress (UDP/QUIC) is not captured by the
18
+ * TCP listener. See docs/litmus-test-v1.md §7.
19
19
  */
20
20
 
21
21
  import dgram from "node:dgram";
package/dist/index.js CHANGED
@@ -14,11 +14,11 @@ import {
14
14
  rpcUrl,
15
15
  runLitmusInputShape,
16
16
  selectedNetwork
17
- } from "./chunk-FMJZCIT3.js";
17
+ } from "./chunk-QWXX34ZJ.js";
18
18
  import {
19
19
  parseAuthFlags,
20
20
  resolveTarget
21
- } from "./chunk-HVBVNMLR.js";
21
+ } from "./chunk-6OTL43QM.js";
22
22
  import {
23
23
  assembleBundle,
24
24
  canaryMatch,
@@ -33,7 +33,7 @@ import {
33
33
  markdownTricks,
34
34
  runLitmus,
35
35
  stateChangingToolNames
36
- } from "./chunk-7PIRSQJR.js";
36
+ } from "./chunk-SVFIME2A.js";
37
37
  import {
38
38
  BUNDLE_SCHEMA_VERSION,
39
39
  CATEGORY_STATUS_UINT8,
package/dist/mcp.js CHANGED
@@ -7,9 +7,9 @@ import {
7
7
  readAttestation,
8
8
  runLitmusInputShape,
9
9
  selectedNetwork
10
- } from "./chunk-FMJZCIT3.js";
11
- import "./chunk-HVBVNMLR.js";
12
- import "./chunk-7PIRSQJR.js";
10
+ } from "./chunk-QWXX34ZJ.js";
11
+ import "./chunk-6OTL43QM.js";
12
+ import "./chunk-SVFIME2A.js";
13
13
  import {
14
14
  parseServerRef,
15
15
  serverKey
@@ -12,7 +12,7 @@ import {
12
12
  markdownTricks,
13
13
  runLitmus,
14
14
  stateChangingToolNames
15
- } from "./chunk-7PIRSQJR.js";
15
+ } from "./chunk-SVFIME2A.js";
16
16
  import "./chunk-D5MOKALT.js";
17
17
  export {
18
18
  assembleBundle,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polygraphso/litmus",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
5
5
  "license": "Apache-2.0",
6
6
  "homepage": "https://polygraph.so",
@@ -63,8 +63,8 @@
63
63
  "typescript": "^5.9.3",
64
64
  "vitest": "^2.1.0",
65
65
  "@polygraph/core": "0.0.0",
66
- "@polygraph/onchain": "0.0.0",
67
66
  "@polygraph/probes": "0.0.0",
67
+ "@polygraph/onchain": "0.0.0",
68
68
  "@polygraph/agent": "0.0.0",
69
69
  "@polygraph/mcp": "0.0.0",
70
70
  "@polygraph/cli": "0.0.0"