@polygraphso/litmus 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-HVBVNMLR.js → chunk-6OTL43QM.js} +1 -1
- package/dist/{chunk-FMJZCIT3.js → chunk-QWXX34ZJ.js} +2 -2
- package/dist/{chunk-7PIRSQJR.js → chunk-SVFIME2A.js} +57 -97
- package/dist/cli.js +1 -1
- package/dist/docker/sinkhole.mjs +10 -10
- package/dist/index.js +3 -3
- package/dist/mcp.js +3 -3
- package/dist/{src-E5F7GEFI.js → src-AKEARKCO.js} +1 -1
- package/package.json +2 -2
|
@@ -44,7 +44,7 @@ async function runLitmusCli(args) {
|
|
|
44
44
|
);
|
|
45
45
|
return 2;
|
|
46
46
|
}
|
|
47
|
-
const { runLitmus } = await import("./src-E5F7GEFI.js");
|
|
47
|
+
const { runLitmus } = await import("./src-AKEARKCO.js");
|
|
48
48
|
const input = resolveTarget(target);
|
|
49
49
|
try {
|
|
50
50
|
const bundle = await runLitmus(input, { headers, allowStateChanging });
|
|
@@ -1218,45 +1218,6 @@ function egressTargetArgs(opts) {
|
|
|
1218
1218
|
opts.entry
|
|
1219
1219
|
];
|
|
1220
1220
|
}
|
|
1221
|
-
function egressSleeperArgs(opts) {
|
|
1222
|
-
const runtimeFlags = opts.runtime ? ["--runtime", opts.runtime] : [];
|
|
1223
|
-
return [
|
|
1224
|
-
"run",
|
|
1225
|
-
"-d",
|
|
1226
|
-
"--name",
|
|
1227
|
-
opts.targetName,
|
|
1228
|
-
"--network",
|
|
1229
|
-
opts.net,
|
|
1230
|
-
"--dns",
|
|
1231
|
-
opts.sinkIp,
|
|
1232
|
-
"-v",
|
|
1233
|
-
`${opts.vol}:/stage:ro`,
|
|
1234
|
-
"--user",
|
|
1235
|
-
"node",
|
|
1236
|
-
"--read-only",
|
|
1237
|
-
"--tmpfs",
|
|
1238
|
-
"/tmp:rw,size=64m,mode=1777",
|
|
1239
|
-
"--cap-drop=ALL",
|
|
1240
|
-
"--sysctl",
|
|
1241
|
-
"net.ipv6.conf.all.disable_ipv6=1",
|
|
1242
|
-
"--sysctl",
|
|
1243
|
-
"net.ipv6.conf.default.disable_ipv6=1",
|
|
1244
|
-
"--cpus",
|
|
1245
|
-
"1",
|
|
1246
|
-
"--security-opt",
|
|
1247
|
-
"no-new-privileges",
|
|
1248
|
-
"--pids-limit",
|
|
1249
|
-
"256",
|
|
1250
|
-
"--memory",
|
|
1251
|
-
"512m",
|
|
1252
|
-
...opts.label,
|
|
1253
|
-
...runtimeFlags,
|
|
1254
|
-
"--entrypoint",
|
|
1255
|
-
"sleep",
|
|
1256
|
-
IMAGE_TAG3,
|
|
1257
|
-
"3600"
|
|
1258
|
-
];
|
|
1259
|
-
}
|
|
1260
1221
|
async function runEgressProbe(ref, opts) {
|
|
1261
1222
|
let parsed;
|
|
1262
1223
|
try {
|
|
@@ -1319,8 +1280,12 @@ async function runGatewayCapture(common) {
|
|
|
1319
1280
|
const net = `pg-egw-${randomUUID4().slice(0, 8)}`;
|
|
1320
1281
|
const sink = `pg-sink-${randomUUID4().slice(0, 8)}`;
|
|
1321
1282
|
const targetName = `pg-target-${randomUUID4().slice(0, 8)}`;
|
|
1283
|
+
let rules = null;
|
|
1322
1284
|
try {
|
|
1323
1285
|
await docker(["network", "create", "-o", "com.docker.network.bridge.enable_ip_masquerade=false", ...common.label, net]);
|
|
1286
|
+
const netId = (await docker(["network", "inspect", "-f", "{{.Id}}", net])).trim();
|
|
1287
|
+
const bridge = `br-${netId.slice(0, 12)}`;
|
|
1288
|
+
const subnet = (await docker(["network", "inspect", "-f", "{{(index .IPAM.Config 0).Subnet}}", net])).trim();
|
|
1324
1289
|
await docker([
|
|
1325
1290
|
"run",
|
|
1326
1291
|
"-d",
|
|
@@ -1341,26 +1306,23 @@ async function runGatewayCapture(common) {
|
|
|
1341
1306
|
IMAGE_TAG3
|
|
1342
1307
|
]);
|
|
1343
1308
|
const sinkIp = (await docker(["inspect", "-f", `{{(index .NetworkSettings.Networks "${net}").IPAddress}}`, sink])).trim();
|
|
1344
|
-
if (!sinkIp) return null;
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
return null;
|
|
1350
|
-
}
|
|
1351
|
-
const execArgs = [
|
|
1352
|
-
"exec",
|
|
1353
|
-
"-i",
|
|
1354
|
-
"--user",
|
|
1355
|
-
"node",
|
|
1356
|
-
...Object.entries(common.canaryEnv).flatMap(([k, v]) => ["-e", `${k}=${v}`]),
|
|
1309
|
+
if (!sinkIp || !bridge || !subnet) return null;
|
|
1310
|
+
const scope = { bridge, subnet, sinkIp };
|
|
1311
|
+
if (!await applyHostDnat(scope, common.label)) return null;
|
|
1312
|
+
rules = scope;
|
|
1313
|
+
const targetArgs = egressTargetArgs({
|
|
1357
1314
|
targetName,
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1315
|
+
net,
|
|
1316
|
+
sinkIp,
|
|
1317
|
+
vol: common.vol,
|
|
1318
|
+
entry: common.entry,
|
|
1319
|
+
canaryEnv: common.canaryEnv,
|
|
1320
|
+
label: common.label,
|
|
1321
|
+
...common.runtime ? { runtime: common.runtime } : {}
|
|
1322
|
+
});
|
|
1361
1323
|
let conn;
|
|
1362
1324
|
try {
|
|
1363
|
-
conn = await connectTarget({ command: "docker", args:
|
|
1325
|
+
conn = await connectTarget({ command: "docker", args: targetArgs, serverRef: `npm/${common.pkgSpec}` });
|
|
1364
1326
|
} catch {
|
|
1365
1327
|
return null;
|
|
1366
1328
|
}
|
|
@@ -1370,12 +1332,51 @@ async function runGatewayCapture(common) {
|
|
|
1370
1332
|
} finally {
|
|
1371
1333
|
await docker(["rm", "-f", targetName]).catch(() => {
|
|
1372
1334
|
});
|
|
1335
|
+
if (rules) await removeHostDnat(rules, common.label).catch(() => {
|
|
1336
|
+
});
|
|
1373
1337
|
await docker(["rm", "-f", sink]).catch(() => {
|
|
1374
1338
|
});
|
|
1375
1339
|
await docker(["network", "rm", net]).catch(() => {
|
|
1376
1340
|
});
|
|
1377
1341
|
}
|
|
1378
1342
|
}
|
|
1343
|
+
function hostDnatCommands(op, s) {
|
|
1344
|
+
const at = op === "I" ? "-I" : "-D";
|
|
1345
|
+
const pos = op === "I" ? " 1" : "";
|
|
1346
|
+
return [
|
|
1347
|
+
`iptables -t nat ${at} PREROUTING${pos} -i ${s.bridge} -p tcp ! -d ${s.subnet} -j DNAT --to-destination ${s.sinkIp}:8443`,
|
|
1348
|
+
`iptables -t nat ${at} POSTROUTING${pos} -o ${s.bridge} -p tcp -d ${s.sinkIp} --dport 8443 -j MASQUERADE`,
|
|
1349
|
+
`iptables ${at} FORWARD${pos} -i ${s.bridge} -o ${s.bridge} -j ACCEPT`
|
|
1350
|
+
];
|
|
1351
|
+
}
|
|
1352
|
+
function hostDnatHelperArgs(op, s, label) {
|
|
1353
|
+
return [
|
|
1354
|
+
"run",
|
|
1355
|
+
"--rm",
|
|
1356
|
+
"--network",
|
|
1357
|
+
"host",
|
|
1358
|
+
"--cap-add=NET_ADMIN",
|
|
1359
|
+
"--cap-drop=ALL",
|
|
1360
|
+
...label,
|
|
1361
|
+
"--entrypoint",
|
|
1362
|
+
"sh",
|
|
1363
|
+
IMAGE_TAG3,
|
|
1364
|
+
"-c",
|
|
1365
|
+
hostDnatCommands(op, s).join("; ")
|
|
1366
|
+
];
|
|
1367
|
+
}
|
|
1368
|
+
async function applyHostDnat(s, label) {
|
|
1369
|
+
try {
|
|
1370
|
+
await docker(hostDnatHelperArgs("I", s, label));
|
|
1371
|
+
return true;
|
|
1372
|
+
} catch {
|
|
1373
|
+
return false;
|
|
1374
|
+
}
|
|
1375
|
+
}
|
|
1376
|
+
async function removeHostDnat(s, label) {
|
|
1377
|
+
await docker(hostDnatHelperArgs("D", s, label)).catch(() => {
|
|
1378
|
+
});
|
|
1379
|
+
}
|
|
1379
1380
|
async function runInternalCapture(common) {
|
|
1380
1381
|
const net = `pg-egress-${randomUUID4().slice(0, 8)}`;
|
|
1381
1382
|
const sink = `pg-sink-${randomUUID4().slice(0, 8)}`;
|
|
@@ -1421,47 +1422,6 @@ async function runInternalCapture(common) {
|
|
|
1421
1422
|
});
|
|
1422
1423
|
}
|
|
1423
1424
|
}
|
|
1424
|
-
function egressDelay(ms) {
|
|
1425
|
-
return new Promise((resolve) => {
|
|
1426
|
-
const t = setTimeout(resolve, ms);
|
|
1427
|
-
t.unref?.();
|
|
1428
|
-
});
|
|
1429
|
-
}
|
|
1430
|
-
async function waitForContainerRunning(name, timeoutMs) {
|
|
1431
|
-
const deadline = Date.now() + timeoutMs;
|
|
1432
|
-
while (Date.now() < deadline) {
|
|
1433
|
-
const state = (await docker(["inspect", "-f", "{{.State.Running}}", name]).catch(() => "")).trim();
|
|
1434
|
-
if (state === "true") return true;
|
|
1435
|
-
await egressDelay(100);
|
|
1436
|
-
}
|
|
1437
|
-
return false;
|
|
1438
|
-
}
|
|
1439
|
-
async function applyAndVerifySinkRoute(targetName, sinkIp, runtime, label) {
|
|
1440
|
-
if (!await waitForContainerRunning(targetName, 15e3)) return false;
|
|
1441
|
-
const runtimeFlags = runtime ? ["--runtime", runtime] : [];
|
|
1442
|
-
await docker([
|
|
1443
|
-
"run",
|
|
1444
|
-
"--rm",
|
|
1445
|
-
"--network",
|
|
1446
|
-
`container:${targetName}`,
|
|
1447
|
-
"--cap-add=NET_ADMIN",
|
|
1448
|
-
...runtimeFlags,
|
|
1449
|
-
...label,
|
|
1450
|
-
"--entrypoint",
|
|
1451
|
-
"sh",
|
|
1452
|
-
IMAGE_TAG3,
|
|
1453
|
-
"-c",
|
|
1454
|
-
`ip route del default 2>/dev/null; ip route add default via ${sinkIp}`
|
|
1455
|
-
]).catch(() => {
|
|
1456
|
-
});
|
|
1457
|
-
const wanted = `default via ${sinkIp} `;
|
|
1458
|
-
for (let i = 0; i < 20; i++) {
|
|
1459
|
-
const routes = await docker(["exec", targetName, "ip", "route"]).catch(() => "");
|
|
1460
|
-
if (routes.split("\n").some((l) => (l + " ").startsWith(wanted))) return true;
|
|
1461
|
-
await egressDelay(100);
|
|
1462
|
-
}
|
|
1463
|
-
return false;
|
|
1464
|
-
}
|
|
1465
1425
|
|
|
1466
1426
|
// ../probes/src/probes/egress-allowlist.ts
|
|
1467
1427
|
var DEFAULT_EGRESS_BASELINE = [];
|
package/dist/cli.js
CHANGED
package/dist/docker/sinkhole.mjs
CHANGED
|
@@ -6,16 +6,16 @@
|
|
|
6
6
|
* (any port) to our listener, where we log `{host, port, firstBytes}` and drop
|
|
7
7
|
* the connection — never completing it. One `EGRESS {json}` line per attempt.
|
|
8
8
|
*
|
|
9
|
-
* CAPTURE MODES (egress-runner.ts): in litmus-v4 GATEWAY mode (default)
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
* is DNS-ROUTED only: an IP-literal connection
|
|
15
|
-
* dropped at routing, so C-02 reads a false "no
|
|
16
|
-
* still never leaves the box (`--internal` blocks
|
|
17
|
-
* non-TCP egress (UDP/QUIC) is not captured by the
|
|
18
|
-
* docs/litmus-test-v1.md §7.
|
|
9
|
+
* CAPTURE MODES (egress-runner.ts): in litmus-v4 GATEWAY mode (default) a HOST
|
|
10
|
+
* iptables DNAT redirects the target's off-subnet egress to this sink — capturing
|
|
11
|
+
* EVERY outbound TCP, including a hard-coded IP literal or DoH/DoT to a fixed
|
|
12
|
+
* resolver, regardless of DNS. Because it intercepts below the container runtime it
|
|
13
|
+
* works identically under runc and gVisor. The legacy `--internal` FALLBACK (when
|
|
14
|
+
* the host rules can't be applied) is DNS-ROUTED only: an IP-literal connection
|
|
15
|
+
* issues no sinkholed lookup and is dropped at routing, so C-02 reads a false "no
|
|
16
|
+
* egress" pass there — the real data still never leaves the box (`--internal` blocks
|
|
17
|
+
* all egress). Residual either way: non-TCP egress (UDP/QUIC) is not captured by the
|
|
18
|
+
* TCP listener. See docs/litmus-test-v1.md §7.
|
|
19
19
|
*/
|
|
20
20
|
|
|
21
21
|
import dgram from "node:dgram";
|
package/dist/index.js
CHANGED
|
@@ -14,11 +14,11 @@ import {
|
|
|
14
14
|
rpcUrl,
|
|
15
15
|
runLitmusInputShape,
|
|
16
16
|
selectedNetwork
|
|
17
|
-
} from "./chunk-FMJZCIT3.js";
|
|
17
|
+
} from "./chunk-QWXX34ZJ.js";
|
|
18
18
|
import {
|
|
19
19
|
parseAuthFlags,
|
|
20
20
|
resolveTarget
|
|
21
|
-
} from "./chunk-HVBVNMLR.js";
|
|
21
|
+
} from "./chunk-6OTL43QM.js";
|
|
22
22
|
import {
|
|
23
23
|
assembleBundle,
|
|
24
24
|
canaryMatch,
|
|
@@ -33,7 +33,7 @@ import {
|
|
|
33
33
|
markdownTricks,
|
|
34
34
|
runLitmus,
|
|
35
35
|
stateChangingToolNames
|
|
36
|
-
} from "./chunk-7PIRSQJR.js";
|
|
36
|
+
} from "./chunk-SVFIME2A.js";
|
|
37
37
|
import {
|
|
38
38
|
BUNDLE_SCHEMA_VERSION,
|
|
39
39
|
CATEGORY_STATUS_UINT8,
|
package/dist/mcp.js
CHANGED
|
@@ -7,9 +7,9 @@ import {
|
|
|
7
7
|
readAttestation,
|
|
8
8
|
runLitmusInputShape,
|
|
9
9
|
selectedNetwork
|
|
10
|
-
} from "./chunk-FMJZCIT3.js";
|
|
11
|
-
import "./chunk-HVBVNMLR.js";
|
|
12
|
-
import "./chunk-7PIRSQJR.js";
|
|
10
|
+
} from "./chunk-QWXX34ZJ.js";
|
|
11
|
+
import "./chunk-6OTL43QM.js";
|
|
12
|
+
import "./chunk-SVFIME2A.js";
|
|
13
13
|
import {
|
|
14
14
|
parseServerRef,
|
|
15
15
|
serverKey
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@polygraphso/litmus",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"homepage": "https://polygraph.so",
|
|
@@ -63,8 +63,8 @@
|
|
|
63
63
|
"typescript": "^5.9.3",
|
|
64
64
|
"vitest": "^2.1.0",
|
|
65
65
|
"@polygraph/core": "0.0.0",
|
|
66
|
-
"@polygraph/onchain": "0.0.0",
|
|
67
66
|
"@polygraph/probes": "0.0.0",
|
|
67
|
+
"@polygraph/onchain": "0.0.0",
|
|
68
68
|
"@polygraph/agent": "0.0.0",
|
|
69
69
|
"@polygraph/mcp": "0.0.0",
|
|
70
70
|
"@polygraph/cli": "0.0.0"
|