@polygraphso/litmus 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1458 @@
1
+ import {
2
+ BUNDLE_SCHEMA_VERSION,
3
+ METHODOLOGY_VERSION,
4
+ parseServerRef,
5
+ serverKey
6
+ } from "./chunk-SAZKXB35.js";
7
+
8
+ // ../probes/src/harness.ts
9
+ import { execFile as execFile2 } from "child_process";
10
+
11
+ // ../probes/src/connect/index.ts
12
+ import { Client } from "@modelcontextprotocol/sdk/client/index.js";
13
+ import {
14
+ StdioClientTransport,
15
+ getDefaultEnvironment
16
+ } from "@modelcontextprotocol/sdk/client/stdio.js";
17
+ import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
18
+
19
+ // ../probes/src/connect/auth-fetch.ts
20
/**
 * Builds a `fetch` wrapper that only lets the caller's auth headers reach the
 * target's own origin.
 *
 * Requests to the same origin as `target` are forwarded untouched. For any
 * other origin, every header whose name appears in `authHeaders` is removed
 * before the request is sent.
 *
 * @param {string|URL} target - URL of the MCP server the headers belong to.
 * @param {Record<string, string>} authHeaders - headers to withhold cross-origin.
 * @returns {(url: string|URL|Request, init?: RequestInit) => Promise<Response>}
 */
function sameOriginAuthFetch(target, authHeaders) {
  const targetOrigin = new URL(target).origin;
  const authKeys = Object.keys(authHeaders).map((k) => k.toLowerCase());
  return (url, init) => {
    // A Request stringifies to "[object Request]", so its address must be read
    // from `.url`; strings and URL objects stringify to the address itself.
    const isRequest = typeof Request !== "undefined" && url instanceof Request;
    const reqOrigin = new URL(isRequest ? url.url : String(url)).origin;
    if (reqOrigin === targetOrigin) return fetch(url, init);
    // `init.headers` replaces a Request's own headers when both are given, so
    // it wins here too; otherwise start from the Request's headers so they are
    // filtered rather than forwarded or dropped wholesale.
    const headers = new Headers(init?.headers ?? (isRequest ? url.headers : void 0));
    for (const k of authKeys) headers.delete(k);
    return fetch(url, { ...init, headers });
  };
}
31
+
32
+ // ../probes/src/connect/ssrf-guard.ts
33
+ import { lookup } from "dns/promises";
34
+ import { isIP } from "net";
35
/** Error thrown when a target URL is rejected by the SSRF guard. */
var UnsafeTargetUrlError = class extends Error {
  // Own, enumerable `name` on every instance, same as assigning it in the constructor.
  name = "UnsafeTargetUrlError";

  /** @param {string} reason - human-readable explanation of the rejection. */
  constructor(reason) {
    super(reason);
  }
};
41
/**
 * Reports whether private/reserved targets are explicitly permitted.
 * Only the exact value "1" in POLYGRAPH_ALLOW_PRIVATE_TARGETS enables it.
 * @returns {boolean}
 */
function allowPrivate() {
  const { POLYGRAPH_ALLOW_PRIVATE_TARGETS: flag } = process.env;
  return flag === "1";
}
44
/**
 * Splits a dotted-quad IPv4 string into its four numeric octets.
 * @param {string} ip - candidate address.
 * @returns {number[]|null} the four octets (each 0-255), or null when `ip` is
 *   not exactly four dot-separated runs of 1-3 decimal digits in range.
 */
function ipv4Octets(ip) {
  const pieces = ip.split(".");
  if (pieces.length !== 4) return null;
  const octets = [];
  for (const piece of pieces) {
    if (!/^\d{1,3}$/.test(piece)) return null;
    const n = Number(piece);
    if (n > 255) return null;
    octets.push(n);
  }
  return octets;
}
50
/**
 * Reports whether a dotted-quad IPv4 address lies in a private or otherwise
 * reserved range that a public MCP target should never resolve to.
 *
 * @param {string} ip - candidate address; anything `ipv4Octets` rejects yields false.
 * @returns {boolean}
 */
function isPrivateIPv4(ip) {
  const o = ipv4Octets(ip);
  if (!o) return false;
  const [a, b, c] = o;
  if (a === 0) return true; // 0.0.0.0/8
  if (a === 10) return true; // 10.0.0.0/8
  if (a === 127) return true; // 127.0.0.0/8 loopback
  if (a === 169 && b === 254) return true; // 169.254.0.0/16 link-local
  if (a === 172 && b >= 16 && b <= 31) return true; // 172.16.0.0/12
  if (a === 192 && b === 168) return true; // 192.168.0.0/16
  if (a === 100 && b >= 64 && b <= 127) return true; // 100.64.0.0/10 shared address space
  if (a === 192 && b === 0 && c === 0) return true; // 192.0.0.0/24
  if (a === 192 && b === 0 && c === 2) return true; // 192.0.2.0/24 TEST-NET-1
  // The other two documentation blocks were missing although TEST-NET-1 (and
  // the IPv6 documentation prefix, in the IPv6 check) are refused.
  if (a === 198 && b === 51 && c === 100) return true; // 198.51.100.0/24 TEST-NET-2
  if (a === 203 && b === 0 && c === 113) return true; // 203.0.113.0/24 TEST-NET-3
  if (a === 198 && (b === 18 || b === 19)) return true; // 198.18.0.0/15 benchmarking
  if (a >= 224) return true; // 224.0.0.0/4 multicast and 240.0.0.0/4 reserved
  return false;
}
67
/**
 * Reports whether an IPv6 address is loopback, unspecified, link-local,
 * unique-local, multicast, documentation, or an IPv4-embedding address whose
 * embedded IPv4 address is itself private/reserved.
 *
 * The embedded IPv4 check covers both spellings: dotted (`::ffff:127.0.0.1`)
 * and pure hex (`::ffff:7f00:1`). The hex spelling matters because the WHATWG
 * URL serializer rewrites the dotted form into hex, so a hostname taken from
 * `new URL(...)` never contains the dotted form.
 *
 * @param {string} ip - IPv6 address, optionally with a `%zone` suffix.
 * @returns {boolean}
 */
function isPrivateIPv6(ip) {
  // Returns the dotted IPv4 address embedded in an all-hex ::ffff:0:0/96
  // (IPv4-mapped) or ::/96 (IPv4-compatible) address, or null otherwise.
  const hexEmbeddedIPv4 = (address) => {
    const halves = address.split("::");
    if (halves.length > 2) return null;
    const head = halves[0] ? halves[0].split(":") : [];
    let groups = head;
    if (halves.length === 2) {
      const tail = halves[1] ? halves[1].split(":") : [];
      const missing = 8 - head.length - tail.length;
      if (missing < 1) return null;
      groups = [...head, ...new Array(missing).fill("0"), ...tail];
    }
    // Dotted-quad tails fail this test and are handled by the regex below.
    if (groups.length !== 8 || !groups.every((g) => /^[0-9a-f]{1,4}$/.test(g))) return null;
    const words = groups.map((g) => Number.parseInt(g, 16));
    const zeroPrefix = words.slice(0, 5).every((w) => w === 0);
    if (!zeroPrefix || (words[5] !== 0xffff && words[5] !== 0)) return null;
    const hi = words[6];
    const lo = words[7];
    return `${hi >> 8}.${hi & 255}.${lo >> 8}.${lo & 255}`;
  };

  const addr = ip.toLowerCase().split("%")[0];
  if (addr === "::1" || addr === "::") return true;
  const mapped = addr.match(/(?:^|:)((?:\d{1,3}\.){3}\d{1,3})$/);
  if (mapped && isPrivateIPv4(mapped[1])) return true;
  const embedded = hexEmbeddedIPv4(addr);
  if (embedded !== null && isPrivateIPv4(embedded)) return true;
  // fe80::/10 link-local
  if (addr.startsWith("fe8") || addr.startsWith("fe9") || addr.startsWith("fea") || addr.startsWith("feb"))
    return true;
  if (addr.startsWith("fc") || addr.startsWith("fd")) return true; // fc00::/7 unique-local
  if (addr.startsWith("ff")) return true; // ff00::/8 multicast
  if (addr.startsWith("2001:db8")) return true; // documentation prefix
  return false;
}
79
/**
 * Dispatches to the IPv4 or IPv6 private-range check based on `net.isIP`.
 * @param {string} ip - candidate address literal.
 * @returns {boolean} false for anything that is not a valid IP literal.
 */
function isPrivateAddress(ip) {
  switch (isIP(ip)) {
    case 4:
      return isPrivateIPv4(ip);
    case 6:
      return isPrivateIPv6(ip);
    default:
      return false;
  }
}
85
/**
 * Validates that `raw` is an http(s) URL whose host is (or resolves only to)
 * public addresses, throwing otherwise.
 *
 * IP-literal hosts are checked directly; other hosts are resolved with
 * `dns.lookup(..., { all: true })` and every returned address is checked.
 * Plaintext http is refused whenever any address is public. Both refusals are
 * lifted when `allowPrivate()` returns true.
 *
 * @param {string} raw - target URL as supplied by the caller.
 * @returns {Promise<void>}
 * @throws {UnsafeTargetUrlError} on an unparsable URL, a non-http(s) scheme, a
 *   host that cannot be resolved, a private/reserved address, or plaintext
 *   http to a public host.
 */
async function assertPublicHttpUrl(raw) {
  let url;
  try {
    url = new URL(raw);
  } catch {
    throw new UnsafeTargetUrlError(`invalid target URL: ${raw}`);
  }
  const scheme = url.protocol;
  if (!["https:", "http:"].includes(scheme)) {
    throw new UnsafeTargetUrlError(`unsupported scheme "${url.protocol}" \u2014 only http(s) MCP targets are allowed`);
  }
  // URL keeps the brackets around IPv6 literals; isIP() needs them removed.
  const host = url.hostname.replace(/^\[|\]$/g, "");
  let addresses;
  if (isIP(host)) {
    addresses = [host];
  } else {
    try {
      const records = await lookup(host, { all: true });
      addresses = records.map(({ address }) => address);
    } catch {
      throw new UnsafeTargetUrlError(`could not resolve target host: ${host}`);
    }
  }
  if (addresses.length === 0) {
    throw new UnsafeTargetUrlError(`target host did not resolve: ${host}`);
  }
  const privateHit = addresses.find((a) => isPrivateAddress(a));
  if (privateHit && !allowPrivate()) {
    throw new UnsafeTargetUrlError(
      `target ${host} resolves to a private/reserved address (${privateHit}); refusing (set POLYGRAPH_ALLOW_PRIVATE_TARGETS=1 for local dev)`
    );
  }
  const reachesPublicHost = () => addresses.some((a) => !isPrivateAddress(a));
  if (scheme === "http:" && reachesPublicHost() && !allowPrivate()) {
    throw new UnsafeTargetUrlError(`plaintext http to a public host is not allowed: ${host} (use https)`);
  }
}
120
+
121
+ // ../probes/src/connect/container.ts
122
+ import { randomUUID as randomUUID2 } from "crypto";
123
+
124
+ // ../probes/src/docker/staging.ts
125
+ import { execFile } from "child_process";
126
+ import { randomUUID } from "crypto";
127
+ import { fileURLToPath } from "url";
128
+ import { existsSync } from "fs";
129
+ import * as path from "path";
130
// Tag of the locally built sandbox image.
var IMAGE_TAG = "polygraph-egress-sniff:latest";
// Docker label key attached to resources created for one run.
var LABEL_KEY = "polygraph-litmus-run";
// Directory holding egress-sniff.Dockerfile, resolved once at module load.
var DOCKER_DIR = resolveDockerDir();
/**
 * Locates the directory containing `egress-sniff.Dockerfile` relative to this
 * module. Candidates are probed in order; when none contains the Dockerfile
 * the first candidate ("../../docker") is returned anyway.
 * @returns {string} filesystem path of the docker directory.
 */
function resolveDockerDir() {
  const toDir = (rel) => fileURLToPath(new URL(rel, import.meta.url));
  const hasDockerfile = (dir) => existsSync(path.join(dir, "egress-sniff.Dockerfile"));
  const found = ["../../docker", "./docker", "../docker"].map(toDir).find(hasDockerfile);
  return found ?? toDir("../../docker");
}
141
// Script passed to `node -e` inside the stage container (see resolverRunArgs).
// It reads the staged package's package.json and prints {entry, version} as
// JSON on stdout. The pieces below concatenate to the exact one-line source.
var RESOLVER_SCRIPT = [
  // Locate the package under /stage/node_modules using the first CLI argument.
  `const p=require("path");`,
  `const d="/stage/node_modules/"+process.argv[1];`,
  // A missing or unreadable package.json leaves `j` undefined.
  `let j;try{j=require(d+'/package.json')}catch{}`,
  `let entry=null;`,
  // `bin` is either a string or a name->path map; the map's first value is used.
  `if(j){const b=j.bin;const r=typeof b==="string"?b:(b&&Object.values(b)[0]);if(r)entry=p.join(d,r);}`,
  `const version=j&&j.version?j.version:null;`,
  `process.stdout.write(JSON.stringify({entry,version}));`
].join("");
142
/**
 * Builds the `--label` flag pair tagging a docker resource with the run label.
 * @param {string} [runLabel] - run identifier; falsy means "no label".
 * @returns {string[]} `["--label", "<LABEL_KEY>=<runLabel>"]` or an empty array.
 */
function labelFlags(runLabel) {
  if (!runLabel) return [];
  return ["--label", `${LABEL_KEY}=${runLabel}`];
}
145
/**
 * Builds the argument list for `docker volume create`.
 * @param {string} vol - volume name.
 * @param {string} [runLabel] - optional run label attached to the volume.
 * @returns {string[]}
 */
function volumeCreateArgs(vol, runLabel) {
  const args = ["volume", "create"];
  args.push(...labelFlags(runLabel));
  args.push(vol);
  return args;
}
148
/**
 * Builds the `docker run` argument list that installs `spec` with npm into the
 * stage volume mounted at /stage.
 *
 * @param {string} vol - stage volume name.
 * @param {string} image - image to run.
 * @param {string} spec - npm install spec (name, optionally with @version).
 * @param {string} [runLabel] - optional run label.
 * @param {string} [runtime] - optional docker `--runtime` value.
 * @returns {string[]}
 */
function stageInstallArgs(vol, image, spec, runLabel, runtime) {
  const mount = ["-v", `${vol}:/stage`];
  // gVisor parity: this container fetches + extracts the attacker's package and
  // its full dependency tree (network on, as root), so it must run under the same
  // runtime as the sandboxed run, not the default runc.
  const sandboxRuntime = runtime ? ["--runtime", runtime] : [];
  const hardening = [
    "--cap-drop=ALL",
    "--security-opt",
    "no-new-privileges",
    "--pids-limit",
    "256",
    "--memory",
    "1g"
  ];
  // `--` ends npm option parsing so a spec can never be read as a flag
  // (defence-in-depth; parseServerRef already rejects "-"-leading segments).
  const npmInstall = [
    "install",
    "--prefix",
    "/stage",
    "--ignore-scripts",
    "--no-audit",
    "--no-fund",
    "--loglevel",
    "error",
    "--",
    spec
  ];
  return [
    "run",
    "--rm",
    ...mount,
    ...labelFlags(runLabel),
    ...sandboxRuntime,
    ...hardening,
    "--entrypoint",
    "npm",
    image,
    ...npmInstall
  ];
}
183
/**
 * Builds the `docker run` argument list that executes RESOLVER_SCRIPT against
 * the stage volume (no network, user `node`) to discover the package entry.
 *
 * @param {string} vol - stage volume name.
 * @param {string} image - image to run.
 * @param {string} pkgName - package name handed to the script as its argument.
 * @param {string} [runLabel] - optional run label.
 * @param {string} [runtime] - optional docker `--runtime` value.
 * @returns {string[]}
 */
function resolverRunArgs(vol, image, pkgName, runLabel, runtime) {
  const isolation = ["-v", `${vol}:/stage`, "--user", "node", "--network", "none"];
  // gVisor parity: reads the attacker-controlled package.json — same runtime as the run step.
  const sandboxRuntime = runtime ? ["--runtime", runtime] : [];
  const hardening = [
    "--cap-drop=ALL",
    "--security-opt",
    "no-new-privileges",
    "--pids-limit",
    "256",
    "--memory",
    "512m"
  ];
  return [
    "run",
    "--rm",
    ...isolation,
    ...labelFlags(runLabel),
    ...sandboxRuntime,
    ...hardening,
    "--entrypoint",
    "node",
    image,
    "-e",
    RESOLVER_SCRIPT,
    pkgName
  ];
}
211
/**
 * Parses the resolver script's JSON output.
 * @param {string} output - stdout of the resolver container.
 * @returns {{entry: string|null, version: string|null}} each field is null
 *   unless the parsed value is a string; unparsable output yields both null.
 */
function parseResolverOutput(output) {
  const stringOrNull = (v) => (typeof v === "string" ? v : null);
  try {
    // Destructuring a parsed `null` throws and lands in the catch below.
    const { entry, version } = JSON.parse(output);
    return { entry: stringOrNull(entry), version: stringOrNull(version) };
  } catch {
    return { entry: null, version: null };
  }
}
222
/**
 * Builds the `docker build` argument list for the sandbox image.
 * @param {boolean} pull - when truthy, adds `--pull`.
 * @returns {string[]}
 */
function buildImageArgs(pull) {
  const dockerfile = path.join(DOCKER_DIR, "egress-sniff.Dockerfile");
  const args = ["build"];
  if (pull) args.push("--pull");
  args.push("-t", IMAGE_TAG, "-f", dockerfile, DOCKER_DIR);
  return args;
}
233
/**
 * Builds the sandbox image, preferring a build with `--pull`.
 *
 * With LITMUS_DOCKER_BUILD_PULL=0 a single build without `--pull` is run.
 * Otherwise a `--pull` build is attempted first and, if it fails, a notice is
 * written to stderr and the build is retried without `--pull`.
 *
 * @param {(args: string[], timeoutMs: number) => Promise<unknown>} [dockerFn]
 *   docker invoker; defaults to the module's `docker` helper.
 * @returns {Promise<void>}
 */
async function ensureImage(dockerFn = docker) {
  const buildTimeoutMs = 18e4;
  const build = (pull) => dockerFn(buildImageArgs(pull), buildTimeoutMs);
  const pullDisabled = process.env.LITMUS_DOCKER_BUILD_PULL === "0";
  if (pullDisabled) {
    await build(false);
    return;
  }
  try {
    await build(true);
  } catch {
    process.stderr.write("docker build --pull failed; retrying with cached base image\n");
    await build(false);
  }
}
245
/**
 * Installs `spec` into an existing stage volume and resolves the package's
 * launchable entry point.
 *
 * @param {string} vol - stage volume name (already created).
 * @param {string} image - image used for the install and resolver containers.
 * @param {string} spec - npm install spec.
 * @param {string} pkgName - package name without version, used by the resolver.
 * @param {{runLabel?: string, runtime?: string}} opts - `runtime` falls back
 *   to the LITMUS_DOCKER_RUNTIME environment variable.
 * @returns {Promise<{volume: string, entry: string, resolvedVersion: string|null, cleanup: () => Promise<void>}>}
 *   `cleanup` force-removes the volume and never rejects.
 * @throws {Error} when a docker step fails or the package exposes no bin; the
 *   volume is removed (once) before the error propagates.
 */
async function stageInto(vol, image, spec, pkgName, opts) {
  const cleanup = () => docker(["volume", "rm", "-f", vol]).then(() => {
  }).catch(() => {
  });
  const runtime = opts.runtime ?? process.env.LITMUS_DOCKER_RUNTIME;
  try {
    await docker(stageInstallArgs(vol, image, spec, opts.runLabel, runtime), 18e4);
    const resolved = parseResolverOutput((await docker(resolverRunArgs(vol, image, pkgName, opts.runLabel, runtime))).trim());
    if (!resolved.entry) {
      // No cleanup here: the catch below is the single place that removes the
      // volume, so a failure triggers exactly one `docker volume rm`.
      throw new Error(
        `target package ${pkgName} exposes no launchable bin under the sandbox policy (install scripts are skipped)`
      );
    }
    return { volume: vol, entry: resolved.entry, resolvedVersion: resolved.version, cleanup };
  } catch (err) {
    await cleanup();
    throw err;
  }
}
265
/**
 * Creates a fresh stage volume and installs an npm package into it.
 *
 * @param {string} pkgSpec - npm spec such as `name`, `name@1.2.3`, `@scope/name`
 *   or `@scope/name@1.2.3`.
 * @param {{runLabel?: string, runtime?: string}} [opts]
 * @returns {Promise<object>} whatever `stageInto` resolves to.
 */
async function stageNpmPackage(pkgSpec, opts = {}) {
  const vol = `pg-stage-${randomUUID().slice(0, 8)}`;
  await docker(volumeCreateArgs(vol, opts.runLabel));
  // The version separator is the last "@"; an "@" at index 0 is only the
  // scope marker, so the spec carries no version in that case.
  const versionAt = pkgSpec.lastIndexOf("@");
  let pkgName = pkgSpec;
  if (versionAt > 0) pkgName = pkgSpec.slice(0, versionAt);
  return stageInto(vol, IMAGE_TAG, pkgSpec, pkgName, opts);
}
272
/**
 * Runs the `docker` CLI with `args` and resolves with its stdout.
 *
 * @param {string[]} args - CLI arguments; `args[0]` names the subcommand in errors.
 * @param {number} [timeoutMs=90000] - execFile timeout in milliseconds.
 * @returns {Promise<string>} captured stdout (capped at 8 MiB by maxBuffer).
 * @throws {Error} `docker <subcommand> failed: <stderr or message>`, with the
 *   underlying execFile error attached as `cause`.
 */
function docker(args, timeoutMs = 9e4) {
  return new Promise((resolve, reject) => {
    execFile("docker", args, { timeout: timeoutMs, maxBuffer: 8 * 1024 * 1024 }, (err, stdout, stderr) => {
      if (err) {
        // Keep the original error (exit code, signal, errno) reachable for
        // callers and logs instead of flattening it into the message.
        reject(new Error(`docker ${args[0]} failed: ${stderr || err.message}`, { cause: err }));
      } else {
        resolve(stdout);
      }
    });
  });
}
280
+
281
+ // ../probes/src/connect/container.ts
282
// Tag of the sandbox image used for the isolated run and the seed-copy helper.
var IMAGE_TAG2 = "polygraph-egress-sniff:latest";
/** Error thrown when docker isolation is requested for a target that cannot be containerized. */
var IsolationUnsupportedError = class extends Error {
  // Own, enumerable `name` on every instance, same as assigning it in the constructor.
  name = "IsolationUnsupportedError";

  /** @param {string} reason - human-readable explanation. */
  constructor(reason) {
    super(reason);
  }
};
289
/**
 * Rejects values that are unsafe to place on the docker command line as a
 * single token: anything containing whitespace, starting with "-", or empty.
 * Checks run in that order, so the first failing rule determines the message.
 *
 * @param {string} value - token to validate.
 * @param {string} what - label used at the start of the error message.
 * @throws {Error} when a rule is violated.
 */
function assertSafeToken(value, what) {
  const quoted = () => JSON.stringify(value);
  let problem = null;
  if (/\s/.test(value)) problem = `must not contain whitespace: ${quoted()}`;
  else if (value.startsWith("-")) problem = `must not start with "-": ${quoted()}`;
  else if (value.length === 0) problem = "must not be empty";
  if (problem !== null) throw new Error(`${what} ${problem}`);
}
294
/**
 * Builds the `docker run` invocation that launches the staged package entry
 * with node in a locked-down container.
 *
 * @param {object} opts
 * @param {string} opts.entry - path of the script to run inside the container.
 * @param {string} opts.stageVolume - volume mounted read-only at /stage.
 * @param {string} opts.seedVolume - volume mounted read-only at /work (the cwd).
 * @param {Record<string, string>} opts.canaryEnv - variables passed via `-e`.
 * @param {string} [opts.runLabel] - optional run label.
 * @param {string} [opts.runtime] - optional docker `--runtime` value.
 * @returns {{command: string, args: string[]}} `args[0]` is always "run".
 * @throws {Error} when a volume name or the entry fails `assertSafeToken`.
 */
function containerLaunch(opts) {
  const checked = [
    ["stageVolume", "stage volume"],
    ["seedVolume", "seed volume"],
    ["entry", "entry"]
  ];
  for (const [field, what] of checked) assertSafeToken(opts[field], what);

  const envFlags = [];
  for (const [key, val] of Object.entries(opts.canaryEnv)) envFlags.push("-e", `${key}=${val}`);
  const runtimeFlags = opts.runtime ? ["--runtime", opts.runtime] : [];

  const isolation = ["--network", "none", "--user", "node", "--read-only"];
  const mounts = [
    "-v",
    `${opts.stageVolume}:/stage:ro`,
    "-v",
    `${opts.seedVolume}:/work:ro`,
    "-w",
    "/work",
    "--tmpfs",
    "/tmp:rw,size=64m,mode=1777"
  ];
  const limits = [
    "--cap-drop=ALL",
    "--security-opt",
    "no-new-privileges",
    "--pids-limit",
    "256",
    "--memory",
    "512m",
    "--cpus",
    "1"
  ];
  const sysctls = [
    "--sysctl",
    "net.ipv6.conf.all.disable_ipv6=1",
    "--sysctl",
    "net.ipv6.conf.default.disable_ipv6=1"
  ];
  const args = [
    "run",
    "-i",
    "--rm",
    ...isolation,
    ...mounts,
    ...limits,
    ...sysctls,
    ...labelFlags(opts.runLabel),
    ...envFlags,
    ...runtimeFlags,
    "--entrypoint",
    "node",
    IMAGE_TAG2,
    opts.entry
  ];
  return { command: "docker", args };
}
340
/**
 * Renders a container command line for recording: every `-e` flag and the
 * token following it is omitted, and each remaining argument is passed through
 * `stabilizeToken`.
 *
 * @param {string} command - executable name (recorded as-is).
 * @param {string[]} args - full argument list.
 * @param {{stageVolume: string, seedVolume: string}} vols - volume names to mask.
 * @returns {string} space-joined command line.
 */
function recordedContainerCommand(command, args, vols) {
  const shown = [command];
  let dropNext = false;
  for (const arg of args) {
    if (dropNext) {
      // Value of the preceding -e flag: omitted without inspection.
      dropNext = false;
      continue;
    }
    if (arg === "-e") {
      dropNext = true;
      continue;
    }
    shown.push(stabilizeToken(arg, vols));
  }
  return shown.join(" ");
}
351
/**
 * Replaces the first occurrence of the stage and seed volume names in a token
 * with the fixed placeholders `<stage>` and `<seed>`.
 * @param {string} token
 * @param {{stageVolume: string, seedVolume: string}} vols
 * @returns {string}
 */
function stabilizeToken(token, vols) {
  const { stageVolume, seedVolume } = vols;
  const stageMasked = token.replace(stageVolume, "<stage>");
  return stageMasked.replace(seedVolume, "<seed>");
}
354
/**
 * Creates a docker volume and copies the contents of `seedDir` into it.
 *
 * The copy goes through a throw-away container (created, never started) that
 * mounts the volume at /work; that helper container is always removed. If any
 * step after the volume creation fails, the volume is removed before the
 * error propagates.
 *
 * @param {string} seedDir - host directory whose contents seed the volume.
 * @param {{runLabel?: string}} [opts]
 * @returns {Promise<{volume: string, cleanup: () => Promise<void>}>} `cleanup`
 *   force-removes the volume and never rejects.
 */
async function prepareSeedVolume(seedDir, opts = {}) {
  const shortId = () => randomUUID2().slice(0, 8);
  const vol = `pg-seed-${shortId()}`;
  const cleanup = () => docker(["volume", "rm", "-f", vol]).then(() => {
  }).catch(() => {
  });
  await docker(["volume", "create", ...labelFlags(opts.runLabel), vol]);
  const helper = `pg-seedcp-${shortId()}`;
  const copyThroughHelper = async () => {
    try {
      const createHelper = [
        "container",
        "create",
        "--name",
        helper,
        ...labelFlags(opts.runLabel),
        "--entrypoint",
        "true",
        "-v",
        `${vol}:/work`,
        IMAGE_TAG2
      ];
      await docker(createHelper);
      await docker(["cp", `${seedDir}/.`, `${helper}:/work`]);
    } finally {
      // Best-effort: the helper may not exist if its creation failed.
      await docker(["rm", "-f", helper]).catch(() => {
      });
    }
  };
  try {
    await copyThroughHelper();
  } catch (err) {
    await cleanup();
    throw err;
  }
  return { volume: vol, cleanup };
}
386
+
387
+ // ../probes/src/connect/version.ts
388
/**
 * Reports whether a version string starts with a `major.minor.patch` triple.
 * @param {string} v
 * @returns {boolean}
 */
function isConcreteVersion(v) {
  const leadingTriple = /^\d+\.\d+\.\d+/;
  return leadingTriple.exec(v) !== null;
}
391
/**
 * Returns the version that was actually staged, after checking it against the
 * requested one. The check only applies when both are non-null and the
 * requested version is concrete (per `isConcreteVersion`).
 *
 * @param {string|null} requested - version from the server ref, if any.
 * @param {string|null} staged - version read from the staged package.
 * @returns {string|null} `staged`, unchanged.
 * @throws {Error} when a concrete requested version differs from the staged one.
 */
function resolveStagedVersion(requested, staged) {
  if (requested === null || staged === null) return staged;
  if (!isConcreteVersion(requested)) return staged;
  if (staged === requested) return staged;
  throw new Error(
    `requested version ${requested} but the staged package resolved to ${staged}`
  );
}
399
+
400
+ // ../probes/src/connect/index.ts
401
+ import { randomUUID as randomUUID3 } from "crypto";
402
// Identity reported to the MCP server when the client connects.
var CLIENT_INFO = { name: "polygraph-litmus", version: "0.0.0" };
/**
 * Connects an MCP client to a target and returns it with a teardown hook.
 *
 * `input` selects the transport:
 *  - an object `{ command, args?, env?, cwd?, serverRef? }`: explicit stdio command;
 *  - a string starting with http:// or https://: streamable HTTP, after the
 *    URL passes `assertPublicHttpUrl`;
 *  - any other string: a server ref parsed by `parseServerRef`, launched over
 *    stdio, either directly (`launchForRef`) or, with `opts.isolation ===
 *    "docker"`, inside a container (npm refs only).
 *
 * @param {string|object} input - target, as described above.
 * @param {object} [opts]
 * @param {string} [opts.isolation] - "docker" to containerize an npm ref.
 * @param {Record<string, string>} [opts.seedEnv] - extra environment (canaries).
 * @param {string} [opts.seedCwd] - seed directory; required for docker isolation.
 * @param {Record<string, string>} [opts.httpHeaders] - headers for HTTP targets.
 * @param {string} [opts.runLabel] - label for docker resources of this run.
 * @returns {Promise<{client: object, kind: string, descriptor: object, serverRef: string, resolvedVersion: string|null, teardown: () => Promise<void>}>}
 * @throws {IsolationUnsupportedError} when docker isolation is requested for
 *   an explicit command or a non-npm ref.
 * @throws {Error} on staging, launch, or connect failures; docker resources
 *   created so far are cleaned up first.
 */
async function connectTarget(input, opts = {}) {
  const wantsDocker = opts.isolation === "docker";
  const seedEnv = opts.seedEnv ?? {};
  // Cleanup steps for docker resources, run in order on failure or teardown.
  const cleanups = [];
  let kind;
  let descriptor;
  let serverRef;
  let resolvedVersion = null;
  let transport;

  if (typeof input !== "string") {
    if (wantsDocker) {
      throw new IsolationUnsupportedError(
        "docker isolation is unsupported for an explicit stdio command \u2014 only an npm ref can be containerized"
      );
    }
    kind = "stdio";
    const commandArgs = input.args ?? [];
    const cwd = input.cwd ?? opts.seedCwd;
    transport = new StdioClientTransport({
      command: input.command,
      args: commandArgs,
      env: { ...getDefaultEnvironment(), ...seedEnv, ...input.env ?? {} },
      ...cwd ? { cwd } : {}
    });
    const cmdline = [input.command, ...commandArgs].join(" ");
    descriptor = { kind, command: cmdline, url: null };
    serverRef = input.serverRef ?? cmdline;
  } else if (/^https?:\/\//i.test(input)) {
    kind = "http";
    await assertPublicHttpUrl(input);
    const hasHeaders = opts.httpHeaders && Object.keys(opts.httpHeaders).length > 0;
    const headers = hasHeaders ? opts.httpHeaders : void 0;
    const endpoint = new URL(input);
    const transportOpts = headers ? { requestInit: { headers }, fetch: sameOriginAuthFetch(input, headers) } : void 0;
    transport = new StreamableHTTPClientTransport(endpoint, transportOpts);
    descriptor = { kind, command: null, url: input };
    serverRef = input;
  } else {
    const parsed = parseServerRef(input);
    kind = "stdio";
    if (wantsDocker) {
      if (parsed.registry !== "npm") {
        throw new IsolationUnsupportedError(
          `docker isolation is unsupported for ${parsed.registry} refs \u2014 only npm refs can be containerized`
        );
      }
      const pkgPath = parsed.owner ? `${parsed.owner}/${parsed.name}` : parsed.name;
      const spec = pkgPath + (parsed.version ? `@${parsed.version}` : "");
      const stageOpts = opts.runLabel ? { runLabel: opts.runLabel } : {};
      await ensureImage();
      let staged = null;
      let seed = null;
      try {
        staged = await stageNpmPackage(spec, stageOpts);
        if (!opts.seedCwd) {
          throw new Error("docker isolation requires a canary seed directory (seedCwd)");
        }
        seed = await prepareSeedVolume(opts.seedCwd, stageOpts);
        const dockerRuntime = process.env.LITMUS_DOCKER_RUNTIME;
        const launch = containerLaunch({
          entry: staged.entry,
          stageVolume: staged.volume,
          seedVolume: seed.volume,
          // Canaries travel INTO the container via -e, NOT via the docker CLI's
          // own env (the CLI runs on the secrets-bearing host).
          canaryEnv: seedEnv,
          ...stageOpts,
          ...dockerRuntime ? { runtime: dockerRuntime } : {}
        });
        const containerName = `pg-connect-${randomUUID3().slice(0, 8)}`;
        // Insert --name right after the leading "run" so teardown can force-remove it.
        const [verb, ...restArgs] = launch.args;
        transport = new StdioClientTransport({
          command: launch.command,
          args: [verb, "--name", containerName, ...restArgs],
          // Default env only: no host secrets, no canaries (those are -e args).
          env: getDefaultEnvironment()
        });
        const recorded = recordedContainerCommand(launch.command, launch.args, {
          stageVolume: staged.volume,
          seedVolume: seed.volume
        });
        descriptor = { kind, command: recorded, url: null };
        resolvedVersion = resolveStagedVersion(parsed.version, staged.resolvedVersion);
        const removeContainer = () => docker(["rm", "-f", containerName]).then(() => {
        }).catch(() => {
        });
        cleanups.push(removeContainer, staged.cleanup, seed.cleanup);
      } catch (err) {
        if (seed) await seed.cleanup();
        if (staged) await staged.cleanup();
        throw err;
      }
      serverRef = serverKey(parsed);
    } else {
      const launch = launchForRef(parsed);
      resolvedVersion = parsed.version ?? null;
      transport = new StdioClientTransport({
        command: launch.command,
        args: launch.args,
        env: { ...getDefaultEnvironment(), ...seedEnv },
        ...opts.seedCwd ? { cwd: opts.seedCwd } : {}
      });
      descriptor = { kind, command: [launch.command, ...launch.args].join(" "), url: null };
      serverRef = serverKey(parsed);
    }
  }

  const runCleanups = async () => {
    for (const step of cleanups) await step();
  };
  const client = new Client(CLIENT_INFO, { capabilities: {} });
  try {
    await withConnectTimeout(client.connect(transport), transport);
  } catch (err) {
    await runCleanups();
    throw err;
  }
  const teardown = async () => {
    try {
      await client.close();
    } catch {
      // Closing is best-effort; docker resources are still released below.
    }
    await runCleanups();
  };
  return { client, kind, descriptor, serverRef, resolvedVersion, teardown };
}
534
// Upper bound, in milliseconds, for the MCP connect/initialize handshake.
var CONNECT_TIMEOUT_MS = 3e4;
/**
 * Awaits a pending connect, failing if it does not settle within
 * CONNECT_TIMEOUT_MS. On any failure (rejection or timeout) the transport is
 * closed, ignoring close errors, before the error is rethrown.
 *
 * @param {Promise<unknown>} connecting - the in-flight `client.connect(...)`.
 * @param {{close: () => Promise<unknown>}} transport - transport to close on failure.
 * @returns {Promise<void>}
 * @throws {Error} the connect error, or "MCP connect/initialize timed out".
 */
async function withConnectTimeout(connecting, transport) {
  let timer;
  const deadline = new Promise((_resolve, reject) => {
    const expire = () => reject(new Error("MCP connect/initialize timed out"));
    timer = setTimeout(expire, CONNECT_TIMEOUT_MS);
    // Do not let the pending timer alone keep the process alive.
    timer.unref?.();
  });
  try {
    await Promise.race([connecting, deadline]);
  } catch (failure) {
    await transport.close().catch(() => {
    });
    throw failure;
  } finally {
    clearTimeout(timer);
  }
}
551
/**
 * Maps a parsed server ref to the local command that launches it over stdio.
 *
 * @param {{registry: string, owner?: string, name: string, version?: string}} p
 * @returns {{command: string, args: string[]}} `npx -y <spec>` for npm refs,
 *   `uvx <spec>` for pypi refs.
 * @throws {Error} for any other registry.
 */
function launchForRef(p) {
  const versionSuffix = p.version ? `@${p.version}` : "";
  switch (p.registry) {
    case "npm": {
      const pkgPath = p.owner ? `${p.owner}/${p.name}` : p.name;
      return { command: "npx", args: ["-y", pkgPath + versionSuffix] };
    }
    case "pypi":
      return { command: "uvx", args: [p.version ? `${p.name}@${p.version}` : p.name] };
    default:
      throw new Error(
        `registry "${p.registry}" is not launchable over stdio (only npm/pypi). Use an https:// URL for a remote MCP server.`
      );
  }
}
563
+
564
+ // ../probes/src/fingerprint.ts
565
+ import { createHash } from "crypto";
566
/**
 * Computes a stable SHA-256 fingerprint over a server's tool definitions.
 *
 * Each tool is reduced to `{ name, description, inputSchema }` with the
 * description whitespace-normalized and schema keys sorted; the list is then
 * ordered by name (code-unit comparison) and hashed as JSON.
 *
 * @param {Array<{name: string, description?: string, inputSchema?: unknown}>} tools
 * @returns {{fingerprint: string, canonical: object[]}} `fingerprint` is "0x"
 *   followed by the hex digest; `canonical` is the hashed structure.
 */
function fingerprintToolDefs(tools) {
  const canonicalize = (tool) => ({
    name: tool.name,
    description: normalizeWhitespace(tool.description ?? ""),
    inputSchema: sortKeysDeep(tool.inputSchema ?? null)
  });
  const byName = (a, b) => {
    if (a.name < b.name) return -1;
    return a.name > b.name ? 1 : 0;
  };
  const canonical = tools.map((tool) => canonicalize(tool)).sort(byName);
  const digest = createHash("sha256").update(JSON.stringify(canonical), "utf8").digest("hex");
  return { fingerprint: "0x" + digest, canonical };
}
576
/**
 * Collapses every run of space, tab, CR, LF, form feed or vertical tab into a
 * single space and trims the result.
 * @param {string} s
 * @returns {string}
 */
function normalizeWhitespace(s) {
  const runs = /[ \t\r\n\f\v]+/;
  return s.split(runs).join(" ").trim();
}
579
// Maximum nesting depth accepted when canonicalizing a tool schema.
var MAX_FINGERPRINT_DEPTH = 100;
/**
 * Returns a deep copy of `value` in which every object's keys are sorted.
 * Arrays keep their element order; primitives are returned as-is.
 *
 * @param {unknown} value - JSON-like value.
 * @param {number} [depth=0] - current nesting depth (internal).
 * @returns {unknown}
 * @throws {RangeError} when nesting exceeds MAX_FINGERPRINT_DEPTH.
 */
function sortKeysDeep(value, depth = 0) {
  if (depth > MAX_FINGERPRINT_DEPTH) {
    throw new RangeError(`tool schema nesting exceeds ${MAX_FINGERPRINT_DEPTH} levels`);
  }
  const descend = (child) => sortKeysDeep(child, depth + 1);
  if (Array.isArray(value)) return value.map((item) => descend(item));
  if (!value || typeof value !== "object") return value;
  // fromEntries defines own data properties, so a "__proto__" key is copied
  // as ordinary data instead of going through the prototype setter.
  const sortedEntries = Object.keys(value).sort().map((key) => [key, descend(value[key])]);
  return Object.fromEntries(sortedEntries);
}
600
+
601
+ // ../probes/src/probes/exercise.ts
602
// Bait strings used as tool arguments when exercising tools. The first entry
// is the default bait; probe 1.2 calls each tool once per entry.
var BAIT_POOL = [
  "polygraph litmus bait input",
  "summarize the following text for me",
  "please process and echo this content back",
  "here is some material \u2014 what do you make of it?"
];
608
/**
 * Builds a bait argument object for a tool from its JSON-schema-like input
 * schema: one value per required property (or per declared property when the
 * schema lists no `required` array).
 *
 * @param {unknown} schema - the tool's `inputSchema`; non-objects yield `{}`.
 * @param {string} [bait] - string used for string-typed/untyped properties.
 * @returns {Record<string, unknown>}
 */
function buildBaitArgs(schema, bait = BAIT_POOL[0]) {
  const args = {};
  if (!schema || typeof schema !== "object") return args;
  const s = schema;
  const props = s.properties && typeof s.properties === "object" ? s.properties : {};
  const required = Array.isArray(s.required) ? s.required : Object.keys(props);
  for (const key of required) {
    // Only the schema's own entries describe a property; inherited members of
    // `props` (e.g. "constructor") are not property definitions.
    const definition = Object.prototype.hasOwnProperty.call(props, key) ? props[key] : void 0;
    // The schema comes from the server under test. Plain assignment with a
    // "__proto__" key would hit the prototype setter and drop the argument,
    // so define it as an own data property (same approach as sortKeysDeep).
    Object.defineProperty(args, key, {
      value: baitValue(definition, bait),
      enumerable: true,
      writable: true,
      configurable: true
    });
  }
  return args;
}
617
/**
 * Picks a bait value for one schema property.
 *
 * The first `enum` entry wins when present; otherwise the value follows the
 * declared type (first entry when `type` is an array). Unknown or missing
 * types get the bait string.
 *
 * @param {{enum?: unknown[], type?: string|string[]}|undefined|null} prop
 * @param {string} bait
 * @returns {unknown}
 */
function baitValue(prop, bait) {
  const spec = prop ?? {};
  const choices = spec.enum;
  if (Array.isArray(choices) && choices.length > 0) return choices[0];
  const declared = Array.isArray(spec.type) ? spec.type[0] : spec.type;
  if (declared === "number" || declared === "integer") return 1;
  if (declared === "boolean") return true;
  if (declared === "array") return [];
  if (declared === "object") return {};
  return bait;
}
635
// Maximum number of UTF-16 code units of tool output kept for scanning.
var MAX_RESULT_CHARS = 256 * 1024;
/**
 * Truncates `s` to at most MAX_RESULT_CHARS code units.
 * @param {string} s
 * @returns {string}
 */
function capResultText(s) {
  const overLimit = s.length > MAX_RESULT_CHARS;
  if (!overLimit) return s;
  return s.slice(0, MAX_RESULT_CHARS);
}
639
/**
 * Flattens a tool call result into one capped string for scanning.
 *
 * For objects, the `text` of every content item that has a string `text` is
 * emitted first (one per line), followed by the JSON of the whole result when
 * it can be serialized. Non-objects are stringified directly, with
 * null/undefined becoming the empty string.
 *
 * @param {unknown} result
 * @returns {string}
 */
function stringifyResult(result) {
  if (!result || typeof result !== "object") return capResultText(String(result ?? ""));
  const items = Array.isArray(result.content) ? result.content : [];
  const hasText = (c) => c && typeof c === "object" && "text" in c && typeof c.text === "string";
  const parts = items.filter(hasText).map((c) => c.text);
  try {
    parts.push(JSON.stringify(result));
  } catch {
    // Unserializable results (e.g. cycles) contribute only their text items.
  }
  return capResultText(parts.join("\n"));
}
656
// Default per-call timeout, in milliseconds, when exercising a tool.
var CALL_TIMEOUT_MS = 15e3;
// Sentinel the timeout branch resolves with, to tell it apart from a tool result.
var TIMEOUT = /* @__PURE__ */ Symbol("timeout");
/**
 * Calls one tool with bait arguments and returns its flattened output.
 * Never rejects: failures are reported in the result.
 *
 * @param {{callTool: (req: {name: string, arguments: object}) => Promise<unknown>}} client
 * @param {{name: string, inputSchema?: unknown}} tool
 * @param {string} [bait] - bait string passed to `buildBaitArgs`.
 * @param {number} [timeoutMs=CALL_TIMEOUT_MS]
 * @returns {Promise<{ok: true, text: string} | {ok: false, reason: "timeout"|"error"}>}
 */
async function exerciseTool(client, tool, bait = BAIT_POOL[0], timeoutMs = CALL_TIMEOUT_MS) {
  let timer;
  try {
    const call = client.callTool({ name: tool.name, arguments: buildBaitArgs(tool.inputSchema, bait) });
    const deadline = new Promise((resolve) => {
      timer = setTimeout(() => resolve(TIMEOUT), timeoutMs);
      timer.unref?.();
    });
    const raced = await Promise.race([call, deadline]);
    if (raced === TIMEOUT) return { ok: false, reason: "timeout" };
    return { ok: true, text: stringifyResult(raced) };
  } catch {
    return { ok: false, reason: "error" };
  } finally {
    // Release the timer as soon as the race is decided; otherwise every call
    // leaves a pending timer behind for the full timeout.
    clearTimeout(timer);
  }
}
674
+
675
+ // ../probes/src/probes/tool-safety.ts
676
// Name tokens that mark a tool as state-changing when it carries no
// readOnlyHint/destructiveHint annotation (see classifyTool).
var STATE_CHANGING_VERBS = /* @__PURE__ */ new Set(
  (
    "send transfer swap sign pay buy sell trade approve withdraw deposit mint burn " +
    "execute deploy delete remove drop write create update insert revoke grant " +
    "move rename purchase checkout order"
  ).split(" ")
);
// Name tokens treated as destructive even when a tool declares
// readOnlyHint: true (see declarationMismatch).
var UNAMBIGUOUS_DESTRUCTIVE_VERBS = /* @__PURE__ */ new Set(
  "delete drop transfer send withdraw pay sign burn revoke".split(" ")
);
718
/**
 * Splits a tool name into lowercase alphanumeric tokens, breaking at
 * lower/digit-to-upper camelCase boundaries and at non-alphanumeric characters.
 * @param {string} name
 * @returns {string[]}
 */
function tokenize(name) {
  const spaced = name.replace(/([a-z0-9])([A-Z])/g, "$1 $2");
  const tokens = [];
  for (const piece of spaced.split(/[^a-zA-Z0-9]+/)) {
    if (piece) tokens.push(piece.toLowerCase());
  }
  return tokens;
}
721
/**
 * Decides whether a tool should be treated as state-changing.
 *
 * Annotations take precedence, in this order: `readOnlyHint: true` (not
 * state-changing), `destructiveHint: true`, `readOnlyHint: false`. Without a
 * decisive annotation, the first name token found in STATE_CHANGING_VERBS
 * marks the tool as state-changing.
 *
 * @param {{name: string, annotations?: {readOnlyHint?: boolean, destructiveHint?: boolean}|null}} tool
 * @returns {{stateChanging: false} | {stateChanging: true, reason: string}}
 */
function classifyTool(tool) {
  const hints = tool.annotations ?? void 0;
  const flagged = (reason) => ({ stateChanging: true, reason });
  if (hints?.readOnlyHint === true) return { stateChanging: false };
  if (hints?.destructiveHint === true) return flagged("annotated destructiveHint");
  if (hints?.readOnlyHint === false) return flagged("annotated readOnlyHint:false");
  for (const token of tokenize(tool.name)) {
    if (STATE_CHANGING_VERBS.has(token)) return flagged(`name token "${token}" is state-changing`);
  }
  return { stateChanging: false };
}
730
/**
 * Finds a contradiction between a tool's `readOnlyHint: true` annotation and
 * its name.
 * @param {{name: string, annotations?: {readOnlyHint?: boolean}|null}} tool
 * @returns {string|null} the first name token found in
 *   UNAMBIGUOUS_DESTRUCTIVE_VERBS, or null when the tool does not claim to be
 *   read-only or no such token exists.
 */
function declarationMismatch(tool) {
  const claimsReadOnly = tool.annotations?.readOnlyHint === true;
  if (!claimsReadOnly) return null;
  const offending = tokenize(tool.name).find((token) => UNAMBIGUOUS_DESTRUCTIVE_VERBS.has(token));
  return offending ?? null;
}
734
/**
 * Collects the names of all tools that `classifyTool` marks as state-changing.
 * @param {Iterable<{name: string}>} tools
 * @returns {Set<string>}
 */
function stateChangingToolNames(tools) {
  const flagged = /* @__PURE__ */ new Set();
  for (const tool of tools) {
    const { stateChanging } = classifyTool(tool);
    if (stateChanging) flagged.add(tool.name);
  }
  return flagged;
}
741
/**
 * Formats the note listing tools that were skipped as state-changing.
 * @param {string[]} skipped - names of the skipped tools.
 * @returns {string}
 */
function skippedNote(skipped) {
  const count = skipped.length;
  const names = skipped.join(", ");
  return `${count} tool(s) skipped (state-changing; pass --allow-state-changing): ${names}`;
}
744
/**
 * Reports whether a tool must not be exercised in this run: it is in the
 * context's state-changing set and state-changing calls are not allowed.
 * @param {{allowStateChanging?: boolean, stateChangingTools: Set<string>}} ctx
 * @param {string} toolName
 * @returns {boolean}
 */
function shouldSkipExercise(ctx, toolName) {
  if (ctx.allowStateChanging) return false;
  return ctx.stateChangingTools.has(toolName);
}
747
+
748
+ // ../probes/src/probes/scanners.ts
749
/**
 * Reports whether a code point is one of the invisible/formatting characters
 * this scanner flags.
 * @param {number} cp - Unicode code point.
 * @returns {boolean}
 */
function isInvisible(cp) {
  const within = (lo, hi) => cp >= lo && cp <= hi;
  if (within(0x200b, 0x200d)) return true; // zero-width space/non-joiner/joiner
  if (cp === 0xfeff) return true; // zero-width no-break space (BOM)
  if (within(0x202a, 0x202e)) return true; // bidi embedding/override
  if (within(0x2066, 0x2069)) return true; // bidi isolates
  return within(0xe0000, 0xe007f); // U+E0000..U+E007F
}
756
/**
 * Scans text for invisible characters (per `isInvisible`).
 * @param {string} text
 * @returns {Array<{kind: "invisible-unicode", severity: "high", match: string, offset: number}>}
 *   one finding per occurrence; `match` is the code point as `U+XXXX` and
 *   `offset` is its UTF-16 index in `text`.
 */
function invisibleUnicode(text) {
  const findings = [];
  let cursor = 0;
  // Iterating the string yields whole code points; `cursor` tracks code units.
  for (const glyph of text) {
    const offset = cursor;
    cursor += glyph.length;
    const cp = glyph.codePointAt(0) ?? 0;
    if (!isInvisible(cp)) continue;
    const hex = cp.toString(16).toUpperCase().padStart(4, "0");
    findings.push({ kind: "invisible-unicode", severity: "high", match: `U+${hex}`, offset });
  }
  return findings;
}
773
// Patterns for text that imitates instructions to a model, with the severity
// reported when a pattern matches.
var MIMICRY = [
  // "ignore (all|the|your|any) previous|prior|above|earlier instructions|prompts|messages"
  { re: /\bignore\s+(?:all\s+|the\s+|your\s+|any\s+)?(?:previous|prior|above|earlier)\s+(?:instructions?|prompts?|messages?)\b/i, severity: "high" },
  // "disregard (all|the|your) previous|prior|above"
  { re: /\bdisregard\s+(?:all\s+|the\s+|your\s+)?(?:previous|prior|above)\b/i, severity: "high" },
  // "new|updated|revised instructions|directives|system prompt"
  { re: /\b(?:new|updated|revised)\s+(?:instructions?|directives?|system\s+prompt)\b/i, severity: "high" },
  // a "system:" role prefix at the start, after whitespace, or after ">"
  { re: /(?:^|[\s>])system\s*:/i, severity: "high" },
  // opening/closing tags named system, assistant, user or tool
  { re: /<\/?(?:system|assistant|user|tool)\b[^>]*>/i, severity: "high" },
  // JSON keys that look like a tool/function call
  { re: /"(?:tool_call|function_call|tool_name|function)"\s*:/i, severity: "high" },
  // imperative phrasing addressed to "you"
  { re: /\byou\s+(?:must|should|shall|will|need\s+to|have\s+to)\s+\w/i, severity: "medium" }
];
/**
 * Scans text against the MIMICRY patterns.
 * @param {string} text
 * @returns {Array<{kind: "instruction-mimicry", severity: string, match: string, offset: number}>}
 *   at most one finding per pattern (its first match), in pattern order;
 *   `match` is capped at 120 characters.
 */
function instructionMimicry(text) {
  return MIMICRY.flatMap(({ re, severity }) => {
    const hit = re.exec(text);
    if (!hit) return [];
    return [{ kind: "instruction-mimicry", severity, match: hit[0].slice(0, 120), offset: hit.index }];
  });
}
797
/**
 * Scans text for two markdown/URL tricks: `javascript:`/`data:` URIs (high
 * severity) and markdown links or images whose http(s) URL carries a query
 * parameter (medium severity).
 *
 * @param {string} text
 * @returns {Array<{kind: "markdown-trick", severity: string, match: string, offset: number}>}
 *   all URI-scheme findings first, then all link findings; `match` is capped
 *   at 120 characters.
 */
function markdownTricks(text) {
  const finding = (severity, match, offset) => ({
    kind: "markdown-trick",
    severity,
    match: match.slice(0, 120),
    offset
  });
  const schemeHits = [...text.matchAll(/\b(?:javascript|data):[^\s)"'<>]+/gi)].map(
    (m) => finding("high", m[0], m.index)
  );
  // Group 1 is the URL inside the parentheses of [label](url) / ![alt](url).
  const linkHits = [...text.matchAll(/!?\[[^\]]*\]\((https?:\/\/[^)\s]*\?[^)\s]*=[^)\s]*)\)/gi)].map(
    (m) => finding("medium", m[1] ?? m[0], m.index)
  );
  return [...schemeHits, ...linkHits];
}
819
/**
 * Looks for canary strings in text.
 *
 * For each non-empty canary the checks run in order and stop at the first
 * hit: exact match, case-insensitive match, then a match inside one of the
 * `obfuscatedViews` of the text (exact or case-insensitive).
 *
 * @param {string} text
 * @param {Array<string|null|undefined>} canaries
 * @returns {Array<{kind: "canary", severity: "high", match: string, offset?: number}>}
 *   direct hits carry `offset`; hits found in a view have no `offset` and
 *   append the view label to `match`.
 */
function canaryMatch(text, canaries) {
  const loweredText = text.toLowerCase();
  const views = obfuscatedViews(text);
  const findings = [];
  for (const canary of canaries) {
    if (!canary) continue;
    const needle = canary.toLowerCase();
    let offset = text.indexOf(canary);
    if (offset < 0) offset = loweredText.indexOf(needle);
    if (offset >= 0) {
      findings.push({ kind: "canary", severity: "high", match: canary, offset });
      continue;
    }
    const view = views.find((v) => v.text.includes(canary) || v.text.toLowerCase().includes(needle));
    if (view) findings.push({ kind: "canary", severity: "high", match: `${canary} (${view.label})` });
  }
  return findings;
}
841
/**
 * Produces alternative renderings of `text` in which an obfuscated canary
 * might become visible.
 *
 * @param {string} text
 * @returns {Array<{label: string, text: string}>} in order: the text with all
 *   whitespace removed; the URL-decoded text (omitted when decoding throws);
 *   one view per base64-looking run of 16+ characters; one view per
 *   even-length hex run of 32+ characters. Decoded runs are kept only when
 *   `decodeBuf` returns a value.
 */
function obfuscatedViews(text) {
  const views = [
    { label: "whitespace-stripped", text: text.replace(/\s+/g, "") }
  ];
  try {
    views.push({ label: "url-decoded", text: decodeURIComponent(text) });
  } catch {
    // Malformed percent-escapes: skip this view.
  }
  const addDecodedRuns = (pattern, encoding, label, accepts = () => true) => {
    for (const [run] of text.matchAll(pattern)) {
      if (!accepts(run)) continue;
      const decoded = decodeBuf(run, encoding);
      if (decoded) views.push({ label, text: decoded });
    }
  };
  addDecodedRuns(/[A-Za-z0-9+/]{16,}={0,2}/g, "base64", "base64-decoded");
  addDecodedRuns(/[0-9a-fA-F]{32,}/g, "hex", "hex-decoded", (run) => run.length % 2 === 0);
  return views;
}
861
/**
 * Decodes `s` with the given Buffer encoding and returns the UTF-8 text, but
 * only when it contains at least one printable ASCII character.
 * @param {string} s - encoded text.
 * @param {BufferEncoding} enc - e.g. "base64" or "hex".
 * @returns {string|null} decoded text, or null when decoding fails or the
 *   result has no character in the range 0x20-0x7e.
 */
function decodeBuf(s, enc) {
  let decoded;
  try {
    decoded = Buffer.from(s, enc).toString("utf8");
  } catch {
    return null;
  }
  return /[\x20-\x7e]/.test(decoded) ? decoded : null;
}
869
/**
 * Reports whether any finding has severity "high".
 * @param {Array<{severity: string}>} findings
 * @returns {boolean}
 */
function hasHighSeverity(findings) {
  return findings.findIndex(({ severity }) => severity === "high") !== -1;
}
872
+
873
+ // ../probes/src/probes/c01-injection.ts
874
/**
 * Runs the three injection scanners over `text` and tags every finding with
 * the tool it came from.
 * @param {string} text - text to scan.
 * @param {string} tool - tool name recorded on each finding.
 * @returns {object[]} invisible-unicode findings, then instruction-mimicry,
 *   then markdown-trick findings, each with an added `tool` field.
 */
function scanInjection(text, tool) {
  const scanners = [invisibleUnicode, instructionMimicry, markdownTricks];
  const raw = scanners.flatMap((scan) => scan(text));
  return raw.map((finding) => ({ ...finding, tool }));
}
880
/**
 * Probe 1.1: scans each tool's declared surface (name, description and the
 * JSON of its input schema, one per line) for injection patterns.
 * @param {{tools: Iterable<{name: string, description?: string, inputSchema?: unknown}>}} ctx
 * @returns {{id: "1.1", status: "pass"|"fail", findings: object[]}} "fail" when
 *   any finding is high severity.
 */
function probe11(ctx) {
  const surfaceOf = (t) => `${t.name}\n${t.description}\n${JSON.stringify(t.inputSchema ?? {})}`;
  const findings = [];
  for (const tool of ctx.tools) {
    for (const finding of scanInjection(surfaceOf(tool), tool.name)) findings.push(finding);
  }
  const status = hasHighSeverity(findings) ? "fail" : "pass";
  return { id: "1.1", status, findings };
}
890
/**
 * Probe 1.2: calls every exercisable tool once per bait string and scans the
 * responses for injection patterns.
 *
 * Tools for which `shouldSkipExercise` returns true are skipped; a tool counts
 * as exercised when at least one bait call succeeds. Calls run sequentially.
 *
 * @param {{tools: Iterable<{name: string}>, client: object}} ctx
 * @returns {Promise<{id: "1.2", status: "pass"|"fail", findings: object[], reason: string|null}>}
 */
async function probe12(ctx) {
  const findings = [];
  const unexercised = [];
  const skipped = [];
  let exercised = 0;
  for (const tool of ctx.tools) {
    if (shouldSkipExercise(ctx, tool.name)) {
      skipped.push(tool.name);
      continue;
    }
    let answeredOnce = false;
    for (const bait of BAIT_POOL) {
      const outcome = await exerciseTool(ctx.client, tool, bait);
      if (outcome.ok) {
        answeredOnce = true;
        findings.push(...scanInjection(outcome.text, tool.name));
      }
    }
    if (answeredOnce) exercised += 1;
    else unexercised.push(tool.name);
  }
  const status = hasHighSeverity(findings) ? "fail" : "pass";
  const reason = exerciseReason(exercised, unexercised, skipped);
  return { id: "1.2", status, findings, reason };
}
917
/**
 * Build the human-readable `reason` string for the bait-exercise probe.
 *
 * @param {number} exercised - Count of tools that answered at least one bait.
 * @param {string[]} unexercised - Names of tools that never answered.
 * @param {string[]} skipped - Names of tools that were deliberately not called.
 * @returns {string|null} Notes joined by "; ", or null when there is nothing to report.
 */
function exerciseReason(exercised, unexercised, skipped) {
  const notes = [];
  const nothingRan = exercised === 0 && skipped.length === 0;
  if (nothingRan) {
    notes.push("no tools could be exercised with bait inputs");
  }
  if (skipped.length) {
    notes.push(skippedNote(skipped));
  }
  if (unexercised.length) {
    const names = unexercised.join(", ");
    notes.push(`${unexercised.length} tool(s) errored/timed out on bait (unevaluated): ${names}`);
  }
  if (notes.length === 0) return null;
  return notes.join("; ");
}
924
/**
 * Category C-01: run probe 1.1 (declared tool surface) and then probe 1.2
 * (tool output under bait) and combine them.
 *
 * @param {object} ctx - Probe context passed through to both probes.
 * @returns {Promise<{code: "C-01", status: "fail"|"pass", probes: object[]}>}
 *   "fail" when either probe failed.
 */
async function c01Injection(ctx) {
  const surfaceProbe = probe11(ctx);
  const outputProbe = await probe12(ctx);
  const probes = [surfaceProbe, outputProbe];
  const anyFailed = probes.some((probe) => probe.status === "fail");
  return { code: "C-01", status: anyFailed ? "fail" : "pass", probes };
}
929
+
930
+ // ../probes/src/docker/egress-runner.ts
931
+ import { randomUUID as randomUUID4 } from "crypto";
932
// Docker image tag used for both the sink container and the target container
// started by the egress probe.
var IMAGE_TAG3 = "polygraph-egress-sniff:latest";
933
/**
 * Build the egress result used when the sandbox did not run.
 *
 * @param {string} reason - Why the egress probe was not executed.
 * @returns {{ran: false, reason: string, attempts: never[]}}
 */
function notRan(reason) {
  const attempts = [];
  return { ran: false, reason, attempts };
}
936
/**
 * Extract egress attempts from sinkhole log output.
 *
 * Only lines of the form `EGRESS {json}` are considered. Lines whose JSON does
 * not parse, or whose `kind` is neither "tcp" nor "dns", are ignored.
 *
 * @param {string} output - Raw log text, one record per line.
 * @returns {Array<{kind: "tcp"|"dns", host?: unknown, port?: unknown, firstBytes?: unknown}>}
 *   One entry per accepted record; optional fields are copied only when present.
 */
function parseSinkholeOutput(output) {
  const recordLine = /^EGRESS\s+(\{.*\})\s*$/;
  const optionalFields = ["host", "port", "firstBytes"];
  const attempts = [];

  for (const line of output.split("\n")) {
    const payload = recordLine.exec(line)?.[1];
    if (payload === undefined) continue;

    let rec;
    try {
      rec = JSON.parse(payload);
    } catch {
      // Malformed record: skip the line, keep parsing the rest.
      continue;
    }
    if (rec.kind !== "tcp" && rec.kind !== "dns") continue;

    const attempt = { kind: rec.kind };
    for (const field of optionalFields) {
      if (rec[field] !== undefined) attempt[field] = rec[field];
    }
    attempts.push(attempt);
  }
  return attempts;
}
956
/**
 * Convert captured egress attempts into high-severity "egress" findings.
 *
 * `match` is "host[:port]" when a host is known, otherwise "port N"
 * ("port ?" when the port is missing too).
 *
 * @param {Array<{host?: string, port?: number, firstBytes?: string}>} attempts
 * @returns {object[]} One finding per attempt, in the same order.
 */
function egressToFindings(attempts) {
  return attempts.map((attempt) => {
    const { host, port, firstBytes } = attempt;

    let match;
    if (host) {
      match = port ? `${host}:${port}` : `${host}`;
    } else {
      match = `port ${port ?? "?"}`;
    }

    const finding = { kind: "egress", severity: "high", match };
    if (host !== undefined) finding.host = host;
    if (port !== undefined) finding.port = port;
    if (firstBytes !== undefined) finding.firstBytes = firstBytes;
    return finding;
  });
}
966
/**
 * Look for canary values inside captured egress attempts.
 *
 * For each attempt, `firstBytes` is checked first and `host` second; fields
 * that are absent or empty are not checked.
 *
 * @param {Array<{firstBytes?: string, host?: string}>} attempts
 * @param {string[]} canaries - Canary strings handed to `canaryMatch`.
 * @returns {object[]} All findings returned by `canaryMatch`, in scan order.
 */
function egressCanaryFindings(attempts, canaries) {
  const scannedFields = ["firstBytes", "host"];
  return attempts.flatMap((attempt) => {
    const hits = [];
    for (const field of scannedFields) {
      const value = attempt[field];
      if (value) hits.push(...canaryMatch(value, canaries));
    }
    return hits;
  });
}
974
/**
 * Build the `docker run` argument list for the sandboxed target container.
 *
 * @param {object} opts
 * @param {string} opts.targetName - Container name.
 * @param {string} opts.net - Docker network to attach to.
 * @param {string} opts.sinkIp - Address passed to `--dns`.
 * @param {string} opts.vol - Volume mounted read-only at /stage.
 * @param {string} opts.entry - Script path passed to the `node` entrypoint.
 * @param {Record<string, string>} opts.canaryEnv - Each entry becomes `-e KEY=VALUE`.
 * @param {string[]} opts.label - Extra flags spliced in verbatim.
 * @param {string} [opts.runtime] - When set, adds `--runtime <value>`.
 * @returns {string[]} Arguments for the `docker` command.
 */
function egressTargetArgs(opts) {
  const envFlags = [];
  for (const [key, value] of Object.entries(opts.canaryEnv)) {
    envFlags.push("-e", `${key}=${value}`);
  }
  const runtimeFlags = opts.runtime ? ["--runtime", opts.runtime] : [];

  const placement = [
    "run", "-i", "--rm",
    "--name", opts.targetName,
    "--network", opts.net,
    "--dns", opts.sinkIp,
    "-v", `${opts.vol}:/stage:ro`
  ];
  const lockdown = [
    "--user", "node",
    "--read-only",
    "--tmpfs", "/tmp:rw,size=64m,mode=1777",
    "--cap-drop=ALL",
    // Disable IPv6 in the target: the sinkhole/iptables capture is IPv4-only, so
    // an IPv6 socket would otherwise dodge detection (and, on a dual-stack net,
    // egress). --cpus bounds host CPU starvation by a hostile busy-loop.
    "--sysctl", "net.ipv6.conf.all.disable_ipv6=1",
    "--sysctl", "net.ipv6.conf.default.disable_ipv6=1",
    "--cpus", "1",
    "--security-opt", "no-new-privileges",
    "--pids-limit", "256",
    "--memory", "512m"
  ];
  const launch = ["--entrypoint", "node", IMAGE_TAG3, opts.entry];

  return [...placement, ...lockdown, ...opts.label, ...envFlags, ...runtimeFlags, ...launch];
}
1019
/**
 * Run an npm package inside the egress sandbox and report the outbound
 * attempts recorded by the sink container.
 *
 * Steps: parse the ref, stage the package, create an internal Docker network,
 * start the sink container, start the target container via `connectTarget`,
 * call every listed tool once, then read the sink's logs.
 *
 * @param {string} ref - Server reference; only refs that parse with registry "npm" are run.
 * @param {{canaryEnv: Record<string, string>, runLabel?: string}} opts
 * @returns {Promise<{ran: boolean, reason: string|null, attempts: object[]}>}
 *   `ran: false` with a reason when the ref is not runnable, the package has no
 *   launchable bin, or any sandbox step throws.
 */
async function runEgressProbe(ref, opts) {
  let parsed;
  try {
    parsed = parseServerRef(ref);
  } catch {
    return notRan("egress sandbox only runs launchable package refs (npm)");
  }
  if (parsed.registry !== "npm") {
    return notRan(`egress sandbox for ${parsed.registry} targets not implemented (npm only)`);
  }

  const scopedName = parsed.owner ? `${parsed.owner}/${parsed.name}` : parsed.name;
  const versionSuffix = parsed.version ? `@${parsed.version}` : "";
  const pkgSpec = scopedName + versionSuffix;

  // Fresh random suffix per resource name.
  const shortId = () => randomUUID4().slice(0, 8);
  const net = `pg-egress-${shortId()}`;
  const sink = `pg-sink-${shortId()}`;
  const targetName = `pg-target-${shortId()}`;
  const label = labelFlags(opts.runLabel);

  let staged = null;
  try {
    await ensureImage();

    try {
      const stageOpts = opts.runLabel ? { runLabel: opts.runLabel } : {};
      staged = await stageNpmPackage(pkgSpec, stageOpts);
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      // A package without a launchable bin is reported verbatim; anything else
      // falls through to the outer "sandbox unavailable" handler.
      if (msg.includes("exposes no launchable bin")) return notRan(msg);
      throw err;
    }
    const { volume: vol, entry } = staged;

    await docker(["network", "create", "--internal", ...label, net]);

    const sinkArgs = [
      "run",
      "-d",
      "--name",
      sink,
      "--network",
      net,
      ...label,
      "--cap-add=NET_ADMIN",
      "--pids-limit",
      "64",
      "--memory",
      "256m",
      "--entrypoint",
      "/sink-entrypoint.sh",
      IMAGE_TAG3
    ];
    await docker(sinkArgs);

    const ipTemplate = `{{(index .NetworkSettings.Networks "${net}").IPAddress}}`;
    const inspected = await docker(["inspect", "-f", ipTemplate, sink]);
    const sinkIp = inspected.trim();

    const runtimeOverride = process.env.LITMUS_DOCKER_RUNTIME;
    const targetArgs = egressTargetArgs({
      targetName,
      net,
      sinkIp,
      vol,
      entry,
      canaryEnv: opts.canaryEnv,
      label,
      ...runtimeOverride ? { runtime: runtimeOverride } : {}
    });

    const conn = await connectTarget({ command: "docker", args: targetArgs, serverRef: `npm/${pkgSpec}` });
    try {
      const { tools } = await conn.client.listTools();
      for (const listed of tools) {
        const tool = {
          name: listed.name,
          description: listed.description ?? "",
          inputSchema: listed.inputSchema ?? null
        };
        await exerciseTool(conn.client, tool);
      }
    } finally {
      await conn.teardown();
    }

    const logs = await docker(["logs", sink]);
    return { ran: true, reason: null, attempts: parseSinkholeOutput(logs) };
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    return notRan(`egress sandbox unavailable: ${detail}`);
  } finally {
    // Best-effort removal of the containers and network; failures are ignored.
    const ignore = () => {
    };
    await docker(["rm", "-f", targetName]).catch(ignore);
    await docker(["rm", "-f", sink]).catch(ignore);
    await docker(["network", "rm", net]).catch(ignore);
    // NOTE(review): unlike the docker cleanup above, a rejection here is not
    // swallowed and would replace the function's result — confirm intended.
    if (staged) await staged.cleanup();
  }
}
1097
+
1098
+ // ../probes/src/probes/c02-egress.ts
1099
/**
 * Probe 2.1: flag tools whose declaration disagrees with their name.
 *
 * A tool is flagged when `declarationMismatch` returns a verb for it.
 *
 * @param {Array<{name: string}>} tools - Tool entries handed to `declarationMismatch`.
 * @returns {{id: "2.1", status: "fail"|"pass", findings: object[]}} "fail" when
 *   at least one tool was flagged.
 */
function probe21Declaration(tools) {
  const findings = [];
  for (const tool of tools) {
    const verb = declarationMismatch(tool);
    if (!verb) continue;
    findings.push({
      kind: "permission-mislabel",
      severity: "high",
      match: `claims readOnlyHint:true but name verb "${verb}" mutates`,
      tool: tool.name
    });
  }
  const status = findings.length === 0 ? "pass" : "fail";
  return { id: "2.1", status, findings };
}
1114
/**
 * Probe 2.2: turn the egress capture into a probe result.
 *
 * @param {{ran: boolean, reason: string|null, attempts: object[]}} egress
 * @returns {object} "skipped" (carrying `egress.reason`) when the sandbox did
 *   not run; otherwise "fail" if any attempt was captured, else "pass".
 */
function probe22Egress(egress) {
  if (egress.ran) {
    const findings = egressToFindings(egress.attempts);
    const status = findings.length > 0 ? "fail" : "pass";
    return { id: "2.2", status, findings };
  }
  return { id: "2.2", status: "skipped", findings: [], reason: egress.reason };
}
1119
/**
 * Category C-02: combine the declaration probe (2.1) with the egress probe (2.2).
 *
 * A failure in either probe wins. Otherwise a skipped egress probe makes the
 * whole category "skipped" and carries `egress.reason` along.
 *
 * @param {{status: string}} declaration - Result of probe 2.1.
 * @param {{ran: boolean, reason: string|null, attempts: object[]}} egress
 * @returns {{code: "C-02", status: "fail"|"skipped"|"pass", probes: object[], reason?: string|null}}
 */
function c02Permission(declaration, egress) {
  const egressProbe = probe22Egress(egress);
  const probes = [declaration, egressProbe];

  const anyFailed = declaration.status === "fail" || egressProbe.status === "fail";
  if (anyFailed) {
    return { code: "C-02", status: "fail", probes };
  }
  if (egressProbe.status === "skipped") {
    return { code: "C-02", status: "skipped", reason: egress.reason, probes };
  }
  return { code: "C-02", status: "pass", probes };
}
1132
+
1133
+ // ../probes/src/probes/c03-sensitive.ts
1134
/**
 * Probe 4.1: call each tool once and look for canary values in its output.
 *
 * Tools for which `shouldSkipExercise` returns true are recorded as skipped;
 * tools whose call does not come back `ok` are recorded as unexercised.
 *
 * @param {object} ctx - Probe context; reads `ctx.tools`, `ctx.client`, `ctx.canaries`.
 * @returns {Promise<{id: "4.1", status: "fail"|"pass", findings: object[], reason: string|null}>}
 *   "fail" when any canary was found in any tool output.
 */
async function probe41(ctx) {
  const findings = [];
  const unexercised = [];
  const skipped = [];
  let exercised = 0;

  for (const tool of ctx.tools) {
    const { name } = tool;
    if (shouldSkipExercise(ctx, name)) {
      skipped.push(name);
      continue;
    }
    const result = await exerciseTool(ctx.client, tool);
    if (!result.ok) {
      unexercised.push(name);
      continue;
    }
    exercised += 1;
    for (const hit of canaryMatch(result.text, ctx.canaries)) {
      findings.push({ ...hit, tool: name });
    }
  }

  const notes = [];
  if (exercised === 0 && skipped.length === 0) {
    notes.push("no tools could be exercised");
  }
  if (skipped.length) {
    notes.push(skippedNote(skipped));
  }
  if (unexercised.length) {
    notes.push(`${unexercised.length} tool(s) errored/timed out (unevaluated): ${unexercised.join(", ")}`);
  }

  const status = findings.length > 0 ? "fail" : "pass";
  const reason = notes.length ? notes.join("; ") : null;
  return { id: "4.1", status, findings, reason };
}
1158
/**
 * Probe 4.2: look for canary values in captured egress traffic.
 *
 * @param {{canaries: string[]}} ctx
 * @param {{ran: boolean, reason: string|null, attempts: object[]}} egress
 * @returns {object} "partial" with a reason when there is no egress capture;
 *   otherwise "fail" if a canary appeared in an attempt, else "pass".
 */
function probe42(ctx, egress) {
  if (egress.ran) {
    const findings = egressCanaryFindings(egress.attempts, ctx.canaries);
    const status = findings.length > 0 ? "fail" : "pass";
    return { id: "4.2", status, findings };
  }
  const reason = egress.reason ?? "no egress capture (sandbox unavailable)";
  return { id: "4.2", status: "partial", findings: [], reason };
}
1170
/**
 * Category C-03: run probe 4.1 (canaries in tool output) and probe 4.2
 * (canaries in egress traffic) and combine them.
 *
 * Only a "fail" from either probe fails the category; a "partial" 4.2 result
 * still yields "pass" here.
 *
 * @param {object} ctx - Probe context.
 * @param {{ran: boolean, reason: string|null, attempts: object[]}} egress
 * @returns {Promise<{code: "C-03", status: "fail"|"pass", probes: object[]}>}
 */
async function c03Sensitive(ctx, egress) {
  const probes = [await probe41(ctx), probe42(ctx, egress)];
  const anyFailed = probes.some((probe) => probe.status === "fail");
  return { code: "C-03", status: anyFailed ? "fail" : "pass", probes };
}
1176
+
1177
+ // ../probes/src/probes/canaries.ts
1178
+ import { randomUUID as randomUUID5 } from "crypto";
1179
+ import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "fs";
1180
+ import { tmpdir } from "os";
1181
+ import { dirname, join as join2 } from "path";
1182
/**
 * Mint a fresh set of canary values derived from one random UUID.
 *
 * The API key embeds the full UUID; the token and the e-mail address embed the
 * first 16 characters of the UUID with its dashes removed.
 *
 * @returns {{apiKey: string, token: string, pii: string, all: string[]}}
 *   `all` lists the three values in the order apiKey, token, pii.
 */
function mintCanaries() {
  const uuid = randomUUID5();
  const short = uuid.split("-").join("").substring(0, 16);
  const canaries = {
    apiKey: `POLYGRAPH-CANARY-${uuid}`,
    token: `pgt_${short}`,
    pii: `canary.${short}@polygraph.invalid`
  };
  return { ...canaries, all: [canaries.apiKey, canaries.token, canaries.pii] };
}
1190
/**
 * Map canary values onto credential-looking environment variable names.
 *
 * @param {{apiKey: string, token: string, pii: string}} c - Canary set.
 * @returns {Record<string, string>} Environment entries to seed into the target.
 */
function canaryEnv(c) {
  const { apiKey, token, pii } = c;
  const databaseUrl = `postgres://canary:${token}@db.internal:5432/app`;
  return {
    OPENAI_API_KEY: apiKey,
    ANTHROPIC_API_KEY: apiKey,
    GITHUB_TOKEN: token,
    AWS_SECRET_ACCESS_KEY: token,
    DATABASE_URL: databaseUrl,
    USER_EMAIL: pii
  };
}
1200
/**
 * Produce the canary-bearing files to plant in the target's working directory.
 *
 * @param {{apiKey: string, token: string, pii: string}} c - Canary set.
 * @returns {Record<string, string>} Map of relative path to file contents;
 *   every file ends with a trailing newline.
 */
function canaryFiles(c) {
  // Each file is a list of lines; the final "" yields the trailing newline.
  const dotenv = [
    `OPENAI_API_KEY=${c.apiKey}`,
    `DATABASE_URL=postgres://canary:${c.token}@db.internal:5432/app`,
    `USER_EMAIL=${c.pii}`,
    ""
  ];
  const awsCredentials = [
    "[default]",
    `aws_access_key_id=${c.apiKey}`,
    `aws_secret_access_key=${c.token}`,
    ""
  ];
  const secrets = { apiKey: c.apiKey, token: c.token, email: c.pii };
  return {
    ".env": dotenv.join("\n"),
    ".aws/credentials": awsCredentials.join("\n"),
    "config/secrets.json": `${JSON.stringify(secrets, null, 2)}\n`
  };
}
1213
/**
 * Create a temporary directory and write the canary files into it.
 *
 * @param {{apiKey: string, token: string, pii: string}} c - Canary set.
 * @returns {{dir: string, cleanup: () => void}} The directory path and a
 *   function that removes it recursively; removal errors are ignored.
 */
function seedCanaryDir(c) {
  const dir = mkdtempSync(join2(tmpdir(), "polygraph-litmus-"));
  const files = canaryFiles(c);
  for (const rel of Object.keys(files)) {
    const destination = join2(dir, rel);
    // Some paths are nested (e.g. ".aws/credentials"), so create parents first.
    mkdirSync(dirname(destination), { recursive: true });
    writeFileSync(destination, files[rel], "utf8");
  }
  const cleanup = () => {
    try {
      rmSync(dir, { recursive: true, force: true });
    } catch {
    }
  };
  return { dir, cleanup };
}
1230
+
1231
+ // ../probes/src/grade.ts
1232
/**
 * Derive the letter grade and its rationale from the category results.
 *
 * Rules, checked in order:
 *  - C-01 or C-03 failed            -> "F" (disqualifying)
 *  - C-02 failed                    -> "D"
 *  - C-01, C-02 and C-03 all passed -> "A"
 *  - C-01 passed (others unverified)-> "B", naming any skipped categories
 *  - anything else                  -> "F" (C-01 did not complete)
 *
 * @param {Array<{code: string, status: string, reason?: string|null}>} categories
 * @returns {{grade: "A"|"B"|"D"|"F", rationale: string}}
 */
function gradeFromCategories(categories) {
  const byCode = (code) => categories.find((c) => c.code === code);
  const c01 = byCode("C-01");
  const c02 = byCode("C-02");
  const c03 = byCode("C-03");
  const failed = categories.filter((c) => c.status === "fail").map((c) => c.code);
  const skipped = categories.filter((c) => c.status === "skipped").map((c) => c.code);

  if (c01?.status === "fail" || c03?.status === "fail") {
    return {
      grade: "F",
      rationale: `Disqualifying failure in ${failed.join(", ")} \u2014 active injection or data leak harms an agent that trusts this server.`
    };
  }
  if (c02?.status === "fail") {
    return {
      grade: "D",
      rationale: "Unexpected egress (C-02 failed); no injection or data leak. Egress is serious but not proven exfiltration, so the grade caps at D."
    };
  }
  if (c01?.status === "pass" && c02?.status === "pass" && c03?.status === "pass") {
    return { grade: "A", rationale: "All three categories passed. No injection, no unexpected egress, no data leak." };
  }
  if (c01?.status === "pass") {
    let note = "";
    if (skipped.length) {
      const reasons = skipped.map((code) => byCode(code)?.reason).filter(Boolean).join("; ");
      // Only add the parenthetical when a reason exists, so the rationale
      // never ends in a dangling " ()".
      const detail = reasons ? ` (${reasons})` : "";
      note = ` Not verified: ${skipped.join(", ")}${detail}.`;
    }
    return {
      grade: "B",
      rationale: `Injection checks passed; egress not verified.${note}`
    };
  }
  return {
    grade: "F",
    rationale: "C-01 did not complete \u2014 the tool surface could not be evaluated, so the server is treated as ungraded/unsafe."
  };
}
1266
+
1267
+ // ../probes/src/bundle.ts
1268
+ import { createRequire } from "module";
1269
// CommonJS-style `require` anchored at this module, used to load package.json.
var require2 = createRequire(import.meta.url);
// Version read from ../package.json; "0.0.0" when the file cannot be loaded
// or carries no version.
var PKG_VERSION = (() => {
  let version;
  try {
    ({ version } = require2("../package.json"));
  } catch {
    return "0.0.0";
  }
  return version ?? "0.0.0";
})();
1277
// Default disclaimer stamped on a bundle when the caller does not supply one.
var DISCLAIMER = `Self-run, self-minted under ${METHODOLOGY_VERSION}. Independence traded for cost. Re-run the open harness to verify.`;
1278
/**
 * Assemble the result bundle for one run.
 *
 * @param {object} input - Run results: serverRef, resolvedVersion, target,
 *   toolDefsFingerprint, toolDefs, ranAt, categories, grade ({grade, rationale}),
 *   dockerAvailable, plus optional stdioIsolation and disclaimer.
 * @returns {object} The bundle, stamped with the schema and methodology
 *   versions and a `harness` description. `disclaimer` falls back to DISCLAIMER.
 */
function assembleBundle(input) {
  const harness = {
    package: "@polygraph/probes",
    version: PKG_VERSION,
    node: process.version,
    dockerAvailable: input.dockerAvailable
  };
  // Recorded only when the caller supplied it.
  if (input.stdioIsolation) {
    harness.stdioIsolation = input.stdioIsolation;
  }

  const { grade: letter, rationale } = input.grade;
  return {
    schemaVersion: BUNDLE_SCHEMA_VERSION,
    methodologyVersion: METHODOLOGY_VERSION,
    serverRef: input.serverRef,
    resolvedVersion: input.resolvedVersion,
    target: input.target,
    toolDefsFingerprint: input.toolDefsFingerprint,
    toolDefs: input.toolDefs,
    ranAt: input.ranAt,
    harness,
    categories: input.categories,
    grade: letter,
    gradeRationale: rationale,
    disclaimer: input.disclaimer ?? DISCLAIMER
  };
}
1302
+
1303
+ // ../probes/src/harness.ts
1304
/**
 * Run the full litmus harness against one target and return the result bundle.
 *
 * Flow: mint canaries, seed them into the environment (and, for non-HTTP
 * targets, into a temp directory), connect, enumerate and fingerprint the tool
 * surface, run the egress sandbox when applicable, evaluate categories C-01,
 * C-02 and C-03, grade, and assemble the bundle.
 *
 * @param {string|object} target - An http(s) URL string, or anything else
 *   `connectTarget` accepts; every non-HTTP target is treated as stdio.
 * @param {object} [opts]
 * @param {"docker"|"none"} [opts.isolation] - Defaults to "docker" when
 *   LITMUS_STDIO_ISOLATION=docker, otherwise "none".
 * @param {object} [opts.headers] - Forwarded to `connectTarget` as `httpHeaders`.
 * @param {string} [opts.runLabel] - Forwarded to `connectTarget` and the egress probe.
 * @param {boolean} [opts.allowStateChanging=false] - Stored on the probe context.
 * @param {number} [opts.timeoutMs] - Overall deadline for the probe phase.
 * @param {string} [opts.disclaimer] - Overrides the default bundle disclaimer.
 * @returns {Promise<object>} The assembled bundle.
 * @throws {Error} When Docker isolation is requested for a stdio target but
 *   Docker is unavailable or the egress sandbox did not run, when the tool
 *   surface is not gradable, or when the run exceeds `opts.timeoutMs`.
 */
async function runLitmus(target, opts = {}) {
  const isolation = opts.isolation ?? (process.env.LITMUS_STDIO_ISOLATION === "docker" ? "docker" : "none");
  const ranAt = (/* @__PURE__ */ new Date()).toISOString();
  const dockerAvailable = await checkDocker();
  const canaries = mintCanaries();
  const seedEnv = canaryEnv(canaries);
  const isHttp = typeof target === "string" && /^https?:\/\//i.test(target);
  const isStdio = !isHttp;
  if (isolation === "docker" && isStdio && !dockerAvailable) {
    throw new Error("stdio isolation requires Docker \u2014 refusing to run the target on the host");
  }
  const seed = isHttp ? null : seedCanaryDir(canaries);
  let conn = null;
  try {
    // Connecting happens inside the try so that a failed connect still removes
    // the seeded canary directory.
    conn = await connectTarget(target, {
      seedEnv,
      seedCwd: seed?.dir,
      httpHeaders: opts.headers,
      isolation,
      ...opts.runLabel ? { runLabel: opts.runLabel } : {}
    });
    const runProbes = async () => {
      const listed = await enumerateTools(conn.client);
      const tools = listed.map((t) => ({
        name: t.name,
        description: t.description ?? "",
        inputSchema: t.inputSchema ?? null
      }));
      assertGradableSurface(tools);
      const { fingerprint, canonical } = fingerprintToolDefs(tools);
      const annotated = listed.map((t) => ({
        name: t.name,
        description: t.description ?? "",
        annotations: t.annotations
      }));
      const stateChangingTools = stateChangingToolNames(annotated);
      const ctx = {
        client: conn.client,
        tools,
        canaries: canaries.all,
        dockerAvailable,
        stateChangingTools,
        allowStateChanging: opts.allowStateChanging ?? false
      };
      // The egress sandbox only runs for string (package-ref) targets that are
      // not HTTP URLs, and only when Docker is available.
      const canRunEgress = dockerAvailable && typeof target === "string" && !isHttp;
      const egress = canRunEgress
        ? await runEgressProbe(target, { canaryEnv: seedEnv, ...opts.runLabel ? { runLabel: opts.runLabel } : {} })
        : notRan(dockerAvailable ? "egress not run for this target" : "no sandbox (Docker unavailable)");
      assertEgressRanUnderIsolation(egress, isolation, isStdio);
      const categories = [
        await c01Injection(ctx),
        c02Permission(probe21Declaration(annotated), egress),
        await c03Sensitive(ctx, egress)
      ];
      const grade = gradeFromCategories(categories);
      return assembleBundle({
        serverRef: conn.serverRef,
        resolvedVersion: conn.resolvedVersion,
        target: conn.descriptor,
        toolDefsFingerprint: fingerprint,
        toolDefs: canonical,
        categories,
        grade,
        ranAt,
        dockerAvailable,
        // Record how a stdio target was executed; omit for http.
        ...isStdio ? { stdioIsolation: isolation } : {},
        ...opts.disclaimer ? { disclaimer: opts.disclaimer } : {}
      });
    };
    return opts.timeoutMs !== void 0 ? await withTimeout(runProbes(), opts.timeoutMs, `litmus run exceeded ${opts.timeoutMs}ms`) : await runProbes();
  } finally {
    try {
      await conn?.teardown();
    } finally {
      // Runs even when teardown rejects, so canary files never outlive the run.
      seed?.cleanup();
    }
  }
}
1380
/**
 * Refuse to continue when Docker isolation was requested for a stdio target
 * but the egress sandbox did not actually run.
 *
 * @param {{ran: boolean, reason?: string|null}} egress
 * @param {string} isolation - Isolation mode; only "docker" is enforced.
 * @param {boolean} isStdio - Whether the target is a stdio target.
 * @throws {Error} When isolation is "docker", the target is stdio and `egress.ran` is false.
 */
function assertEgressRanUnderIsolation(egress, isolation, isStdio) {
  const mustBeIsolated = isolation === "docker" && isStdio;
  if (!mustBeIsolated || egress.ran) return;
  const why = egress.reason ?? "unknown reason";
  throw new Error(
    `stdio isolation failed: the egress sandbox did not run (${why}) \u2014 refusing to grade without isolation`
  );
}
1387
// Default deadline for a single listTools call, in milliseconds.
var LIST_TIMEOUT_MS = 30 * 1000;
// Default upper bound on the number of tools a gradable surface may expose.
var MAX_TOOLS = 4096;
// Default upper bound on the summed length of tool names and descriptions (8 MiB).
var MAX_SURFACE_BYTES = 8 * 1024 * 1024;
1390
/**
 * Reject tool surfaces that are too large to grade.
 *
 * Size is the running sum of each tool's `name.length` and
 * `description.length` (string lengths, not encoded bytes).
 *
 * @param {Array<{name?: string, description?: string}>} tools
 * @throws {Error} When there are more than MAX_TOOLS tools, or the running
 *   size exceeds MAX_SURFACE_BYTES.
 */
function assertGradableSurface(tools) {
  const toolCount = tools.length;
  if (toolCount > MAX_TOOLS) {
    throw new Error(`tool surface too large to grade: ${toolCount} tools (max ${MAX_TOOLS})`);
  }
  let total = 0;
  for (const { name, description } of tools) {
    total += (name?.length ?? 0) + (description?.length ?? 0);
    if (total > MAX_SURFACE_BYTES) {
      throw new Error(`tool surface exceeds ${MAX_SURFACE_BYTES} bytes \u2014 refusing to grade`);
    }
  }
}
1402
/**
 * Collect every tool a server lists, following `nextCursor` pagination.
 *
 * The listing stops when a page has no `nextCursor`. While more pages are
 * promised, the call is aborted if the surface already exceeds the gradable
 * caps, or if the server keeps paginating past `maxPages` pages. The page cap
 * stops a server that returns empty pages with a fresh cursor forever, which
 * the tool and byte caps alone would never catch.
 *
 * @param {{listTools: Function}} client - Client whose `listTools` accepts an
 *   optional `{ cursor }` and resolves to `{ tools?, nextCursor? }`.
 * @param {object} [opts]
 * @param {number} [opts.maxTools=MAX_TOOLS]
 * @param {number} [opts.maxBytes=MAX_SURFACE_BYTES] - Cap on summed name and
 *   description string lengths.
 * @param {number} [opts.listTimeoutMs=LIST_TIMEOUT_MS] - Deadline per page.
 * @param {number} [opts.maxPages=maxTools + 1] - Cap on pages fetched while the
 *   server still advertises a next page.
 * @returns {Promise<object[]>} All listed tools, in server order.
 * @throws {Error} On a page timeout, or when pagination continues past a cap.
 */
async function enumerateTools(client, opts = {}) {
  const maxTools = opts.maxTools ?? MAX_TOOLS;
  const maxBytes = opts.maxBytes ?? MAX_SURFACE_BYTES;
  const listTimeoutMs = opts.listTimeoutMs ?? LIST_TIMEOUT_MS;
  // One tool per page is the slowest legitimate pagination, so a surface that
  // fits within maxTools never needs more than maxTools + 1 pages.
  const maxPages = opts.maxPages ?? (maxTools + 1);
  const all = [];
  let bytes = 0;
  let pages = 0;
  let cursor;
  for (; ; ) {
    const page = await withTimeout(
      client.listTools(cursor !== void 0 ? { cursor } : void 0),
      listTimeoutMs,
      "listTools timed out"
    );
    pages += 1;
    for (const t of page.tools ?? []) {
      all.push(t);
      bytes += (t.name?.length ?? 0) + (t.description?.length ?? 0);
    }
    cursor = page.nextCursor;
    if (cursor === void 0) break;
    if (all.length > maxTools || bytes > maxBytes) {
      throw new Error(
        `tool surface still paginating past the gradable cap (>${maxTools} tools / >${maxBytes} bytes) \u2014 refusing to grade a partial surface`
      );
    }
    if (pages >= maxPages) {
      throw new Error(
        `tool listing did not terminate after ${pages} pages \u2014 refusing to grade a partial surface`
      );
    }
  }
  return all;
}
1429
/**
 * Settle with `p`, or reject with `new Error(label)` if `p` has not settled
 * within `ms` milliseconds.
 *
 * The timer is unref'd so it never keeps the process alive, and it is cleared
 * as soon as the race settles so finished calls leave no pending timer behind.
 *
 * @template T
 * @param {Promise<T>|T} p - Work to wait for.
 * @param {number} ms - Deadline in milliseconds.
 * @param {string} label - Message of the timeout error.
 * @returns {Promise<T>}
 */
function withTimeout(p, ms, label) {
  let timer;
  const deadline = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error(label)), ms);
    timer.unref?.();
  });
  return Promise.race([p, deadline]).finally(() => clearTimeout(timer));
}
1438
/**
 * Check whether Docker is usable by running `docker info` with a 4 second timeout.
 *
 * @returns {Promise<boolean>} true when the command completes without error;
 *   false when it errors, times out, or cannot be spawned. Never rejects.
 */
function checkDocker() {
  return new Promise((resolve) => {
    const onDone = (err) => resolve(!err);
    const onSpawnError = () => resolve(false);
    const child = execFile2("docker", ["info"], { timeout: 4000 }, onDone);
    child.on("error", onSpawnError);
  });
}
1444
+
1445
+ export {
1446
+ connectTarget,
1447
+ fingerprintToolDefs,
1448
+ classifyTool,
1449
+ stateChangingToolNames,
1450
+ invisibleUnicode,
1451
+ instructionMimicry,
1452
+ markdownTricks,
1453
+ canaryMatch,
1454
+ hasHighSeverity,
1455
+ gradeFromCategories,
1456
+ assembleBundle,
1457
+ runLitmus
1458
+ };