@polygraphso/litmus 0.12.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # @polygraphso/litmus
2
2
 
3
+ [![polygraph](https://polygraph.so/api/badge?server=npm/@polygraphso/litmus)](https://polygraph.so/mcp/npm/@polygraphso/litmus)
4
+
3
5
  The behavioral **litmus** harness for MCP servers, from [polygraph.so](https://polygraph.so).
4
6
 
5
7
  It connects to an MCP server the way an agent would, fingerprints its exact tool
@@ -52,6 +54,19 @@ your host: set `LITMUS_STDIO_ISOLATION=docker` to run the target only inside the
52
54
  hardened sandbox, or pass `--unsafe-host-exec` to accept host execution. Remote
53
55
  `https://` targets run no local code and need neither.
54
56
 
57
+ **Token-gated servers.** If a target is a token-gated `https://` server and you pass no
58
+ `--bearer` / `--header` / `LITMUS_BEARER`, litmus — on the auth failure — looks for a token you
59
+ already configured for that server (matched by URL in your MCP client config: project
60
+ `.mcp.json` / `.cursor/mcp.json` / `.vscode/mcp.json`, or your Claude Code / Claude Desktop /
61
+ Cursor config) and offers to reuse it. It is read-only, asks before sending, sends only to the
62
+ target origin, and never prints the token. In non-interactive use, pass `--use-discovered-auth`
63
+ to opt in without a prompt.
64
+
65
+ If the server uses **OAuth** (no static token to reuse), litmus opens your browser to authorize,
66
+ captures the token via a single-use `127.0.0.1` callback, and grades with it — used for that run
67
+ only, never stored. This happens automatically on an interactive terminal; use `--oauth` /
68
+ `--no-oauth` to force or skip it. From the `run_litmus` MCP tool, set `interactive_auth: true`.
69
+
55
70
  The `litmus` command exits non-zero on a failing grade (D/F), so it scripts in CI.
56
71
 
57
72
  To dispute a published grade, just re-run `litmus` against the same server: the harness is
@@ -3,7 +3,7 @@ import {
3
3
  METHODOLOGY_VERSION,
4
4
  parseServerRef,
5
5
  serverKey
6
- } from "./chunk-CKQZFK77.js";
6
+ } from "./chunk-IXX5YEBC.js";
7
7
 
8
8
  // ../probes/src/harness.ts
9
9
  import { execFile as execFile3 } from "child_process";
@@ -956,7 +956,25 @@ var STATE_CHANGING_VERBS = /* @__PURE__ */ new Set([
956
956
  "rename",
957
957
  "purchase",
958
958
  "checkout",
959
- "order"
959
+ "order",
960
+ "submit",
961
+ "confirm",
962
+ "finalize",
963
+ "cancel",
964
+ "publish",
965
+ "share",
966
+ "invite",
967
+ "book",
968
+ "schedule",
969
+ "subscribe",
970
+ "unsubscribe",
971
+ "register",
972
+ "upload",
973
+ "email",
974
+ "enable",
975
+ "disable",
976
+ "archive",
977
+ "restore"
960
978
  ]);
961
979
  var UNAMBIGUOUS_DESTRUCTIVE_VERBS = /* @__PURE__ */ new Set([
962
980
  "delete",
@@ -1034,6 +1052,13 @@ function stateChangingToolNames(tools) {
1034
1052
  }
1035
1053
  return names;
1036
1054
  }
1055
/**
 * Collect the names of tools that should not be actively exercised by default.
 *
 * A tool is included when `classifyTool` marks it as state-changing, or when
 * `declarationMismatchV2` returns anything other than `null` for it.
 *
 * @param {Iterable<object>} tools - Tool descriptors; each must expose `name`.
 * @returns {Set<string>} Names of the tools to skip.
 */
function unsafeToExerciseToolNames(tools) {
  const unsafe = /* @__PURE__ */ new Set();
  for (const tool of tools) {
    // Short-circuit: the mismatch check only runs for tools not already state-changing.
    const mustSkip = classifyTool(tool).stateChanging || declarationMismatchV2(tool) !== null;
    if (mustSkip) {
      unsafe.add(tool.name);
    }
  }
  return unsafe;
}
1037
1062
  function skippedNote(skipped) {
1038
1063
  return `${skipped.length} tool(s) skipped (state-changing; pass --allow-state-changing): ${skipped.join(", ")}`;
1039
1064
  }
@@ -2074,7 +2099,7 @@ async function runLitmus(target, opts = {}) {
2074
2099
  inputSchema: t.inputSchema ?? null,
2075
2100
  annotations: t.annotations
2076
2101
  }));
2077
- const stateChangingTools = stateChangingToolNames(annotated);
2102
+ const stateChangingTools = unsafeToExerciseToolNames(annotated);
2078
2103
  const ctx = {
2079
2104
  client: conn.client,
2080
2105
  tools,
@@ -2639,6 +2664,7 @@ export {
2639
2664
  fingerprintToolDefs,
2640
2665
  classifyTool,
2641
2666
  stateChangingToolNames,
2667
+ unsafeToExerciseToolNames,
2642
2668
  invisibleUnicode,
2643
2669
  instructionMimicry,
2644
2670
  markdownTricks,
@@ -1,9 +1,11 @@
1
1
  import {
2
2
  DEFAULT_RUN_TIMEOUT_MS,
3
+ acquireOAuthToken,
3
4
  checkHostExec,
5
+ isAuthError,
4
6
  parseAuthFlags,
5
7
  resolveTarget
6
- } from "./chunk-TTGWSGPC.js";
8
+ } from "./chunk-FFE6ZQPL.js";
7
9
  import {
8
10
  SKILL_CATEGORY_META,
9
11
  SKILL_METHODOLOGY_VERSION,
@@ -11,7 +13,7 @@ import {
11
13
  runSkillLitmus,
12
14
  runSkillQuality,
13
15
  runSkillQualityJudged
14
- } from "./chunk-OGOFUBLN.js";
16
+ } from "./chunk-7HI2KPXH.js";
15
17
  import {
16
18
  CATEGORY_META,
17
19
  CATEGORY_STATUS_UINT8,
@@ -20,7 +22,7 @@ import {
20
22
  parseSkillRef,
21
23
  serverKey,
22
24
  skillKey
23
- } from "./chunk-CKQZFK77.js";
25
+ } from "./chunk-IXX5YEBC.js";
24
26
 
25
27
  // ../onchain/src/networks.ts
26
28
  var NETWORKS = {
@@ -297,18 +299,20 @@ var RUN_LITMUS_TOOL_DESCRIPTION = [
297
299
  "",
298
300
  "server_ref examples: npm/@modelcontextprotocol/server-filesystem \xB7",
299
301
  "https://example.com/mcp \xB7 ./build/index.js. For a token-gated https:// target,",
300
- "pass `bearer`. If Docker is unavailable, C-02 is skipped and the grade is capped",
301
- "at B for that run."
302
+ "pass `bearer`; for an OAuth-gated one, set `interactive_auth: true` to open a",
303
+ "browser and authorize. If Docker is unavailable, C-02 is skipped and the grade is",
304
+ "capped at B for that run."
302
305
  ].join("\n");
303
306
  var runLitmusInputShape = {
304
307
  server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file."),
305
308
  bearer: z.string().min(1).max(8192).optional().describe("Bearer token for a token-gated https:// MCP server. Sent as `Authorization: Bearer <token>` to the target origin only. Ignored for stdio/local targets."),
306
309
  header: z.array(z.string()).max(20).optional().describe('Extra HTTP headers for a gated https:// target, each "Key: Value" (e.g. "X-Api-Key: \u2026"). Overrides the bearer-derived Authorization for the same key. Ignored for stdio/local targets.'),
307
310
  unsafe_host_exec: z.boolean().optional().describe("Required to grade a registry ref or local path: it launches the target's own code, and without Docker isolation that runs on THIS host. Set true to accept host execution. Ignored for https:// targets or when LITMUS_STDIO_ISOLATION=docker."),
308
- timeout_seconds: z.number().int().positive().max(3600).optional().describe("Aggregate wall-clock ceiling for the whole run, in seconds (default 900). Bounds a hostile server that stretches the run across many tools/probes.")
311
+ timeout_seconds: z.number().int().positive().max(3600).optional().describe("Aggregate wall-clock ceiling for the whole run, in seconds (default 900). Bounds a hostile server that stretches the run across many tools/probes."),
312
+ interactive_auth: z.boolean().optional().describe("If a token-gated https:// target uses OAuth, open a browser on THIS machine to authorize and grade with the obtained token (used for this run only, never stored). Default false: without it, an OAuth-gated target returns guidance instead of opening a browser. Ignored for stdio/local targets or when a bearer/header is supplied.")
309
313
  };
310
314
  var PROGRESS_TOTAL = 5;
311
- async function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, timeout_seconds }, extra) {
315
+ async function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, timeout_seconds, interactive_auth }, extra) {
312
316
  try {
313
317
  const argv = [
314
318
  ...bearer ? ["--bearer", bearer] : [],
@@ -331,11 +335,39 @@ async function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, t
331
335
  params: { progressToken, progress, total: PROGRESS_TOTAL, message }
332
336
  }) : void 0;
333
337
  sendProgress?.(0, `Connecting to ${server_ref}\u2026`);
334
- const bundle = await runLitmus(input, {
335
- ...Object.keys(headers).length ? { headers } : {},
338
+ const runOpts = {
336
339
  timeoutMs: timeout_seconds ? timeout_seconds * 1e3 : DEFAULT_RUN_TIMEOUT_MS,
337
340
  ...sendProgress ? { onProgress: (done, _total, label) => sendProgress(done, label) } : {}
338
- });
341
+ };
342
+ const isHttp = typeof input === "string" && /^https?:\/\//i.test(input);
343
+ const hasExplicitAuth = Object.keys(headers).length > 0;
344
+ let bundle;
345
+ try {
346
+ bundle = await runLitmus(input, { ...hasExplicitAuth ? { headers } : {}, ...runOpts });
347
+ } catch (err) {
348
+ if (!(isHttp && !hasExplicitAuth && isAuthError(err))) throw err;
349
+ if (!interactive_auth) {
350
+ return {
351
+ content: [
352
+ {
353
+ type: "text",
354
+ text: `${server_ref} is token-gated and appears to use OAuth. Re-run with "interactive_auth": true \u2014 a browser window will open on this machine to log in \u2014 or grade it from the \`polygraphso litmus\` CLI.`
355
+ }
356
+ ]
357
+ };
358
+ }
359
+ sendProgress?.(0, "Opening your browser to authorize\u2026");
360
+ const token = await acquireOAuthToken(input, {
361
+ onAuthUrl: (u) => sendProgress?.(0, `Authorize in your browser: ${u}`)
362
+ });
363
+ if (!token) {
364
+ return {
365
+ isError: true,
366
+ content: [{ type: "text", text: `run_litmus failed: could not obtain an OAuth token for ${server_ref} (declined, timed out, or not an OAuth server).` }]
367
+ };
368
+ }
369
+ bundle = await runLitmus(input, { headers: { Authorization: `Bearer ${token}` }, ...runOpts });
370
+ }
339
371
  const payload = summarize(bundle);
340
372
  return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
341
373
  } catch (err) {
@@ -0,0 +1,524 @@
1
+ import {
2
+ CATEGORY_META,
3
+ canonicalStringify
4
+ } from "./chunk-IXX5YEBC.js";
5
+
6
+ // ../cli/src/litmus.ts
7
+ import { existsSync as existsSync2 } from "fs";
8
+ import { createRequire } from "module";
9
+ import * as path2 from "path";
10
+
11
+ // ../cli/src/format.ts
12
+ function formatBundle(b) {
13
+ const lines = [];
14
+ lines.push(`\u2192 ${b.methodologyVersion} \xB7 ${b.serverRef}`);
15
+ if (b.resolvedVersion) lines.push(`\u2192 version ${b.resolvedVersion}`);
16
+ if (b.selfReportedVersion) lines.push(`\u2192 self-reported ${b.selfReportedVersion} (unverified)`);
17
+ lines.push("\u2192 checks");
18
+ const labelWidth = Math.max(0, ...b.categories.map((c) => CATEGORY_META[c.code].label.length));
19
+ for (const c of b.categories) {
20
+ const { label, description } = CATEGORY_META[c.code];
21
+ lines.push(` ${c.code} ${label.padEnd(labelWidth)} ${c.status}`);
22
+ lines.push(` ${description}`);
23
+ }
24
+ const c01 = b.categories.find((c) => c.code === "C-01");
25
+ if (c01?.status === "fail") {
26
+ const highs = c01.probes.flatMap((p) => p.findings).filter((f) => f.severity === "high");
27
+ for (const f of highs.slice(0, 3)) {
28
+ lines.push(` \u26A0 ${f.tool ?? "?"}: ${f.kind} \u2014 ${truncate(f.match, 64)}`);
29
+ }
30
+ }
31
+ lines.push(`\u2192 fingerprint ${shortFp(b.toolDefsFingerprint)}`);
32
+ lines.push(`\u2192 grade: ${b.grade}`);
33
+ lines.push(` ${b.gradeRationale}`);
34
+ return lines.join("\n") + "\n";
35
+ }
36
/**
 * Abbreviate a fingerprint for display.
 *
 * Strings longer than 14 characters are shown as their first 6 characters,
 * an ellipsis, and their last 4 characters; shorter strings pass through.
 *
 * @param {string} fp - Full fingerprint.
 * @returns {string} The abbreviated (or unchanged) fingerprint.
 */
function shortFp(fp) {
  if (fp.length <= 14) return fp;
  const head = fp.slice(0, 6);
  const tail = fp.slice(-4);
  return `${head}\u2026${tail}`;
}
39
/**
 * Cut a string down to at most `n` characters, appending an ellipsis when
 * anything was removed.
 *
 * @param {string} s - Text to shorten.
 * @param {number} n - Maximum number of characters kept from `s`.
 * @returns {string} `s` unchanged when it fits, otherwise the prefix plus "…".
 */
function truncate(s, n) {
  if (s.length <= n) return s;
  return `${s.slice(0, n)}\u2026`;
}
42
+
43
+ // ../cli/src/mcp-config.ts
44
+ import { existsSync, readFileSync } from "fs";
45
+ import { homedir } from "os";
46
+ import * as path from "path";
47
/**
 * Reduce a URL to a comparable `protocol//host/path` form.
 *
 * The protocol and host are lower-cased, one trailing slash is removed from
 * any path longer than "/", and the query string and fragment are dropped.
 * Input that cannot be parsed as a URL is returned untouched.
 *
 * @param {string} u - URL text to normalize.
 * @returns {string} The normalized URL, or `u` itself when parsing fails.
 */
function normalizeUrl(u) {
  let parsed;
  try {
    parsed = new URL(u);
  } catch {
    return u;
  }
  const { protocol, host, pathname } = parsed;
  const hasTrailingSlash = pathname.length > 1 && pathname.endsWith("/");
  const trimmedPath = hasTrailingSlash ? pathname.slice(0, -1) : pathname;
  return `${protocol.toLowerCase()}//${host.toLowerCase()}${trimmedPath}`;
}
57
/**
 * Expand `${NAME}` and `${env:NAME}` placeholders using the given environment.
 *
 * @param {string} value - Text that may contain placeholders.
 * @param {Record<string, string | undefined>} env - Variable lookup table.
 * @returns {string} `value` with every placeholder substituted; a variable
 *   that is missing (null/undefined) is replaced by the empty string.
 */
function resolveEnvPlaceholders(value, env) {
  const substitute = (_placeholder, varName) => {
    const resolved = env[varName];
    return resolved ?? "";
  };
  return value.replace(/\$\{(?:env:)?([A-Za-z_][A-Za-z0-9_]*)\}/g, substitute);
}
60
/**
 * Gather every MCP server entry declared in a parsed client config.
 *
 * Entries are read from the `mcpServers` and `servers` maps of `config`, and
 * recursively from each value of `config.projects`. Only object-valued
 * entries are returned: a `null` or primitive entry (for example
 * `"mcpServers": { "x": null }`) cannot describe a server and would make
 * callers that read `entry.url` throw.
 *
 * @param {unknown} config - Parsed JSON config (any shape is tolerated).
 * @returns {object[]} The server entry objects found, in declaration order.
 */
function collectServerEntries(config) {
  const out = [];
  const isRecord = (v) => v !== null && typeof v === "object";
  if (!isRecord(config)) return out;
  for (const key of ["mcpServers", "servers"]) {
    const map = config[key];
    if (isRecord(map)) out.push(...Object.values(map).filter(isRecord));
  }
  const projects = config.projects;
  if (isRecord(projects)) {
    // Per-project sections are walked with the same rules as the top level.
    for (const proj of Object.values(projects)) out.push(...collectServerEntries(proj));
  }
  return out;
}
74
/**
 * Find the headers configured for the server whose URL matches `targetUrl`.
 *
 * URLs are compared after `normalizeUrl`. For the first matching entry that
 * has at least one string-valued header, those headers are returned with
 * environment placeholders expanded; non-string header values are ignored.
 *
 * @param {unknown} config - Parsed client config.
 * @param {string} targetUrl - URL of the server being graded.
 * @param {Record<string, string | undefined>} env - Environment for placeholders.
 * @returns {Record<string, string> | null} Resolved headers, or `null` when
 *   no entry matches or no matching entry carries usable headers.
 */
function extractMatchingHeaders(config, targetUrl, env) {
  const wanted = normalizeUrl(targetUrl);
  for (const entry of collectServerEntries(config)) {
    const urlMatches = typeof entry.url === "string" && normalizeUrl(entry.url) === wanted;
    if (!urlMatches) continue;
    const declared = entry.headers;
    if (!declared || typeof declared !== "object") continue;
    const resolved = {};
    Object.entries(declared).forEach(([name, raw]) => {
      if (typeof raw === "string") resolved[name] = resolveEnvPlaceholders(raw, env);
    });
    // An entry whose headers are all unusable does not end the search.
    if (Object.keys(resolved).length > 0) return resolved;
  }
  return null;
}
87
/**
 * List the MCP client config files to probe, in priority order.
 *
 * Project-level files under `cwd` come first, followed by per-user files
 * under `home`.
 *
 * @param {string} cwd - Project directory.
 * @param {string} home - User home directory.
 * @returns {string[]} Candidate file paths (existence is not checked here).
 */
function candidateConfigPaths(cwd, home) {
  const projectFiles = [
    [".mcp.json"],
    [".cursor", "mcp.json"],
    [".vscode", "mcp.json"]
  ];
  const userFiles = [
    [".claude.json"],
    ["Library", "Application Support", "Claude", "claude_desktop_config.json"],
    [".cursor", "mcp.json"]
  ];
  return [
    ...projectFiles.map((segments) => path.join(cwd, ...segments)),
    ...userFiles.map((segments) => path.join(home, ...segments))
  ];
}
97
/**
 * Look through the user's MCP client configs for headers already configured
 * for `targetUrl`.
 *
 * Candidate files are tried in the order given by `candidateConfigPaths`;
 * files that are missing, empty, or not valid JSON are skipped. The first
 * file that yields matching headers wins.
 *
 * @param {string} targetUrl - URL of the server being graded.
 * @param {object} [opts] - Overrides, mainly for tests.
 * @param {string} [opts.cwd] - Project directory (defaults to `process.cwd()`).
 * @param {string} [opts.home] - Home directory (defaults to `os.homedir()`).
 * @param {Record<string, string | undefined>} [opts.env] - Environment (defaults to `process.env`).
 * @param {(file: string) => string | null} [opts.readFile] - File reader returning text or null.
 * @returns {{ headers: Record<string, string>, source: string } | null}
 *   The headers and the file they came from, or `null` if nothing matched.
 */
function resolveHeadersFromClientConfig(targetUrl, opts = {}) {
  const cwd = opts.cwd ?? process.cwd();
  const home = opts.home ?? homedir();
  const env = opts.env ?? process.env;
  const readConfig = opts.readFile ?? ((file) => (existsSync(file) ? safeRead(file) : null));
  // JSON.parse never yields undefined, so undefined unambiguously means "unparseable".
  const parseJson = (text) => {
    try {
      return JSON.parse(text);
    } catch {
      return undefined;
    }
  };
  for (const source of candidateConfigPaths(cwd, home)) {
    const text = readConfig(source);
    if (!text) continue;
    const config = parseJson(text);
    if (config === undefined) continue;
    const headers = extractMatchingHeaders(config, targetUrl, env);
    if (headers) return { headers, source };
  }
  return null;
}
116
/**
 * Heuristically decide whether an error reports an authentication or
 * authorization failure, based only on its message text.
 *
 * @param {unknown} err - Thrown value; non-Error values are stringified.
 * @returns {boolean} True when the lower-cased message contains a standalone
 *   401/403 or one of the known auth-failure phrases.
 */
function isAuthError(err) {
  const text = err instanceof Error ? err.message : String(err);
  const lowered = text.toLowerCase();
  if (/\b40[13]\b/.test(lowered)) return true;
  const markers = [
    "unauthor",
    "forbidden",
    "invalid_token",
    "invalid token",
    "www-authenticate",
    "no authorization"
  ];
  return markers.some((marker) => lowered.includes(marker));
}
120
/**
 * Read a file as UTF-8 text without ever throwing.
 *
 * @param {string} p - Path of the file to read.
 * @returns {string | null} The file contents, or `null` if reading failed.
 */
function safeRead(p) {
  let contents = null;
  try {
    contents = readFileSync(p, "utf8");
  } catch {
    // Best effort: an unreadable file is reported as null.
  }
  return contents;
}
127
+
128
+ // ../cli/src/oauth.ts
129
+ import { createServer } from "http";
130
+ import { execFile } from "child_process";
131
+ import { randomUUID } from "crypto";
132
+ import { Client } from "@modelcontextprotocol/sdk/client/index.js";
133
+ import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
134
+ import { UnauthorizedError } from "@modelcontextprotocol/sdk/client/auth.js";
135
+ var CALLBACK_PATH = "/callback";
136
+ var DEFAULT_TIMEOUT_MS = 3 * 60 * 1e3;
137
+ var CLIENT_NAME = "polygraph-litmus";
138
+ var SUCCESS_HTML = '<!doctype html><meta charset="utf-8"><title>polygraph</title><body style="font-family:system-ui;padding:2rem;max-width:32rem"><h3>Authorization received</h3><p>You can close this tab and return to the terminal.</p></body>';
139
/**
 * In-memory OAuth client provider for a loopback-redirect authorization flow.
 *
 * Client registration, the PKCE verifier, and the tokens are all kept on the
 * instance only; this class writes nothing anywhere else. The user-agent
 * redirect is delegated to the `onRedirect` callback supplied at construction.
 */
var LoopbackOAuthProvider = class {
  _redirectUrl;
  _onRedirect;
  _clientName;
  /** CSRF state, generated once per instance; compared against the callback's `state`. */
  issuedState = randomUUID();
  _clientInfo;
  _codeVerifier;
  _tokens;

  /**
   * @param {string} redirectUrl - Loopback URL the authorization server redirects to.
   * @param {(authorizationUrl: URL | string) => unknown} onRedirect - Invoked to send the user to the authorization URL.
   * @param {string} [clientName] - Client name advertised in the client metadata.
   */
  constructor(redirectUrl, onRedirect, clientName = CLIENT_NAME) {
    this._redirectUrl = redirectUrl;
    this._onRedirect = onRedirect;
    this._clientName = clientName;
  }

  /** The redirect URL given at construction. */
  get redirectUrl() {
    return this._redirectUrl;
  }

  /** Client metadata describing a public client using the authorization-code grant. */
  get clientMetadata() {
    const redirectUri = this._redirectUrl;
    return {
      client_name: this._clientName,
      redirect_uris: [redirectUri],
      token_endpoint_auth_method: "none",
      grant_types: ["authorization_code", "refresh_token"],
      response_types: ["code"]
    };
  }

  /** @returns {string} The CSRF state issued for this instance. */
  state() {
    return this.issuedState;
  }

  /** @returns {object | undefined} Previously saved client registration, if any. */
  clientInformation() {
    return this._clientInfo;
  }

  /** Remember the client registration for this instance. */
  saveClientInformation(info) {
    this._clientInfo = info;
  }

  /** @returns {object | undefined} Previously saved tokens, if any. */
  tokens() {
    return this._tokens;
  }

  /** Remember the tokens for this instance. */
  saveTokens(tokens) {
    this._tokens = tokens;
  }

  /** Remember the PKCE code verifier for this instance. */
  saveCodeVerifier(verifier) {
    this._codeVerifier = verifier;
  }

  /**
   * @returns {string} The saved PKCE code verifier.
   * @throws {Error} If no verifier has been saved yet.
   */
  codeVerifier() {
    const verifier = this._codeVerifier;
    if (verifier) return verifier;
    throw new Error("PKCE code verifier missing");
  }

  /** Hand the authorization URL to the redirect callback and return its result. */
  redirectToAuthorization(authorizationUrl) {
    return this._onRedirect(authorizationUrl);
  }
};
191
/**
 * Extract the OAuth callback parameters from an incoming request URL.
 *
 * @param {string} reqUrl - Request target as received by the HTTP server
 *   (resolved against a loopback base, so a bare path is accepted).
 * @returns {{ code: string, state: string | null } | null} The `code` and
 *   `state` query values, or `null` when the URL is unparseable, the path is
 *   not the callback path, or `code` is absent or empty.
 */
function parseCallbackParams(reqUrl) {
  let parsed;
  try {
    parsed = new URL(reqUrl, "http://127.0.0.1");
  } catch {
    return null;
  }
  if (parsed.pathname !== CALLBACK_PATH) return null;
  const query = parsed.searchParams;
  const code = query.get("code");
  return code ? { code, state: query.get("state") } : null;
}
203
/**
 * Start a loopback HTTP server that receives the OAuth redirect.
 *
 * The server binds to 127.0.0.1 on port 0 (any free port). Requests that
 * `parseCallbackParams` rejects get a 404; an accepted callback gets the
 * success page and its parameters are handed to the current waiter, or
 * buffered in `pending` if nobody is waiting yet.
 *
 * @returns {Promise<{
 *   redirectUrl: string,
 *   waitForCode: (timeoutMs: number) => Promise<{ code: string, state: string | null } | null>,
 *   close: () => void
 * }>} Resolves once the server is listening. `waitForCode` resolves with the
 *   callback parameters, or `null` after `timeoutMs`. `close` cancels the
 *   timer, drops the waiter, and closes the server.
 */
function startCallbackServer() {
  return new Promise((resolve2) => {
    // Callback that arrived before waitForCode() was called.
    let pending;
    // Resolver of the in-flight waitForCode() promise, if any.
    let deliver = null;
    // Timeout handle of the in-flight waitForCode() promise, if any.
    let timer = null;
    const server = createServer((req, res) => {
      const parsed = req.url ? parseCallbackParams(req.url) : null;
      if (!parsed) {
        res.writeHead(404, { "content-type": "text/plain" });
        res.end("not found");
        return;
      }
      res.writeHead(200, { "content-type": "text/html" });
      res.end(SUCCESS_HTML);
      // Hand over immediately when someone is waiting; otherwise buffer
      // (a later callback overwrites an earlier buffered one).
      if (deliver) deliver(parsed);
      else pending = parsed;
    });
    // NOTE(review): no "error" listener is attached, so a listen failure
    // would never settle this promise — confirm whether that is acceptable.
    server.listen(0, "127.0.0.1", () => {
      const addr = server.address();
      const port = typeof addr === "object" && addr ? addr.port : 0;
      resolve2({
        redirectUrl: `http://127.0.0.1:${port}${CALLBACK_PATH}`,
        waitForCode(timeoutMs) {
          // A callback that already arrived is consumed exactly once.
          if (pending !== void 0) {
            const r = pending;
            pending = void 0;
            return Promise.resolve(r);
          }
          return new Promise((res2) => {
            // On timeout, drop the waiter and resolve with null.
            timer = setTimeout(() => {
              deliver = null;
              timer = null;
              res2(null);
            }, timeoutMs);
            // On delivery, cancel the timeout and resolve with the callback.
            deliver = (r) => {
              if (timer) clearTimeout(timer);
              timer = null;
              res2(r);
            };
          });
        },
        close() {
          if (timer) clearTimeout(timer);
          timer = null;
          deliver = null;
          server.close();
        }
      });
    });
  });
}
254
/**
 * Build the command that opens `url` in the platform's default browser.
 *
 * Only absolute `http:` / `https:` URLs are accepted, because the URL can
 * originate from the (untrusted) server being authorized against. On Windows
 * the URL is handed to `rundll32 url.dll,FileProtocolHandler` rather than
 * `cmd /c start`: cmd.exe treats `&` (present in every OAuth authorization
 * URL) as a command separator, which truncates the URL and lets a crafted
 * URL run arbitrary commands.
 *
 * @param {string} url - URL to open.
 * @param {string} [platform] - Platform identifier; defaults to `process.platform`.
 * @returns {[string, string[]] | null} `[command, args]`, or `null` when the
 *   URL is unparseable or not http(s).
 */
function browserLaunchCommand(url, platform = process.platform) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return null;
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return null;
  const href = parsed.href;
  if (platform === "darwin") return ["open", [href]];
  if (platform === "win32") return ["rundll32", ["url.dll,FileProtocolHandler", href]];
  return ["xdg-open", [href]];
}

/**
 * Best-effort: open `url` in the user's default browser.
 *
 * Nothing is launched for URLs that are not http(s). Launch failures are
 * deliberately ignored; callers are expected to also show the URL so the
 * user can open it manually.
 *
 * @param {string} url - URL to open.
 * @returns {void}
 */
function defaultOpenBrowser(url) {
  const launch = browserLaunchCommand(url);
  if (!launch) return;
  const [cmd, args] = launch;
  // execFile spawns without a shell, so the URL is passed as a single argument.
  execFile(cmd, args, () => {
    // Intentionally ignored: opening the browser is a convenience only.
  });
}
259
/**
 * Try to obtain an OAuth access token for `targetUrl` through a browser-based
 * authorization flow with a loopback callback.
 *
 * Never rejects once the callback server has started: every failure inside
 * the flow (non-auth connect error, timeout, state mismatch, token exchange
 * error) yields `null`. The callback server, transport, and client are always
 * closed before returning.
 *
 * @param {string} targetUrl - URL of the MCP server to authorize against.
 * @param {object} [opts]
 * @param {number} [opts.timeoutMs] - How long to wait for the browser callback (default `DEFAULT_TIMEOUT_MS`).
 * @param {(url: string) => unknown} [opts.openBrowser] - Browser opener (default `defaultOpenBrowser`).
 * @param {(url: string) => void} [opts.onAuthUrl] - Notified with the authorization URL before the browser opens.
 * @param {string} [opts.clientName] - Client name passed to the OAuth provider.
 * @returns {Promise<string | null>} The access token, or `null` if none was obtained.
 */
async function acquireOAuthToken(targetUrl, opts = {}) {
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const openBrowser = opts.openBrowser ?? defaultOpenBrowser;
  const server = await startCallbackServer();
  const provider = new LoopbackOAuthProvider(
    server.redirectUrl,
    async (url) => {
      // Surface the URL first so the user can open it by hand if the browser fails to launch.
      opts.onAuthUrl?.(url.toString());
      await openBrowser(url.toString());
    },
    opts.clientName
  );
  // NOTE(review): `new URL(targetUrl)` runs outside the try/finally below, so an
  // invalid URL would throw here and leave the callback server listening — confirm
  // callers always pass a parseable URL.
  const transport = new StreamableHTTPClientTransport(new URL(targetUrl), { authProvider: provider });
  const client = new Client({ name: CLIENT_NAME, version: "0.0.0" }, {});
  try {
    try {
      await client.connect(transport);
      // Connected without interactive authorization: return whatever token the provider holds.
      return (await provider.tokens())?.access_token ?? null;
    } catch (err) {
      // Only UnauthorizedError continues to the browser flow; presumably the SDK
      // throws it after invoking redirectToAuthorization — TODO confirm against the SDK docs.
      if (!(err instanceof UnauthorizedError)) return null;
    }
    const cb = await server.waitForCode(timeoutMs);
    // Reject a missing callback (timeout) or one whose state is not the one we issued.
    if (!cb || cb.state !== provider.issuedState) return null;
    await transport.finishAuth(cb.code);
    return (await provider.tokens())?.access_token ?? null;
  } catch {
    return null;
  } finally {
    server.close();
    // Close errors are ignored; they must not mask the result.
    await transport.close().catch(() => {
    });
    await client.close().catch(() => {
    });
  }
}
294
+
295
+ // ../cli/src/litmus.ts
296
+ var DEFAULT_RUN_TIMEOUT_MS = 15 * 60 * 1e3;
297
+ async function runLitmusCli(args) {
298
+ const json = args.includes("--json");
299
+ const useDiscoveredAuth = args.includes("--use-discovered-auth");
300
+ const oauthFlag = args.includes("--oauth");
301
+ const noOauth = args.includes("--no-oauth");
302
+ const { headers, allowStateChanging, unsafeHostExec, timeoutMs, positionals } = parseAuthFlags(args);
303
+ const target = positionals[0];
304
+ if (!target) {
305
+ process.stderr.write(
306
+ 'usage: polygraphso litmus [--json] [--bearer <token>] [--header "Key: Value"] [--allow-state-changing] [--unsafe-host-exec] [--use-discovered-auth] [--oauth | --no-oauth] [--timeout <seconds>] <registry-ref | https-url | path-to-mcp>\n'
307
+ );
308
+ return 2;
309
+ }
310
+ const input = resolveTarget(target);
311
+ const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
312
+ const interactive = Boolean(process.stdin.isTTY && process.stdout.isTTY);
313
+ const probes = await import("./src-I63MJGJE.js");
314
+ const dockerAvailable = isStdio && interactive ? await probes.isDockerAvailable() : false;
315
+ const decision = checkHostExec(input, { optIn: unsafeHostExec, dockerAvailable, interactive });
316
+ if (decision.action === "refuse") {
317
+ process.stderr.write(`\u2192 litmus: ${decision.refuse}
318
+ `);
319
+ return 2;
320
+ }
321
+ if (decision.action === "confirm" && !await promptYesNo(decision.prompt, decision.defaultYes)) {
322
+ process.stderr.write("\u2192 litmus: cancelled.\n");
323
+ return 2;
324
+ }
325
+ const isolation = decision.isolation;
326
+ if (decision.warn) process.stderr.write(`\u2192 ${decision.warn}
327
+ `);
328
+ if (!json) process.stderr.write(`\u2192 running litmus against ${target} \u2026 (~20\u201360s)
329
+ `);
330
+ const onProgress = (done, total, label) => {
331
+ if (!json) process.stderr.write(` \u2192 [${done}/${total}] ${label}
332
+ `);
333
+ };
334
+ const runOnce = async (effectiveHeaders) => {
335
+ const bundle = await probes.runLitmus(input, {
336
+ headers: effectiveHeaders,
337
+ allowStateChanging,
338
+ timeoutMs,
339
+ onProgress,
340
+ ...isolation ? { isolation } : {}
341
+ });
342
+ process.stdout.write(json ? canonicalStringify(bundle) + "\n" : formatBundle(bundle));
343
+ return bundle.grade === "D" || bundle.grade === "F" ? 1 : 0;
344
+ };
345
+ try {
346
+ return await runOnce(headers);
347
+ } catch (err) {
348
+ const targetUrl = typeof input === "string" && /^https?:\/\//i.test(input) ? input : null;
349
+ const hasExplicitAuth = Object.keys(headers).length > 0;
350
+ if (targetUrl && !hasExplicitAuth && isAuthError(err)) {
351
+ const found = resolveHeadersFromClientConfig(targetUrl);
352
+ if (found && (interactive || useDiscoveredAuth)) {
353
+ const proceed = useDiscoveredAuth || await promptYesNo(
354
+ `\u2192 Found a token for ${targetUrl} in ${found.source}.
355
+ Grading will make live, authenticated tool calls to that server AS YOU (read-only tools only).
356
+ Use it? [y/N] `,
357
+ false
358
+ );
359
+ if (proceed) {
360
+ try {
361
+ return await runOnce(found.headers);
362
+ } catch (err2) {
363
+ process.stderr.write(`\u2192 litmus failed: ${err2 instanceof Error ? err2.message : String(err2)}
364
+ `);
365
+ return 1;
366
+ }
367
+ }
368
+ } else if (!found) {
369
+ if (interactive && !noOauth || oauthFlag) {
370
+ process.stderr.write(`\u2192 ${targetUrl} is token-gated \u2014 opening your browser to authorize\u2026
371
+ `);
372
+ const token = await acquireOAuthToken(targetUrl, {
373
+ onAuthUrl: (u) => process.stderr.write(` \u2192 if your browser didn't open, visit:
374
+ ${u}
375
+ `)
376
+ });
377
+ if (token) {
378
+ try {
379
+ return await runOnce({ Authorization: `Bearer ${token}` });
380
+ } catch (err2) {
381
+ process.stderr.write(`\u2192 litmus failed: ${err2 instanceof Error ? err2.message : String(err2)}
382
+ `);
383
+ return 1;
384
+ }
385
+ }
386
+ }
387
+ process.stderr.write(
388
+ `\u2192 ${targetUrl} is token-gated. litmus connects as a fresh client, so it needs the
389
+ same bearer token your agent already uses for this server. Pass it with
390
+ --bearer <token> or set LITMUS_BEARER.
391
+ `
392
+ );
393
+ return 2;
394
+ }
395
+ }
396
+ process.stderr.write(`\u2192 litmus failed: ${err instanceof Error ? err.message : String(err)}
397
+ `);
398
+ return 1;
399
+ }
400
+ }
401
/**
 * Ask a yes/no question on the terminal.
 *
 * The prompt is written to stderr and the answer is read from stdin; the
 * readline interface is always closed, even if reading fails.
 *
 * @param {string} prompt - Question text shown to the user.
 * @param {boolean} defaultYes - Result used when the answer is empty.
 * @returns {Promise<boolean>} The interpretation of the answer by `isAffirmative`.
 */
async function promptYesNo(prompt, defaultYes) {
  const readline = await import("readline/promises");
  const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
  let answer;
  try {
    answer = await rl.question(prompt);
  } finally {
    rl.close();
  }
  return isAffirmative(answer, defaultYes);
}
410
/**
 * Parse the litmus CLI arguments related to auth, safety, and timeout.
 *
 * Recognized: `--allow-state-changing`, `--unsafe-host-exec`,
 * `--timeout <s>` / `--timeout=<s>`, `--bearer <t>` / `--bearer=<t>`,
 * `--header "Key: Value"` / `--header=Key: Value`. `--json` and any other
 * `--flag` are ignored here; everything else is a positional.
 *
 * Header names are case-insensitive in HTTP, so a later header replaces an
 * earlier one with the same name regardless of case. In particular
 * `--header "authorization: …"` replaces the bearer-derived `Authorization`
 * instead of being sent alongside it.
 *
 * @param {string[]} args - Raw CLI arguments.
 * @param {Record<string, string | undefined>} [env] - Environment; `LITMUS_BEARER` seeds the bearer token.
 * @returns {{
 *   headers: Record<string, string>,
 *   allowStateChanging: boolean,
 *   unsafeHostExec: boolean,
 *   timeoutMs: number,
 *   positionals: string[]
 * }} The parsed options.
 */
function parseAuthFlags(args, env = process.env) {
  const headers = {};
  const headerArgs = [];
  let allowStateChanging = false;
  let unsafeHostExec = false;
  let timeoutMs = DEFAULT_RUN_TIMEOUT_MS;
  let bearer = env.LITMUS_BEARER || void 0;
  const positionals = [];
  for (let i = 0; i < args.length; i++) {
    const a = args[i];
    if (a === "--json") continue;
    if (a === "--allow-state-changing") {
      allowStateChanging = true;
    } else if (a === "--unsafe-host-exec") {
      unsafeHostExec = true;
    } else if (a === "--timeout") {
      // An invalid or missing value keeps the current timeout.
      timeoutMs = timeoutSecondsToMs(args[++i]) ?? timeoutMs;
    } else if (a.startsWith("--timeout=")) {
      timeoutMs = timeoutSecondsToMs(a.slice("--timeout=".length)) ?? timeoutMs;
    } else if (a === "--bearer") {
      bearer = args[++i] ?? bearer;
    } else if (a.startsWith("--bearer=")) {
      bearer = a.slice("--bearer=".length);
    } else if (a === "--header") {
      const v = args[++i];
      if (v) headerArgs.push(v);
    } else if (a.startsWith("--header=")) {
      headerArgs.push(a.slice("--header=".length));
    } else if (a.startsWith("--")) {
      // Unknown or elsewhere-handled flag: not a positional, nothing to record.
    } else {
      positionals.push(a);
    }
  }
  // Set a header, dropping any existing entry whose name differs only by case.
  const setHeader = (name, value) => {
    const lowered = name.toLowerCase();
    for (const existing of Object.keys(headers)) {
      if (existing.toLowerCase() === lowered) delete headers[existing];
    }
    headers[name] = value;
  };
  if (bearer) setHeader("Authorization", `Bearer ${bearer}`);
  for (const h of headerArgs) {
    // Split on the first colon only, so values may themselves contain colons.
    const idx = h.indexOf(":");
    if (idx === -1) continue;
    const key = h.slice(0, idx).trim();
    const value = h.slice(idx + 1).trim();
    if (key) setHeader(key, value);
  }
  return { headers, allowStateChanging, unsafeHostExec, timeoutMs, positionals };
}
453
/**
 * Convert a `--timeout` value in seconds to whole milliseconds.
 *
 * @param {string | undefined} v - Raw value from the command line.
 * @returns {number | undefined} A positive integer number of milliseconds,
 *   or `undefined` when the value is missing, not a finite number, or not
 *   greater than zero. Positive values below one millisecond are rounded up
 *   to 1 so a positive request never becomes a zero timeout.
 */
function timeoutSecondsToMs(v) {
  if (!v) return void 0;
  const sec = Number(v);
  if (!Number.isFinite(sec) || sec <= 0) return void 0;
  return Math.max(1, Math.floor(sec * 1e3));
}
458
+ function checkHostExec(input, gate) {
459
+ const { optIn, dockerAvailable, interactive, optInHint = "--unsafe-host-exec", env = process.env } = gate;
460
+ const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
461
+ if (!isStdio) return { action: "allow" };
462
+ if (env.LITMUS_STDIO_ISOLATION === "docker") return { action: "allow", isolation: "docker" };
463
+ const why = "this launches the target's own code; without Docker isolation it runs on THIS host";
464
+ const warn = `\u26A0 unsafe host execution \u2014 ${why}.`;
465
+ if (optIn) return { action: "allow", isolation: "none", warn };
466
+ if (interactive) {
467
+ if (dockerAvailable) {
468
+ return {
469
+ action: "confirm",
470
+ isolation: "docker",
471
+ defaultYes: true,
472
+ prompt: "Docker detected \u2014 the target will run sandboxed (recommended). Proceed? [Y/n] "
473
+ };
474
+ }
475
+ return {
476
+ action: "confirm",
477
+ isolation: "none",
478
+ defaultYes: false,
479
+ prompt: `No Docker found \u2014 this would run the target's own code on THIS host, unsandboxed.
480
+ Type "yes" to proceed, or set LITMUS_STDIO_ISOLATION=docker to sandbox: `,
481
+ warn
482
+ };
483
+ }
484
+ return {
485
+ action: "refuse",
486
+ refuse: `refusing host execution \u2014 ${why}.
487
+ \u2022 sandboxed (recommended): set LITMUS_STDIO_ISOLATION=docker (requires Docker)
488
+ \u2022 accept the risk: re-run with ${optInHint}`
489
+ };
490
+ }
491
/**
 * Interpret a typed answer to a yes/no prompt.
 *
 * @param {string} answer - Raw user input.
 * @param {boolean} defaultYes - Result for an empty (whitespace-only) answer.
 * @returns {boolean} True for "y" or "yes" (case-insensitive, trimmed),
 *   `defaultYes` for an empty answer, false for anything else.
 */
function isAffirmative(answer, defaultYes) {
  const normalized = answer.trim().toLowerCase();
  if (normalized.length === 0) return defaultYes;
  return ["y", "yes"].includes(normalized);
}
496
/**
 * Turn a CLI target into what the harness should connect to.
 *
 * - `http://` / `https://` URLs are returned unchanged.
 * - A path that exists on disk becomes a launch spec that runs it with the
 *   current Node binary; `.ts` / `.mts` / `.cts` files are run through the
 *   tsx CLI resolved by `tsxCli()`.
 * - Anything else is returned unchanged.
 *
 * @param {string} target - URL, local path, or other reference.
 * @returns {string | { command: string, args: string[], serverRef: string }}
 *   The original string, or a launch spec for a local entry file.
 */
function resolveTarget(target) {
  const isRemote = /^https?:\/\//i.test(target);
  if (isRemote || !existsSync2(target)) return target;
  const abs = path2.resolve(target);
  const isTypeScript = [".ts", ".mts", ".cts"].some((ext) => abs.endsWith(ext));
  const args = isTypeScript ? [tsxCli(), abs] : [abs];
  return { command: process.execPath, args, serverRef: target };
}
507
/**
 * Locate the tsx command-line entry file.
 *
 * Resolves `tsx/package.json` relative to this module and reads its `bin`
 * field: a string is used directly, otherwise the `tsx` entry of the map,
 * falling back to "./dist/cli.mjs".
 *
 * @returns {string} Path to the tsx CLI script inside the package directory.
 * @throws If the tsx package cannot be resolved.
 */
function tsxCli() {
  const localRequire = createRequire(import.meta.url);
  const manifestPath = localRequire.resolve("tsx/package.json");
  const packageDir = path2.dirname(manifestPath);
  const { bin } = localRequire(manifestPath);
  const entry = typeof bin === "string" ? bin : bin.tsx ?? "./dist/cli.mjs";
  return path2.join(packageDir, entry);
}
515
+
516
+ export {
517
+ isAuthError,
518
+ acquireOAuthToken,
519
+ DEFAULT_RUN_TIMEOUT_MS,
520
+ runLitmusCli,
521
+ parseAuthFlags,
522
+ checkHostExec,
523
+ resolveTarget
524
+ };
@@ -1,5 +1,5 @@
1
1
  // ../core/src/types.ts
2
- var METHODOLOGY_VERSION = "litmus-v5";
2
+ var METHODOLOGY_VERSION = "litmus-v6";
3
3
  var BUNDLE_SCHEMA_VERSION = "1.5.0";
4
4
  var CATEGORY_META = {
5
5
  "C-01": { label: "tool-output injection", description: "whether it tries to hijack the caller through tool output" },
package/dist/cli-skill.js CHANGED
@@ -5,8 +5,8 @@ import {
5
5
  runSkillLitmus,
6
6
  runSkillQuality,
7
7
  runSkillQualityJudged
8
- } from "./chunk-OGOFUBLN.js";
9
- import "./chunk-CKQZFK77.js";
8
+ } from "./chunk-7HI2KPXH.js";
9
+ import "./chunk-IXX5YEBC.js";
10
10
 
11
11
  // src/cli-skill.ts
12
12
  import { statSync } from "fs";
package/dist/cli.js CHANGED
@@ -1,11 +1,11 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  runLitmusCli
4
- } from "./chunk-TTGWSGPC.js";
4
+ } from "./chunk-FFE6ZQPL.js";
5
5
  import {
6
6
  parseServerRef,
7
7
  serverKey
8
- } from "./chunk-CKQZFK77.js";
8
+ } from "./chunk-IXX5YEBC.js";
9
9
 
10
10
  // src/cli.ts
11
11
  import { readFileSync } from "fs";
package/dist/index.d.ts CHANGED
@@ -26,9 +26,11 @@ type Registry = "npm" | "pypi" | "github";
26
26
  * declared/baseline host is permitted; only egress beyond that union fails — "A"
27
27
  * means "no overreach", not "no network"); v2 added probe 2.1. A pass/fail-
28
28
  * semantics change → version bumps per litmus-test §8. The version is a string
29
- * field on the attestation, so v1–v5 attestations coexist and the agent gate does
30
- * not branch on it. */
31
- declare const METHODOLOGY_VERSION: "litmus-v5";
29
+ * field on the attestation, so v1–v6 attestations coexist and the agent gate does
30
+ * not branch on it. v6 widens the default tool-safety skip set: a tool that claims
31
+ * read-only but evidences mutation is no longer actively exercised, which can
32
+ * change which tools are probed (hence the grade) on such servers. */
33
+ declare const METHODOLOGY_VERSION: "litmus-v6";
32
34
  /** Evidence-bundle format version (owned by onchain-proof-spec §2).
33
35
  * 1.5.0 adds the optional `selfReportedVersion` field (the server's
34
36
  * self-asserted `serverInfo.version`, descriptive metadata only);
@@ -775,6 +777,16 @@ interface ToolSafety {
775
777
  declare function classifyTool(tool: ToolSafetyInput): ToolSafety;
776
778
  /** Names of the tools in a surface that are state-changing (skipped by default). */
777
779
  declare function stateChangingToolNames(tools: readonly ToolSafetyInput[]): Set<string>;
780
+ /**
781
+ * Names of tools that must NOT be actively bait-called by default — the union of
782
+ * (a) tools classified state-changing ({@link classifyTool}) and (b) tools that
783
+ * claim `readOnlyHint:true` but evidence mutation ({@link declarationMismatchV2}).
784
+ * (b) closes the gap where a server gets a destructive tool exercised by lying
785
+ * about it: the lie is still scored (C-02 2.1), and here it also removes the tool
786
+ * from active exercise. `--allow-state-changing` overrides this (it accepts side
787
+ * effects), so the union only gates the default path.
788
+ */
789
+ declare function unsafeToExerciseToolNames(tools: readonly ToolSafetyInput[]): Set<string>;
778
790
 
779
791
  /** What every probe receives: the live client, the tool surface, planted canaries. */
780
792
  interface ProbeContext {
@@ -1026,13 +1038,15 @@ declare const runLitmusInputShape: {
1026
1038
  header: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
1027
1039
  unsafe_host_exec: z.ZodOptional<z.ZodBoolean>;
1028
1040
  timeout_seconds: z.ZodOptional<z.ZodNumber>;
1041
+ interactive_auth: z.ZodOptional<z.ZodBoolean>;
1029
1042
  };
1030
- declare function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, timeout_seconds }: {
1043
+ declare function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, timeout_seconds, interactive_auth }: {
1031
1044
  server_ref: string;
1032
1045
  bearer?: string;
1033
1046
  header?: string[];
1034
1047
  unsafe_host_exec?: boolean;
1035
1048
  timeout_seconds?: number;
1049
+ interactive_auth?: boolean;
1036
1050
  }, extra: RequestHandlerExtra<ServerRequest, ServerNotification>): Promise<{
1037
1051
  isError: true;
1038
1052
  content: {
@@ -1114,6 +1128,7 @@ declare function handleVerifySkill({ skill_ref }: {
1114
1128
  * harness locally and print the grade. The heavy harness (`@polygraph/probes`)
1115
1129
  * is loaded lazily so the zero-dep `check`/`list` fast path stays intact.
1116
1130
  */
1131
+
1117
1132
  type StdioCommand = {
1118
1133
  command: string;
1119
1134
  args: string[];
@@ -1143,4 +1158,4 @@ declare function parseAuthFlags(args: readonly string[], env?: NodeJS.ProcessEnv
1143
1158
  /** A target is an https URL, a local MCP entry file, or a registry ref. */
1144
1159
  declare function resolveTarget(target: string): string | StdioCommand;
1145
1160
 
1146
- export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_META, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type ListToolsClient, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_CATEGORY_META, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, 
encodeSkillAttestationFields, enumerateTools, exfilInstruction, fingerprintToolDefs, formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, isDockerAvailable, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, verifySkillInputShape };
1161
+ export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_META, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type ListToolsClient, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_CATEGORY_META, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, 
encodeSkillAttestationFields, enumerateTools, exfilInstruction, fingerprintToolDefs, formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, isDockerAvailable, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, unsafeToExerciseToolNames, verifySkillInputShape };
package/dist/index.js CHANGED
@@ -31,11 +31,11 @@ import {
31
31
  skillAttestationFields,
32
32
  skillSchemaUID,
33
33
  verifySkillInputShape
34
- } from "./chunk-PTWDLGI5.js";
34
+ } from "./chunk-ERMA3J2T.js";
35
35
  import {
36
36
  parseAuthFlags,
37
37
  resolveTarget
38
- } from "./chunk-TTGWSGPC.js";
38
+ } from "./chunk-FFE6ZQPL.js";
39
39
  import {
40
40
  SKILL_BUNDLE_SCHEMA_VERSION,
41
41
  SKILL_CATEGORY_META,
@@ -70,8 +70,9 @@ import {
70
70
  skillInjection,
71
71
  skillInjectionFails,
72
72
  stateChangingToolNames,
73
- stripExamples
74
- } from "./chunk-OGOFUBLN.js";
73
+ stripExamples,
74
+ unsafeToExerciseToolNames
75
+ } from "./chunk-7HI2KPXH.js";
75
76
  import {
76
77
  BUNDLE_SCHEMA_VERSION,
77
78
  CATEGORY_META,
@@ -86,7 +87,7 @@ import {
86
87
  parseSkillRef,
87
88
  serverKey,
88
89
  skillKey
89
- } from "./chunk-CKQZFK77.js";
90
+ } from "./chunk-IXX5YEBC.js";
90
91
 
91
92
  // ../agent/src/gate.ts
92
93
  function sameServer(a, b) {
@@ -216,5 +217,6 @@ export {
216
217
  skillSchemaUID,
217
218
  stateChangingToolNames,
218
219
  stripExamples,
220
+ unsafeToExerciseToolNames,
219
221
  verifySkillInputShape
220
222
  };
package/dist/mcp.js CHANGED
@@ -20,12 +20,12 @@ import {
20
20
  runSkillLitmusInputShape,
21
21
  verifyInputShape,
22
22
  verifySkillInputShape
23
- } from "./chunk-PTWDLGI5.js";
24
- import "./chunk-TTGWSGPC.js";
23
+ } from "./chunk-ERMA3J2T.js";
24
+ import "./chunk-FFE6ZQPL.js";
25
25
  import {
26
26
  judgeFromEnv
27
- } from "./chunk-OGOFUBLN.js";
28
- import "./chunk-CKQZFK77.js";
27
+ } from "./chunk-7HI2KPXH.js";
28
+ import "./chunk-IXX5YEBC.js";
29
29
 
30
30
  // src/mcp.ts
31
31
  import { realpathSync } from "fs";
@@ -32,9 +32,10 @@ import {
32
32
  skillInjection,
33
33
  skillInjectionFails,
34
34
  stateChangingToolNames,
35
- stripExamples
36
- } from "./chunk-OGOFUBLN.js";
37
- import "./chunk-CKQZFK77.js";
35
+ stripExamples,
36
+ unsafeToExerciseToolNames
37
+ } from "./chunk-7HI2KPXH.js";
38
+ import "./chunk-IXX5YEBC.js";
38
39
  export {
39
40
  SKILL_BUNDLE_SCHEMA_VERSION,
40
41
  SKILL_CATEGORY_META,
@@ -69,5 +70,6 @@ export {
69
70
  skillInjection,
70
71
  skillInjectionFails,
71
72
  stateChangingToolNames,
72
- stripExamples
73
+ stripExamples,
74
+ unsafeToExerciseToolNames
73
75
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polygraphso/litmus",
3
- "version": "0.12.1",
3
+ "version": "0.13.0",
4
4
  "mcpName": "io.github.polygraphso/litmus",
5
5
  "description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
6
6
  "license": "Apache-2.0",
@@ -67,8 +67,8 @@
67
67
  "@polygraph/probes": "0.0.0",
68
68
  "@polygraph/agent": "0.0.0",
69
69
  "@polygraph/onchain": "0.0.0",
70
- "@polygraph/mcp": "0.0.0",
71
- "@polygraph/cli": "0.0.0"
70
+ "@polygraph/cli": "0.0.0",
71
+ "@polygraph/mcp": "0.0.0"
72
72
  },
73
73
  "publishConfig": {
74
74
  "access": "public"
@@ -1,216 +0,0 @@
1
- import {
2
- CATEGORY_META,
3
- canonicalStringify
4
- } from "./chunk-CKQZFK77.js";
5
-
6
- // ../cli/src/litmus.ts
7
- import { existsSync } from "fs";
8
- import { createRequire } from "module";
9
- import * as path from "path";
10
-
11
- // ../cli/src/format.ts
12
- function formatBundle(b) {
13
- const lines = [];
14
- lines.push(`\u2192 ${b.methodologyVersion} \xB7 ${b.serverRef}`);
15
- if (b.resolvedVersion) lines.push(`\u2192 version ${b.resolvedVersion}`);
16
- if (b.selfReportedVersion) lines.push(`\u2192 self-reported ${b.selfReportedVersion} (unverified)`);
17
- lines.push("\u2192 checks");
18
- const labelWidth = Math.max(0, ...b.categories.map((c) => CATEGORY_META[c.code].label.length));
19
- for (const c of b.categories) {
20
- const { label, description } = CATEGORY_META[c.code];
21
- lines.push(` ${c.code} ${label.padEnd(labelWidth)} ${c.status}`);
22
- lines.push(` ${description}`);
23
- }
24
- const c01 = b.categories.find((c) => c.code === "C-01");
25
- if (c01?.status === "fail") {
26
- const highs = c01.probes.flatMap((p) => p.findings).filter((f) => f.severity === "high");
27
- for (const f of highs.slice(0, 3)) {
28
- lines.push(` \u26A0 ${f.tool ?? "?"}: ${f.kind} \u2014 ${truncate(f.match, 64)}`);
29
- }
30
- }
31
- lines.push(`\u2192 fingerprint ${shortFp(b.toolDefsFingerprint)}`);
32
- lines.push(`\u2192 grade: ${b.grade}`);
33
- lines.push(` ${b.gradeRationale}`);
34
- return lines.join("\n") + "\n";
35
- }
36
- function shortFp(fp) {
37
- return fp.length > 14 ? `${fp.slice(0, 6)}\u2026${fp.slice(-4)}` : fp;
38
- }
39
- function truncate(s, n) {
40
- return s.length > n ? `${s.slice(0, n)}\u2026` : s;
41
- }
42
-
43
- // ../cli/src/litmus.ts
44
- var DEFAULT_RUN_TIMEOUT_MS = 15 * 60 * 1e3;
45
- async function runLitmusCli(args) {
46
- const json = args.includes("--json");
47
- const { headers, allowStateChanging, unsafeHostExec, timeoutMs, positionals } = parseAuthFlags(args);
48
- const target = positionals[0];
49
- if (!target) {
50
- process.stderr.write(
51
- 'usage: polygraphso litmus [--json] [--bearer <token>] [--header "Key: Value"] [--allow-state-changing] [--unsafe-host-exec] [--timeout <seconds>] <registry-ref | https-url | path-to-mcp>\n'
52
- );
53
- return 2;
54
- }
55
- const input = resolveTarget(target);
56
- const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
57
- const interactive = Boolean(process.stdin.isTTY && process.stdout.isTTY);
58
- const probes = await import("./src-ZHTFCKNR.js");
59
- const dockerAvailable = isStdio && interactive ? await probes.isDockerAvailable() : false;
60
- const decision = checkHostExec(input, { optIn: unsafeHostExec, dockerAvailable, interactive });
61
- if (decision.action === "refuse") {
62
- process.stderr.write(`\u2192 litmus: ${decision.refuse}
63
- `);
64
- return 2;
65
- }
66
- if (decision.action === "confirm" && !await promptYesNo(decision.prompt, decision.defaultYes)) {
67
- process.stderr.write("\u2192 litmus: cancelled.\n");
68
- return 2;
69
- }
70
- const isolation = decision.isolation;
71
- if (decision.warn) process.stderr.write(`\u2192 ${decision.warn}
72
- `);
73
- if (!json) process.stderr.write(`\u2192 running litmus against ${target} \u2026 (~20\u201360s)
74
- `);
75
- const onProgress = (done, total, label) => {
76
- if (!json) process.stderr.write(` \u2192 [${done}/${total}] ${label}
77
- `);
78
- };
79
- try {
80
- const bundle = await probes.runLitmus(input, {
81
- headers,
82
- allowStateChanging,
83
- timeoutMs,
84
- onProgress,
85
- ...isolation ? { isolation } : {}
86
- });
87
- process.stdout.write(json ? canonicalStringify(bundle) + "\n" : formatBundle(bundle));
88
- return bundle.grade === "D" || bundle.grade === "F" ? 1 : 0;
89
- } catch (err) {
90
- process.stderr.write(`\u2192 litmus failed: ${err instanceof Error ? err.message : String(err)}
91
- `);
92
- return 1;
93
- }
94
- }
95
- async function promptYesNo(prompt, defaultYes) {
96
- const { createInterface } = await import("readline/promises");
97
- const rl = createInterface({ input: process.stdin, output: process.stderr });
98
- try {
99
- return isAffirmative(await rl.question(prompt), defaultYes);
100
- } finally {
101
- rl.close();
102
- }
103
- }
104
- function parseAuthFlags(args, env = process.env) {
105
- const headers = {};
106
- const headerArgs = [];
107
- let allowStateChanging = false;
108
- let unsafeHostExec = false;
109
- let timeoutMs = DEFAULT_RUN_TIMEOUT_MS;
110
- let bearer = env.LITMUS_BEARER || void 0;
111
- const positionals = [];
112
- for (let i = 0; i < args.length; i++) {
113
- const a = args[i];
114
- if (a === "--json") continue;
115
- if (a === "--allow-state-changing") {
116
- allowStateChanging = true;
117
- } else if (a === "--unsafe-host-exec") {
118
- unsafeHostExec = true;
119
- } else if (a === "--timeout") {
120
- timeoutMs = timeoutSecondsToMs(args[++i]) ?? timeoutMs;
121
- } else if (a.startsWith("--timeout=")) {
122
- timeoutMs = timeoutSecondsToMs(a.slice("--timeout=".length)) ?? timeoutMs;
123
- } else if (a === "--bearer") {
124
- bearer = args[++i] ?? bearer;
125
- } else if (a.startsWith("--bearer=")) {
126
- bearer = a.slice("--bearer=".length);
127
- } else if (a === "--header") {
128
- const v = args[++i];
129
- if (v) headerArgs.push(v);
130
- } else if (a.startsWith("--header=")) {
131
- headerArgs.push(a.slice("--header=".length));
132
- } else if (a.startsWith("--")) {
133
- } else {
134
- positionals.push(a);
135
- }
136
- }
137
- if (bearer) headers["Authorization"] = `Bearer ${bearer}`;
138
- for (const h of headerArgs) {
139
- const idx = h.indexOf(":");
140
- if (idx === -1) continue;
141
- const key = h.slice(0, idx).trim();
142
- const value = h.slice(idx + 1).trim();
143
- if (key) headers[key] = value;
144
- }
145
- return { headers, allowStateChanging, unsafeHostExec, timeoutMs, positionals };
146
- }
147
- function timeoutSecondsToMs(v) {
148
- if (!v) return void 0;
149
- const sec = Number(v);
150
- return Number.isFinite(sec) && sec > 0 ? Math.floor(sec * 1e3) : void 0;
151
- }
152
- function checkHostExec(input, gate) {
153
- const { optIn, dockerAvailable, interactive, optInHint = "--unsafe-host-exec", env = process.env } = gate;
154
- const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
155
- if (!isStdio) return { action: "allow" };
156
- if (env.LITMUS_STDIO_ISOLATION === "docker") return { action: "allow", isolation: "docker" };
157
- const why = "this launches the target's own code; without Docker isolation it runs on THIS host";
158
- const warn = `\u26A0 unsafe host execution \u2014 ${why}.`;
159
- if (optIn) return { action: "allow", isolation: "none", warn };
160
- if (interactive) {
161
- if (dockerAvailable) {
162
- return {
163
- action: "confirm",
164
- isolation: "docker",
165
- defaultYes: true,
166
- prompt: "Docker detected \u2014 the target will run sandboxed (recommended). Proceed? [Y/n] "
167
- };
168
- }
169
- return {
170
- action: "confirm",
171
- isolation: "none",
172
- defaultYes: false,
173
- prompt: `No Docker found \u2014 this would run the target's own code on THIS host, unsandboxed.
174
- Type "yes" to proceed, or set LITMUS_STDIO_ISOLATION=docker to sandbox: `,
175
- warn
176
- };
177
- }
178
- return {
179
- action: "refuse",
180
- refuse: `refusing host execution \u2014 ${why}.
181
- \u2022 sandboxed (recommended): set LITMUS_STDIO_ISOLATION=docker (requires Docker)
182
- \u2022 accept the risk: re-run with ${optInHint}`
183
- };
184
- }
185
- function isAffirmative(answer, defaultYes) {
186
- const a = answer.trim().toLowerCase();
187
- if (a === "") return defaultYes;
188
- return a === "y" || a === "yes";
189
- }
190
- function resolveTarget(target) {
191
- if (/^https?:\/\//i.test(target)) return target;
192
- if (existsSync(target)) {
193
- const abs = path.resolve(target);
194
- if (abs.endsWith(".ts") || abs.endsWith(".mts") || abs.endsWith(".cts")) {
195
- return { command: process.execPath, args: [tsxCli(), abs], serverRef: target };
196
- }
197
- return { command: process.execPath, args: [abs], serverRef: target };
198
- }
199
- return target;
200
- }
201
- function tsxCli() {
202
- const require2 = createRequire(import.meta.url);
203
- const pkgJsonPath = require2.resolve("tsx/package.json");
204
- const dir = path.dirname(pkgJsonPath);
205
- const bin = require2(pkgJsonPath).bin;
206
- const rel = typeof bin === "string" ? bin : bin.tsx ?? "./dist/cli.mjs";
207
- return path.join(dir, rel);
208
- }
209
-
210
- export {
211
- DEFAULT_RUN_TIMEOUT_MS,
212
- runLitmusCli,
213
- parseAuthFlags,
214
- checkHostExec,
215
- resolveTarget
216
- };