@polygraphso/litmus 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import {
2
+ CATEGORY_META,
2
3
  canonicalStringify
3
- } from "./chunk-44R4ZYOE.js";
4
+ } from "./chunk-X3P26XGS.js";
4
5
 
5
6
  // ../cli/src/litmus.ts
6
7
  import { existsSync } from "fs";
@@ -9,11 +10,16 @@ import * as path from "path";
9
10
 
10
11
  // ../cli/src/format.ts
11
12
  function formatBundle(b) {
12
- const status = (code) => b.categories.find((c) => c.code === code)?.status ?? "?";
13
13
  const lines = [];
14
14
  lines.push(`\u2192 ${b.methodologyVersion} \xB7 ${b.serverRef}`);
15
15
  if (b.resolvedVersion) lines.push(`\u2192 version ${b.resolvedVersion}`);
16
- lines.push(`\u2192 C-01 ${status("C-01")} \xB7 C-02 ${status("C-02")} \xB7 C-03 ${status("C-03")} \xB7 C-04 ${status("C-04")}`);
16
+ lines.push("\u2192 checks");
17
+ const labelWidth = Math.max(0, ...b.categories.map((c) => CATEGORY_META[c.code].label.length));
18
+ for (const c of b.categories) {
19
+ const { label, description } = CATEGORY_META[c.code];
20
+ lines.push(` ${c.code} ${label.padEnd(labelWidth)} ${c.status}`);
21
+ lines.push(` ${description}`);
22
+ }
17
23
  const c01 = b.categories.find((c) => c.code === "C-01");
18
24
  if (c01?.status === "fail") {
19
25
  const highs = c01.probes.flatMap((p) => p.findings).filter((f) => f.severity === "high");
@@ -46,17 +52,37 @@ async function runLitmusCli(args) {
46
52
  return 2;
47
53
  }
48
54
  const input = resolveTarget(target);
49
- const guard = checkHostExec(input, unsafeHostExec);
50
- if (!guard.allow) {
51
- process.stderr.write(`\u2192 litmus: ${guard.refuse}
55
+ const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
56
+ const interactive = Boolean(process.stdin.isTTY && process.stdout.isTTY);
57
+ const probes = await import("./src-4L5VHLRF.js");
58
+ const dockerAvailable = isStdio && interactive ? await probes.isDockerAvailable() : false;
59
+ const decision = checkHostExec(input, { optIn: unsafeHostExec, dockerAvailable, interactive });
60
+ if (decision.action === "refuse") {
61
+ process.stderr.write(`\u2192 litmus: ${decision.refuse}
52
62
  `);
53
63
  return 2;
54
64
  }
55
- if (guard.warn) process.stderr.write(`\u2192 ${guard.warn}
65
+ if (decision.action === "confirm" && !await promptYesNo(decision.prompt, decision.defaultYes)) {
66
+ process.stderr.write("\u2192 litmus: cancelled.\n");
67
+ return 2;
68
+ }
69
+ const isolation = decision.isolation;
70
+ if (decision.warn) process.stderr.write(`\u2192 ${decision.warn}
56
71
  `);
57
- const { runLitmus } = await import("./src-I6AGG4CJ.js");
72
+ if (!json) process.stderr.write(`\u2192 running litmus against ${target} \u2026 (~20\u201360s)
73
+ `);
74
+ const onProgress = (done, total, label) => {
75
+ if (!json) process.stderr.write(` \u2192 [${done}/${total}] ${label}
76
+ `);
77
+ };
58
78
  try {
59
- const bundle = await runLitmus(input, { headers, allowStateChanging, timeoutMs });
79
+ const bundle = await probes.runLitmus(input, {
80
+ headers,
81
+ allowStateChanging,
82
+ timeoutMs,
83
+ onProgress,
84
+ ...isolation ? { isolation } : {}
85
+ });
60
86
  process.stdout.write(json ? canonicalStringify(bundle) + "\n" : formatBundle(bundle));
61
87
  return bundle.grade === "D" || bundle.grade === "F" ? 1 : 0;
62
88
  } catch (err) {
@@ -65,6 +91,15 @@ async function runLitmusCli(args) {
65
91
  return 1;
66
92
  }
67
93
  }
94
+ async function promptYesNo(prompt, defaultYes) {
95
+ const { createInterface } = await import("readline/promises");
96
+ const rl = createInterface({ input: process.stdin, output: process.stderr });
97
+ try {
98
+ return isAffirmative(await rl.question(prompt), defaultYes);
99
+ } finally {
100
+ rl.close();
101
+ }
102
+ }
68
103
  function parseAuthFlags(args, env = process.env) {
69
104
  const headers = {};
70
105
  const headerArgs = [];
@@ -113,19 +148,44 @@ function timeoutSecondsToMs(v) {
113
148
  const sec = Number(v);
114
149
  return Number.isFinite(sec) && sec > 0 ? Math.floor(sec * 1e3) : void 0;
115
150
  }
116
- function checkHostExec(input, optIn, optInHint = "--unsafe-host-exec", env = process.env) {
151
+ function checkHostExec(input, gate) {
152
+ const { optIn, dockerAvailable, interactive, optInHint = "--unsafe-host-exec", env = process.env } = gate;
117
153
  const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
118
- const dockerIsolated = env.LITMUS_STDIO_ISOLATION === "docker";
119
- if (!isStdio || dockerIsolated) return { allow: true };
154
+ if (!isStdio) return { action: "allow" };
155
+ if (env.LITMUS_STDIO_ISOLATION === "docker") return { action: "allow", isolation: "docker" };
120
156
  const why = "this launches the target's own code; without Docker isolation it runs on THIS host";
121
- if (optIn) return { allow: true, warn: `\u26A0 unsafe host execution \u2014 ${why}.` };
157
+ const warn = `\u26A0 unsafe host execution \u2014 ${why}.`;
158
+ if (optIn) return { action: "allow", isolation: "none", warn };
159
+ if (interactive) {
160
+ if (dockerAvailable) {
161
+ return {
162
+ action: "confirm",
163
+ isolation: "docker",
164
+ defaultYes: true,
165
+ prompt: "Docker detected \u2014 the target will run sandboxed (recommended). Proceed? [Y/n] "
166
+ };
167
+ }
168
+ return {
169
+ action: "confirm",
170
+ isolation: "none",
171
+ defaultYes: false,
172
+ prompt: `No Docker found \u2014 this would run the target's own code on THIS host, unsandboxed.
173
+ Type "yes" to proceed, or set LITMUS_STDIO_ISOLATION=docker to sandbox: `,
174
+ warn
175
+ };
176
+ }
122
177
  return {
123
- allow: false,
178
+ action: "refuse",
124
179
  refuse: `refusing host execution \u2014 ${why}.
125
180
  \u2022 sandboxed (recommended): set LITMUS_STDIO_ISOLATION=docker (requires Docker)
126
181
  \u2022 accept the risk: re-run with ${optInHint}`
127
182
  };
128
183
  }
184
+ function isAffirmative(answer, defaultYes) {
185
+ const a = answer.trim().toLowerCase();
186
+ if (a === "") return defaultYes;
187
+ return a === "y" || a === "yes";
188
+ }
129
189
  function resolveTarget(target) {
130
190
  if (/^https?:\/\//i.test(target)) return target;
131
191
  if (existsSync(target)) {
@@ -3,7 +3,7 @@ import {
3
3
  METHODOLOGY_VERSION,
4
4
  parseServerRef,
5
5
  serverKey
6
- } from "./chunk-44R4ZYOE.js";
6
+ } from "./chunk-X3P26XGS.js";
7
7
 
8
8
  // ../probes/src/harness.ts
9
9
  import { execFile as execFile3 } from "child_process";
@@ -451,6 +451,10 @@ import { execFile as execFile2 } from "child_process";
451
451
  import { promisify } from "util";
452
452
  import { randomUUID as randomUUID3 } from "crypto";
453
453
  var execFileP = promisify(execFile2);
454
+ var TARGET_STDERR = process.env.LITMUS_DEBUG ? "inherit" : "pipe";
455
+ function discardStderr(transport) {
456
+ transport.stderr?.resume?.();
457
+ }
454
458
  var CLIENT_INFO = { name: "polygraph-litmus", version: "0.0.0" };
455
459
  async function connectTarget(input, opts = {}) {
456
460
  const isolated = opts.isolation === "docker";
@@ -464,6 +468,7 @@ async function connectTarget(input, opts = {}) {
464
468
  command: input.command,
465
469
  args: input.args ?? [],
466
470
  env: { ...getDefaultEnvironment(), ...opts.seedEnv ?? {}, ...input.env ?? {} },
471
+ stderr: TARGET_STDERR,
467
472
  ...input.cwd ?? opts.seedCwd ? { cwd: input.cwd ?? opts.seedCwd } : {}
468
473
  });
469
474
  const cmdline = [input.command, ...input.args ?? []].join(" ");
@@ -497,6 +502,7 @@ async function connectTarget(input, opts = {}) {
497
502
  command: launch.command,
498
503
  args: launch.args,
499
504
  env: { ...getDefaultEnvironment(), ...opts.seedEnv ?? {} },
505
+ stderr: TARGET_STDERR,
500
506
  ...opts.seedCwd ? { cwd: opts.seedCwd } : {}
501
507
  });
502
508
  const client = await connectOrThrow(transport);
@@ -518,14 +524,14 @@ async function connectHostNpm(ref, parsed, opts) {
518
524
  const binNames = await fetchNpmBins(spec, parsed.name);
519
525
  if (!binNames || binNames.length === 0) {
520
526
  const args = ["-y", spec];
521
- const transport = new StdioClientTransport({ command: "npx", args, env, ...cwd });
527
+ const transport = new StdioClientTransport({ command: "npx", args, env, stderr: TARGET_STDERR, ...cwd });
522
528
  const client = await connectOrThrow(transport);
523
529
  return makeResult(client, "stdio", { kind: "stdio", command: ["npx", ...args].join(" "), url: null }, serverRefVal, resolvedVersion, []);
524
530
  }
525
531
  const candidates = orderBinCandidates(binNames, parsed.name);
526
532
  const { result } = await probeForMcpBin(ref, candidates, async (bin) => {
527
533
  const args = ["-y", "-p", spec, bin];
528
- const transport = new StdioClientTransport({ command: "npx", args, env, ...cwd });
534
+ const transport = new StdioClientTransport({ command: "npx", args, env, stderr: TARGET_STDERR, ...cwd });
529
535
  const client = await tryConnect(transport);
530
536
  return client ? { client, descriptor: { kind: "stdio", command: ["npx", ...args].join(" "), url: null } } : null;
531
537
  });
@@ -562,8 +568,9 @@ async function connectIsolatedNpm(ref, parsed, opts) {
562
568
  const transport = new StdioClientTransport({
563
569
  command: launch.command,
564
570
  args: namedArgs,
565
- env: getDefaultEnvironment()
571
+ env: getDefaultEnvironment(),
566
572
  // default env only: no host secrets, no canaries
573
+ stderr: TARGET_STDERR
567
574
  });
568
575
  const client = await tryConnect(transport);
569
576
  if (!client) {
@@ -608,6 +615,7 @@ async function tryConnect(transport) {
608
615
  const client = new Client(CLIENT_INFO, { capabilities: {} });
609
616
  try {
610
617
  await withConnectTimeout(client.connect(transport), transport);
618
+ discardStderr(transport);
611
619
  return client;
612
620
  } catch {
613
621
  try {
@@ -620,6 +628,7 @@ async function tryConnect(transport) {
620
628
  async function connectOrThrow(transport) {
621
629
  const client = new Client(CLIENT_INFO, { capabilities: {} });
622
630
  await withConnectTimeout(client.connect(transport), transport);
631
+ discardStderr(transport);
623
632
  return client;
624
633
  }
625
634
  function makeResult(client, kind, descriptor, serverRef, resolvedVersion, teardownExtra) {
@@ -2027,7 +2036,7 @@ async function runLitmus(target, opts = {}) {
2027
2036
  const isolation = opts.isolation ?? (process.env.LITMUS_STDIO_ISOLATION === "docker" ? "docker" : "none");
2028
2037
  const ranAt = (/* @__PURE__ */ new Date()).toISOString();
2029
2038
  const baselineAllowlist = [...DEFAULT_EGRESS_BASELINE, ...parseAllowlistEnv(process.env.LITMUS_EGRESS_ALLOWLIST)];
2030
- const dockerAvailable = await checkDocker();
2039
+ const dockerAvailable = await isDockerAvailable();
2031
2040
  const canaries = mintCanaries();
2032
2041
  const seedEnv = canaryEnv(canaries);
2033
2042
  const isHttp = typeof target === "string" && /^https?:\/\//i.test(target);
@@ -2169,7 +2178,7 @@ function withTimeout(p, ms, label) {
2169
2178
  })
2170
2179
  ]);
2171
2180
  }
2172
- function checkDocker() {
2181
+ function isDockerAvailable() {
2173
2182
  return new Promise((resolve) => {
2174
2183
  const child = execFile3("docker", ["info"], { timeout: 4e3 }, (err) => resolve(!err));
2175
2184
  child.on("error", () => resolve(false));
@@ -2364,6 +2373,12 @@ function overBroadTrigger(description) {
2364
2373
  }
2365
2374
 
2366
2375
  // ../probes/src/skills/grade-skill.ts
2376
+ var SKILL_CATEGORY_META = {
2377
+ "S-01": { label: "prompt injection / context poisoning", description: "whether the skill body tries to hijack the agent" },
2378
+ "S-03": { label: "data-exfiltration instructions", description: "whether it instructs the agent to leak secrets" },
2379
+ "S-04": { label: "dangerous bundled commands", description: "whether it ships dangerous executable commands" },
2380
+ "S-05": { label: "tool / permission overreach", description: "whether it claims more capability than it needs" }
2381
+ };
2367
2382
  var DISQUALIFYING = /* @__PURE__ */ new Set(["S-01", "S-03"]);
2368
2383
  var CAPPING = /* @__PURE__ */ new Set(["S-04", "S-05"]);
2369
2384
  function gradeSkillCategories(categories) {
@@ -2629,6 +2644,7 @@ export {
2629
2644
  assembleBundle,
2630
2645
  runLitmus,
2631
2646
  enumerateTools,
2647
+ isDockerAvailable,
2632
2648
  SkillLoadError,
2633
2649
  loadSkill,
2634
2650
  stripExamples,
@@ -2637,6 +2653,7 @@ export {
2637
2653
  exfilInstruction,
2638
2654
  dangerousCommand,
2639
2655
  overBroadTrigger,
2656
+ SKILL_CATEGORY_META,
2640
2657
  gradeSkillCategories,
2641
2658
  SKILL_METHODOLOGY_VERSION,
2642
2659
  SKILL_BUNDLE_SCHEMA_VERSION,
@@ -3,22 +3,24 @@ import {
3
3
  checkHostExec,
4
4
  parseAuthFlags,
5
5
  resolveTarget
6
- } from "./chunk-GNPHHS6I.js";
6
+ } from "./chunk-EMMCE3LC.js";
7
7
  import {
8
+ SKILL_CATEGORY_META,
8
9
  SKILL_METHODOLOGY_VERSION,
9
10
  runLitmus,
10
11
  runSkillLitmus,
11
12
  runSkillQuality,
12
13
  runSkillQualityJudged
13
- } from "./chunk-63OICX66.js";
14
+ } from "./chunk-NPYDTMQ7.js";
14
15
  import {
16
+ CATEGORY_META,
15
17
  CATEGORY_STATUS_UINT8,
16
18
  METHODOLOGY_VERSION,
17
19
  parseServerRef,
18
20
  parseSkillRef,
19
21
  serverKey,
20
22
  skillKey
21
- } from "./chunk-44R4ZYOE.js";
23
+ } from "./chunk-X3P26XGS.js";
22
24
 
23
25
  // ../onchain/src/networks.ts
24
26
  var NETWORKS = {
@@ -314,9 +316,14 @@ async function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, t
314
316
  ];
315
317
  const { headers } = parseAuthFlags(argv, {});
316
318
  const input = resolveTarget(server_ref);
317
- const guard = checkHostExec(input, unsafe_host_exec ?? false, 'set "unsafe_host_exec": true');
318
- if (!guard.allow) {
319
- return { isError: true, content: [{ type: "text", text: `run_litmus refused: ${guard.refuse}` }] };
319
+ const decision = checkHostExec(input, {
320
+ optIn: unsafe_host_exec ?? false,
321
+ dockerAvailable: false,
322
+ interactive: false,
323
+ optInHint: 'set "unsafe_host_exec": true'
324
+ });
325
+ if (decision.action === "refuse") {
326
+ return { isError: true, content: [{ type: "text", text: `run_litmus refused: ${decision.refuse}` }] };
320
327
  }
321
328
  const progressToken = extra._meta?.progressToken;
322
329
  const sendProgress = progressToken !== void 0 ? (progress, message) => void extra.sendNotification({
@@ -336,18 +343,19 @@ async function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, t
336
343
  return { isError: true, content: [{ type: "text", text: `run_litmus failed: ${message}` }] };
337
344
  }
338
345
  }
339
- var CATEGORY_LABEL = {
340
- "C-01": "tool-output injection",
341
- "C-02": "permission / egress overreach",
342
- "C-03": "sensitive-data handling",
343
- "C-04": "adversarial-input handling"
344
- };
345
346
  function summarize(b) {
346
347
  const find = (code) => b.categories.find((c) => c.code === code);
347
348
  const categories = ["C-01", "C-02", "C-03", "C-04"].map((code) => {
348
349
  const c = find(code);
349
350
  const findings = c?.status === "fail" ? c.probes.flatMap((p) => p.findings).filter((f) => f.severity === "high").slice(0, 5).map((f) => ({ tool: f.tool, kind: f.kind, match: truncate(f.match, 120), host: f.host, port: f.port })) : [];
350
- return { code, check: CATEGORY_LABEL[code], status: c?.status ?? "unknown", reason: c?.reason ?? null, findings };
351
+ return {
352
+ code,
353
+ check: CATEGORY_META[code].label,
354
+ description: CATEGORY_META[code].description,
355
+ status: c?.status ?? "unknown",
356
+ reason: c?.reason ?? null,
357
+ findings
358
+ };
351
359
  });
352
360
  return {
353
361
  grade: b.grade,
@@ -417,15 +425,11 @@ async function handleRunSkillLitmus({ skill_ref }, ctx = {}) {
417
425
  function errorResult(message) {
418
426
  return { isError: true, content: [{ type: "text", text: `run_skill_litmus failed: ${message}` }] };
419
427
  }
420
- var CATEGORY_LABEL2 = {
421
- "S-01": "prompt injection / context poisoning",
422
- "S-03": "data-exfiltration instructions",
423
- "S-04": "dangerous bundled commands"
424
- };
425
428
  function summarize2(b) {
426
429
  const categories = b.categories.map((c) => ({
427
430
  code: c.code,
428
- check: CATEGORY_LABEL2[c.code] ?? c.code,
431
+ check: SKILL_CATEGORY_META[c.code]?.label ?? c.code,
432
+ description: SKILL_CATEGORY_META[c.code]?.description ?? null,
429
433
  status: c.status,
430
434
  reason: c.reason ?? null,
431
435
  findings: c.status === "fail" ? c.findings.filter((f) => f.severity === "high").slice(0, 5).map((f) => ({ kind: f.kind, match: truncate2(f.match, 120), file: f.file })) : []
@@ -1,6 +1,12 @@
1
1
  // ../core/src/types.ts
2
2
  var METHODOLOGY_VERSION = "litmus-v5";
3
3
  var BUNDLE_SCHEMA_VERSION = "1.4.0";
4
+ var CATEGORY_META = {
5
+ "C-01": { label: "tool-output injection", description: "whether it tries to hijack the caller through tool output" },
6
+ "C-02": { label: "permission / egress overreach", description: "whether it reaches the network beyond what it declares" },
7
+ "C-03": { label: "sensitive-data handling", description: "whether it leaks planted secrets it was handed" },
8
+ "C-04": { label: "adversarial-input handling", description: "whether it stays stable on malformed or hostile input" }
9
+ };
4
10
  var CATEGORY_STATUS_UINT8 = {
5
11
  pass: 0,
6
12
  fail: 1,
@@ -174,6 +180,7 @@ function sortDeep(value, depth = 0) {
174
180
  export {
175
181
  METHODOLOGY_VERSION,
176
182
  BUNDLE_SCHEMA_VERSION,
183
+ CATEGORY_META,
177
184
  CATEGORY_STATUS_UINT8,
178
185
  ServerRefParseError,
179
186
  parseServerRef,
package/dist/cli-skill.js CHANGED
@@ -1,31 +1,18 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
+ SKILL_CATEGORY_META,
3
4
  judgeFromEnv,
4
5
  runSkillLitmus,
5
6
  runSkillQuality,
6
7
  runSkillQualityJudged
7
- } from "./chunk-63OICX66.js";
8
- import "./chunk-44R4ZYOE.js";
8
+ } from "./chunk-NPYDTMQ7.js";
9
+ import "./chunk-X3P26XGS.js";
9
10
 
10
11
  // src/cli-skill.ts
11
12
  import { statSync } from "fs";
12
- var HELP = `polygraphso-litmus-skill \u2014 static safety grades for Claude Code skills.
13
-
14
- usage:
15
- polygraphso-litmus-skill [--json] <path-to-skill-dir>
16
- polygraphso-litmus-skill --help
17
-
18
- The skill dir must contain a SKILL.md. The safety letter is a STATIC scan (no
19
- execution); an A means the static checks were clean, not that the skill is
20
- behaviorally safe.
21
13
 
22
- It also prints a separate, advisory quality signal. The optional LLM-judged
23
- axes (honesty, coherence) run only if you provide your own key \u2014 set
24
- LITMUS_LLM_API_KEY and LITMUS_LLM_MODEL (and LITMUS_LLM_BASE_URL for a non-OpenAI
25
- endpoint). Without a key only the deterministic well-formedness checks run.
26
- More at https://polygraph.so
27
- `;
28
- function render(b) {
14
+ // src/format-skill.ts
15
+ function formatSkillSafety(b) {
29
16
  const lines = [
30
17
  `grade: ${b.grade} (${b.methodologyVersion})`,
31
18
  `${b.gradeRationale}`,
@@ -34,8 +21,11 @@ function render(b) {
34
21
  "",
35
22
  "categories:"
36
23
  ];
24
+ const labelWidth = Math.max(0, ...b.categories.map((c) => SKILL_CATEGORY_META[c.code].label.length));
37
25
  for (const c of b.categories) {
38
- lines.push(` ${c.code} ${c.status}${c.reason ? ` (${c.reason})` : ""}`);
26
+ const { label, description } = SKILL_CATEGORY_META[c.code];
27
+ lines.push(` ${c.code} ${label.padEnd(labelWidth)} ${c.status}${c.reason ? ` (${c.reason})` : ""}`);
28
+ lines.push(` ${description}`);
39
29
  if (c.status === "fail") {
40
30
  for (const f of c.findings.filter((x) => x.severity === "high").slice(0, 5)) {
41
31
  lines.push(` ! ${f.kind}${f.file ? ` [${f.file}]` : ""}: ${f.match}`);
@@ -51,6 +41,24 @@ function render(b) {
51
41
  lines.push("", b.disclaimer);
52
42
  return lines.join("\n") + "\n";
53
43
  }
44
+
45
+ // src/cli-skill.ts
46
+ var HELP = `polygraphso-litmus-skill \u2014 static safety grades for Claude Code skills.
47
+
48
+ usage:
49
+ polygraphso-litmus-skill [--json] <path-to-skill-dir>
50
+ polygraphso-litmus-skill --help
51
+
52
+ The skill dir must contain a SKILL.md. The safety letter is a STATIC scan (no
53
+ execution); an A means the static checks were clean, not that the skill is
54
+ behaviorally safe.
55
+
56
+ It also prints a separate, advisory quality signal. The optional LLM-judged
57
+ axes (honesty, coherence) run only if you provide your own key \u2014 set
58
+ LITMUS_LLM_API_KEY and LITMUS_LLM_MODEL (and LITMUS_LLM_BASE_URL for a non-OpenAI
59
+ endpoint). Without a key only the deterministic well-formedness checks run.
60
+ More at https://polygraph.so
61
+ `;
54
62
  function renderQuality(q) {
55
63
  const lines = ["", `quality (advisory, separate from the grade): ${q.verdict}`];
56
64
  for (const c of q.checks) lines.push(` ${c.status === "pass" ? "\xB7" : "!"} ${c.id}: ${c.detail}`);
@@ -87,7 +95,7 @@ async function main(argv) {
87
95
  const judge = judgeFromEnv();
88
96
  const quality = judge ? await runSkillQualityJudged(target, judge, { skillRef: target }) : runSkillQuality(target, { skillRef: target });
89
97
  process.stdout.write(
90
- json ? JSON.stringify({ safety, quality }, null, 2) + "\n" : render(safety) + renderQuality(quality)
98
+ json ? JSON.stringify({ safety, quality }, null, 2) + "\n" : formatSkillSafety(safety) + renderQuality(quality)
91
99
  );
92
100
  return 0;
93
101
  }
package/dist/cli.js CHANGED
@@ -1,11 +1,11 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  runLitmusCli
4
- } from "./chunk-GNPHHS6I.js";
4
+ } from "./chunk-EMMCE3LC.js";
5
5
  import {
6
6
  parseServerRef,
7
7
  serverKey
8
- } from "./chunk-44R4ZYOE.js";
8
+ } from "./chunk-X3P26XGS.js";
9
9
 
10
10
  // src/cli.ts
11
11
  import { readFileSync } from "fs";
package/dist/index.d.ts CHANGED
@@ -37,6 +37,15 @@ declare const METHODOLOGY_VERSION: "litmus-v5";
37
37
  * `harness.stdioIsolation`; older remain valid. */
38
38
  declare const BUNDLE_SCHEMA_VERSION: "1.4.0";
39
39
  type CategoryCode = "C-01" | "C-02" | "C-03" | "C-04";
40
+ /**
41
+ * Plain-English label + one-line description for each probe category, so CLI and
42
+ * MCP output is legible without knowing the probe IDs. The single source of these
43
+ * strings — both renderers and the MCP `run_litmus` summary read from here.
44
+ */
45
+ declare const CATEGORY_META: Record<CategoryCode, {
46
+ label: string;
47
+ description: string;
48
+ }>;
40
49
  /** Probe IDs carry their family number (1=injection, 2=permission,
41
50
  * 3=adversarial-input, 4=sensitive). 1.3 (second-order injection) added in v5. */
42
51
  type ProbeId = "1.1" | "1.2" | "1.3" | "2.1" | "2.2" | "3.1" | "3.2" | "4.1" | "4.2";
@@ -352,6 +361,9 @@ declare function enumerateTools(client: ListToolsClient, opts?: {
352
361
  maxBytes?: number;
353
362
  listTimeoutMs?: number;
354
363
  }): Promise<ListedTool[]>;
364
+ /** True if a Docker daemon is reachable (governs C-02 / probe 4.2, and the CLI's
365
+ * detect-and-confirm sandbox prompt). */
366
+ declare function isDockerAvailable(): Promise<boolean>;
355
367
 
356
368
  /**
357
369
  * Tool-surface fingerprint (litmus-test-v1 §6, technical-design §3).
@@ -470,6 +482,15 @@ declare function hasHighSeverity(findings: readonly Finding[]): boolean;
470
482
  */
471
483
 
472
484
  type SkillCategoryCode = "S-01" | "S-03" | "S-04" | "S-05";
485
+ /**
486
+ * Plain-English label + one-line description for each skill category, so the skill
487
+ * CLI/MCP output is legible without knowing the S-codes. The single source of these
488
+ * strings — both the renderer and the MCP `run_skill_litmus` summary read from here.
489
+ */
490
+ declare const SKILL_CATEGORY_META: Record<SkillCategoryCode, {
491
+ label: string;
492
+ description: string;
493
+ }>;
473
494
  interface SkillCategoryResult {
474
495
  code: SkillCategoryCode;
475
496
  status: CategoryStatus;
@@ -1109,4 +1130,4 @@ declare function parseAuthFlags(args: readonly string[], env?: NodeJS.ProcessEnv
1109
1130
  /** A target is an https URL, a local MCP entry file, or a registry ref. */
1110
1131
  declare function resolveTarget(target: string): string | StdioCommand;
1111
1132
 
1112
- export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type ListToolsClient, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, encodeSkillAttestationFields, enumerateTools, exfilInstruction, fingerprintToolDefs, formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, verifySkillInputShape };
1133
+ export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_META, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type ListToolsClient, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_CATEGORY_META, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, encodeSkillAttestationFields, enumerateTools, exfilInstruction, fingerprintToolDefs, formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, isDockerAvailable, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, verifySkillInputShape };
package/dist/index.js CHANGED
@@ -31,13 +31,14 @@ import {
31
31
  skillAttestationFields,
32
32
  skillSchemaUID,
33
33
  verifySkillInputShape
34
- } from "./chunk-VAOQNFW3.js";
34
+ } from "./chunk-TK4EI66E.js";
35
35
  import {
36
36
  parseAuthFlags,
37
37
  resolveTarget
38
- } from "./chunk-GNPHHS6I.js";
38
+ } from "./chunk-EMMCE3LC.js";
39
39
  import {
40
40
  SKILL_BUNDLE_SCHEMA_VERSION,
41
+ SKILL_CATEGORY_META,
41
42
  SKILL_METHODOLOGY_VERSION,
42
43
  SKILL_QUALITY_VERSION,
43
44
  SkillLoadError,
@@ -55,6 +56,7 @@ import {
55
56
  instructionMimicry,
56
57
  internalsLeak,
57
58
  invisibleUnicode,
59
+ isDockerAvailable,
58
60
  judgeFromEnv,
59
61
  judgeSkillQuality,
60
62
  loadSkill,
@@ -69,9 +71,10 @@ import {
69
71
  skillInjectionFails,
70
72
  stateChangingToolNames,
71
73
  stripExamples
72
- } from "./chunk-63OICX66.js";
74
+ } from "./chunk-NPYDTMQ7.js";
73
75
  import {
74
76
  BUNDLE_SCHEMA_VERSION,
77
+ CATEGORY_META,
75
78
  CATEGORY_STATUS_UINT8,
76
79
  METHODOLOGY_VERSION,
77
80
  ServerRefParseError,
@@ -83,7 +86,7 @@ import {
83
86
  parseSkillRef,
84
87
  serverKey,
85
88
  skillKey
86
- } from "./chunk-44R4ZYOE.js";
89
+ } from "./chunk-X3P26XGS.js";
87
90
 
88
91
  // ../agent/src/gate.ts
89
92
  function sameServer(a, b) {
@@ -131,6 +134,7 @@ async function liveFingerprint(target) {
131
134
  }
132
135
  export {
133
136
  BUNDLE_SCHEMA_VERSION,
137
+ CATEGORY_META,
134
138
  CATEGORY_STATUS_UINT8,
135
139
  DEFAULT_PASSING,
136
140
  LITMUS_SCHEMA,
@@ -144,6 +148,7 @@ export {
144
148
  RUN_SKILL_LITMUS_TOOL_NAME,
145
149
  RUN_SKILL_LITMUS_TOOL_TITLE,
146
150
  SKILL_BUNDLE_SCHEMA_VERSION,
151
+ SKILL_CATEGORY_META,
147
152
  SKILL_METHODOLOGY_VERSION,
148
153
  SKILL_QUALITY_VERSION,
149
154
  ServerRefParseError,
@@ -178,6 +183,7 @@ export {
178
183
  instructionMimicry,
179
184
  internalsLeak,
180
185
  invisibleUnicode,
186
+ isDockerAvailable,
181
187
  judgeFromEnv,
182
188
  judgeSkillQuality,
183
189
  litmusFields,
package/dist/mcp.js CHANGED
@@ -20,12 +20,12 @@ import {
20
20
  runSkillLitmusInputShape,
21
21
  verifyInputShape,
22
22
  verifySkillInputShape
23
- } from "./chunk-VAOQNFW3.js";
24
- import "./chunk-GNPHHS6I.js";
23
+ } from "./chunk-TK4EI66E.js";
24
+ import "./chunk-EMMCE3LC.js";
25
25
  import {
26
26
  judgeFromEnv
27
- } from "./chunk-63OICX66.js";
28
- import "./chunk-44R4ZYOE.js";
27
+ } from "./chunk-NPYDTMQ7.js";
28
+ import "./chunk-X3P26XGS.js";
29
29
 
30
30
  // src/mcp.ts
31
31
  import { realpathSync } from "fs";
@@ -1,5 +1,6 @@
1
1
  import {
2
2
  SKILL_BUNDLE_SCHEMA_VERSION,
3
+ SKILL_CATEGORY_META,
3
4
  SKILL_METHODOLOGY_VERSION,
4
5
  SKILL_QUALITY_VERSION,
5
6
  SkillLoadError,
@@ -17,6 +18,7 @@ import {
17
18
  instructionMimicry,
18
19
  internalsLeak,
19
20
  invisibleUnicode,
21
+ isDockerAvailable,
20
22
  judgeFromEnv,
21
23
  judgeSkillQuality,
22
24
  loadSkill,
@@ -31,10 +33,11 @@ import {
31
33
  skillInjectionFails,
32
34
  stateChangingToolNames,
33
35
  stripExamples
34
- } from "./chunk-63OICX66.js";
35
- import "./chunk-44R4ZYOE.js";
36
+ } from "./chunk-NPYDTMQ7.js";
37
+ import "./chunk-X3P26XGS.js";
36
38
  export {
37
39
  SKILL_BUNDLE_SCHEMA_VERSION,
40
+ SKILL_CATEGORY_META,
38
41
  SKILL_METHODOLOGY_VERSION,
39
42
  SKILL_QUALITY_VERSION,
40
43
  SkillLoadError,
@@ -52,6 +55,7 @@ export {
52
55
  instructionMimicry,
53
56
  internalsLeak,
54
57
  invisibleUnicode,
58
+ isDockerAvailable,
55
59
  judgeFromEnv,
56
60
  judgeSkillQuality,
57
61
  loadSkill,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polygraphso/litmus",
3
- "version": "0.10.0",
3
+ "version": "0.11.0",
4
4
  "description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
5
5
  "license": "Apache-2.0",
6
6
  "homepage": "https://polygraph.so",
@@ -62,12 +62,12 @@
62
62
  "tsup": "^8.3.0",
63
63
  "typescript": "^5.9.3",
64
64
  "vitest": "^2.1.0",
65
- "@polygraph/onchain": "0.0.0",
66
65
  "@polygraph/core": "0.0.0",
67
- "@polygraph/probes": "0.0.0",
66
+ "@polygraph/onchain": "0.0.0",
68
67
  "@polygraph/agent": "0.0.0",
69
68
  "@polygraph/mcp": "0.0.0",
70
- "@polygraph/cli": "0.0.0"
69
+ "@polygraph/cli": "0.0.0",
70
+ "@polygraph/probes": "0.0.0"
71
71
  },
72
72
  "publishConfig": {
73
73
  "access": "public"