npm - @polygraphso/litmus - Versions diffs - 0.10.0 → 0.12.0 - Mend

@polygraphso/litmus 0.10.0 → 0.12.0

This diff compares the contents of two publicly available versions of the package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (11)

package/dist/{chunk-44R4ZYOE.js → chunk-CKQZFK77.js} +8 -1
package/dist/{chunk-63OICX66.js → chunk-OGOFUBLN.js} +28 -6
package/dist/{chunk-VAOQNFW3.js → chunk-PTWDLGI5.js} +26 -19
package/dist/{chunk-GNPHHS6I.js → chunk-TTGWSGPC.js} +75 -14
package/dist/cli-skill.js +28 -20
package/dist/cli.js +2 -2
package/dist/index.d.ts +37 -3
package/dist/index.js +10 -4
package/dist/mcp.js +4 -4
package/dist/{src-I6AGG4CJ.js → src-ZHTFCKNR.js} +6 -2
package/package.json +3 -3

package/dist/{chunk-44R4ZYOE.js → chunk-CKQZFK77.js} RENAMED Viewed

@@ -1,6 +1,12 @@
 // ../core/src/types.ts
 var METHODOLOGY_VERSION = "litmus-v5";
-var BUNDLE_SCHEMA_VERSION = "1.4.0";
+var BUNDLE_SCHEMA_VERSION = "1.5.0";
+var CATEGORY_META = {
+  "C-01": { label: "tool-output injection", description: "whether it tries to hijack the caller through tool output" },
+  "C-02": { label: "permission / egress overreach", description: "whether it reaches the network beyond what it declares" },
+  "C-03": { label: "sensitive-data handling", description: "whether it leaks planted secrets it was handed" },
+  "C-04": { label: "adversarial-input handling", description: "whether it stays stable on malformed or hostile input" }
+};
 var CATEGORY_STATUS_UINT8 = {
   pass: 0,
   fail: 1,
@@ -174,6 +180,7 @@ function sortDeep(value, depth = 0) {
 export {
   METHODOLOGY_VERSION,
   BUNDLE_SCHEMA_VERSION,
+  CATEGORY_META,
   CATEGORY_STATUS_UINT8,
   ServerRefParseError,
   parseServerRef,

package/dist/{chunk-63OICX66.js → chunk-OGOFUBLN.js} RENAMED Viewed

@@ -3,7 +3,7 @@ import {
   METHODOLOGY_VERSION,
   parseServerRef,
   serverKey
-} from "./chunk-44R4ZYOE.js";
+} from "./chunk-CKQZFK77.js";
 // ../probes/src/harness.ts
 import { execFile as execFile3 } from "child_process";
@@ -451,6 +451,10 @@ import { execFile as execFile2 } from "child_process";
 import { promisify } from "util";
 import { randomUUID as randomUUID3 } from "crypto";
 var execFileP = promisify(execFile2);
+var TARGET_STDERR = process.env.LITMUS_DEBUG ? "inherit" : "pipe";
+function discardStderr(transport) {
+  transport.stderr?.resume?.();
+}
 var CLIENT_INFO = { name: "polygraph-litmus", version: "0.0.0" };
 async function connectTarget(input, opts = {}) {
   const isolated = opts.isolation === "docker";
@@ -464,6 +468,7 @@ async function connectTarget(input, opts = {}) {
       command: input.command,
       args: input.args ?? [],
       env: { ...getDefaultEnvironment(), ...opts.seedEnv ?? {}, ...input.env ?? {} },
+      stderr: TARGET_STDERR,
       ...input.cwd ?? opts.seedCwd ? { cwd: input.cwd ?? opts.seedCwd } : {}
     });
     const cmdline = [input.command, ...input.args ?? []].join(" ");
@@ -497,6 +502,7 @@ async function connectTarget(input, opts = {}) {
     command: launch.command,
     args: launch.args,
     env: { ...getDefaultEnvironment(), ...opts.seedEnv ?? {} },
+    stderr: TARGET_STDERR,
     ...opts.seedCwd ? { cwd: opts.seedCwd } : {}
   });
   const client = await connectOrThrow(transport);
@@ -518,14 +524,14 @@ async function connectHostNpm(ref, parsed, opts) {
   const binNames = await fetchNpmBins(spec, parsed.name);
   if (!binNames || binNames.length === 0) {
     const args = ["-y", spec];
-    const transport = new StdioClientTransport({ command: "npx", args, env, ...cwd });
+    const transport = new StdioClientTransport({ command: "npx", args, env, stderr: TARGET_STDERR, ...cwd });
     const client = await connectOrThrow(transport);
     return makeResult(client, "stdio", { kind: "stdio", command: ["npx", ...args].join(" "), url: null }, serverRefVal, resolvedVersion, []);
   }
   const candidates = orderBinCandidates(binNames, parsed.name);
   const { result } = await probeForMcpBin(ref, candidates, async (bin) => {
     const args = ["-y", "-p", spec, bin];
-    const transport = new StdioClientTransport({ command: "npx", args, env, ...cwd });
+    const transport = new StdioClientTransport({ command: "npx", args, env, stderr: TARGET_STDERR, ...cwd });
     const client = await tryConnect(transport);
     return client ? { client, descriptor: { kind: "stdio", command: ["npx", ...args].join(" "), url: null } } : null;
   });
@@ -562,8 +568,9 @@ async function connectIsolatedNpm(ref, parsed, opts) {
       const transport = new StdioClientTransport({
         command: launch.command,
         args: namedArgs,
-        env: getDefaultEnvironment()
+        env: getDefaultEnvironment(),
         // default env only: no host secrets, no canaries
+        stderr: TARGET_STDERR
       });
       const client = await tryConnect(transport);
       if (!client) {
@@ -608,6 +615,7 @@ async function tryConnect(transport) {
   const client = new Client(CLIENT_INFO, { capabilities: {} });
   try {
     await withConnectTimeout(client.connect(transport), transport);
+    discardStderr(transport);
     return client;
   } catch {
     try {
@@ -620,6 +628,7 @@ async function tryConnect(transport) {
 async function connectOrThrow(transport) {
   const client = new Client(CLIENT_INFO, { capabilities: {} });
   await withConnectTimeout(client.connect(transport), transport);
+  discardStderr(transport);
   return client;
 }
 function makeResult(client, kind, descriptor, serverRef, resolvedVersion, teardownExtra) {
@@ -629,6 +638,9 @@ function makeResult(client, kind, descriptor, serverRef, resolvedVersion, teardo
     descriptor,
     serverRef,
     resolvedVersion,
+    // The server's self-reported identity from the initialize handshake. The SDK
+    // exposes it post-connect via getServerVersion(); absent/blank → null.
+    selfReportedVersion: client.getServerVersion()?.version ?? null,
     teardown: async () => {
       try {
         await client.close();
@@ -2009,6 +2021,7 @@ function assembleBundle(input) {
     methodologyVersion: METHODOLOGY_VERSION,
     serverRef: input.serverRef,
     resolvedVersion: input.resolvedVersion,
+    selfReportedVersion: input.selfReportedVersion,
     target: input.target,
     toolDefsFingerprint: input.toolDefsFingerprint,
     toolDefs: input.toolDefs,
@@ -2027,7 +2040,7 @@ async function runLitmus(target, opts = {}) {
   const isolation = opts.isolation ?? (process.env.LITMUS_STDIO_ISOLATION === "docker" ? "docker" : "none");
   const ranAt = (/* @__PURE__ */ new Date()).toISOString();
   const baselineAllowlist = [...DEFAULT_EGRESS_BASELINE, ...parseAllowlistEnv(process.env.LITMUS_EGRESS_ALLOWLIST)];
-  const dockerAvailable = await checkDocker();
+  const dockerAvailable = await isDockerAvailable();
   const canaries = mintCanaries();
   const seedEnv = canaryEnv(canaries);
   const isHttp = typeof target === "string" && /^https?:\/\//i.test(target);
@@ -2091,6 +2104,7 @@ async function runLitmus(target, opts = {}) {
       return assembleBundle({
         serverRef: conn.serverRef,
         resolvedVersion: conn.resolvedVersion,
+        selfReportedVersion: conn.selfReportedVersion,
         // Surface the server's declared egress in the bundle (disclosure: a
         // declaration is not exoneration — the consumer/agent-gate can judge).
         target: egress.declaredEgress.length ? { ...conn.descriptor, declaredEgress: egress.declaredEgress } : conn.descriptor,
@@ -2169,7 +2183,7 @@ function withTimeout(p, ms, label) {
     })
   ]);
 }
-function checkDocker() {
+function isDockerAvailable() {
   return new Promise((resolve) => {
     const child = execFile3("docker", ["info"], { timeout: 4e3 }, (err) => resolve(!err));
     child.on("error", () => resolve(false));
@@ -2364,6 +2378,12 @@ function overBroadTrigger(description) {
 }
 // ../probes/src/skills/grade-skill.ts
+var SKILL_CATEGORY_META = {
+  "S-01": { label: "prompt injection / context poisoning", description: "whether the skill body tries to hijack the agent" },
+  "S-03": { label: "data-exfiltration instructions", description: "whether it instructs the agent to leak secrets" },
+  "S-04": { label: "dangerous bundled commands", description: "whether it ships dangerous executable commands" },
+  "S-05": { label: "tool / permission overreach", description: "whether it claims more capability than it needs" }
+};
 var DISQUALIFYING = /* @__PURE__ */ new Set(["S-01", "S-03"]);
 var CAPPING = /* @__PURE__ */ new Set(["S-04", "S-05"]);
 function gradeSkillCategories(categories) {
@@ -2629,6 +2649,7 @@ export {
   assembleBundle,
   runLitmus,
   enumerateTools,
+  isDockerAvailable,
   SkillLoadError,
   loadSkill,
   stripExamples,
@@ -2637,6 +2658,7 @@ export {
   exfilInstruction,
   dangerousCommand,
   overBroadTrigger,
+  SKILL_CATEGORY_META,
   gradeSkillCategories,
   SKILL_METHODOLOGY_VERSION,
   SKILL_BUNDLE_SCHEMA_VERSION,

package/dist/{chunk-VAOQNFW3.js → chunk-PTWDLGI5.js} RENAMED Viewed

@@ -3,22 +3,24 @@ import {
   checkHostExec,
   parseAuthFlags,
   resolveTarget
-} from "./chunk-GNPHHS6I.js";
+} from "./chunk-TTGWSGPC.js";
 import {
+  SKILL_CATEGORY_META,
   SKILL_METHODOLOGY_VERSION,
   runLitmus,
   runSkillLitmus,
   runSkillQuality,
   runSkillQualityJudged
-} from "./chunk-63OICX66.js";
+} from "./chunk-OGOFUBLN.js";
 import {
+  CATEGORY_META,
   CATEGORY_STATUS_UINT8,
   METHODOLOGY_VERSION,
   parseServerRef,
   parseSkillRef,
   serverKey,
   skillKey
-} from "./chunk-44R4ZYOE.js";
+} from "./chunk-CKQZFK77.js";
 // ../onchain/src/networks.ts
 var NETWORKS = {
@@ -314,9 +316,14 @@ async function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, t
     ];
     const { headers } = parseAuthFlags(argv, {});
     const input = resolveTarget(server_ref);
-    const guard = checkHostExec(input, unsafe_host_exec ?? false, 'set "unsafe_host_exec": true');
-    if (!guard.allow) {
-      return { isError: true, content: [{ type: "text", text: `run_litmus refused: ${guard.refuse}` }] };
+    const decision = checkHostExec(input, {
+      optIn: unsafe_host_exec ?? false,
+      dockerAvailable: false,
+      interactive: false,
+      optInHint: 'set "unsafe_host_exec": true'
+    });
+    if (decision.action === "refuse") {
+      return { isError: true, content: [{ type: "text", text: `run_litmus refused: ${decision.refuse}` }] };
     }
     const progressToken = extra._meta?.progressToken;
     const sendProgress = progressToken !== void 0 ? (progress, message) => void extra.sendNotification({
@@ -336,24 +343,28 @@ async function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, t
     return { isError: true, content: [{ type: "text", text: `run_litmus failed: ${message}` }] };
   }
 }
-var CATEGORY_LABEL = {
-  "C-01": "tool-output injection",
-  "C-02": "permission / egress overreach",
-  "C-03": "sensitive-data handling",
-  "C-04": "adversarial-input handling"
-};
 function summarize(b) {
   const find = (code) => b.categories.find((c) => c.code === code);
   const categories = ["C-01", "C-02", "C-03", "C-04"].map((code) => {
     const c = find(code);
     const findings = c?.status === "fail" ? c.probes.flatMap((p) => p.findings).filter((f) => f.severity === "high").slice(0, 5).map((f) => ({ tool: f.tool, kind: f.kind, match: truncate(f.match, 120), host: f.host, port: f.port })) : [];
-    return { code, check: CATEGORY_LABEL[code], status: c?.status ?? "unknown", reason: c?.reason ?? null, findings };
+    return {
+      code,
+      check: CATEGORY_META[code].label,
+      description: CATEGORY_META[code].description,
+      status: c?.status ?? "unknown",
+      reason: c?.reason ?? null,
+      findings
+    };
   });
   return {
     grade: b.grade,
     summary: b.gradeRationale,
     serverRef: b.serverRef,
     resolvedVersion: b.resolvedVersion,
+    // The server's self-asserted serverInfo.version — descriptive only, not a
+    // re-fetchable pin (cf. resolvedVersion). Null when the server reports none.
+    selfReportedVersion: b.selfReportedVersion,
     fingerprint: b.toolDefsFingerprint,
     ranAt: b.ranAt,
     methodologyVersion: b.methodologyVersion,
@@ -417,15 +428,11 @@ async function handleRunSkillLitmus({ skill_ref }, ctx = {}) {
 function errorResult(message) {
   return { isError: true, content: [{ type: "text", text: `run_skill_litmus failed: ${message}` }] };
 }
-var CATEGORY_LABEL2 = {
-  "S-01": "prompt injection / context poisoning",
-  "S-03": "data-exfiltration instructions",
-  "S-04": "dangerous bundled commands"
-};
 function summarize2(b) {
   const categories = b.categories.map((c) => ({
     code: c.code,
-    check: CATEGORY_LABEL2[c.code] ?? c.code,
+    check: SKILL_CATEGORY_META[c.code]?.label ?? c.code,
+    description: SKILL_CATEGORY_META[c.code]?.description ?? null,
     status: c.status,
     reason: c.reason ?? null,
     findings: c.status === "fail" ? c.findings.filter((f) => f.severity === "high").slice(0, 5).map((f) => ({ kind: f.kind, match: truncate2(f.match, 120), file: f.file })) : []

package/dist/{chunk-GNPHHS6I.js → chunk-TTGWSGPC.js} RENAMED Viewed

@@ -1,6 +1,7 @@
 import {
+  CATEGORY_META,
   canonicalStringify
-} from "./chunk-44R4ZYOE.js";
+} from "./chunk-CKQZFK77.js";
 // ../cli/src/litmus.ts
 import { existsSync } from "fs";
@@ -9,11 +10,17 @@ import * as path from "path";
 // ../cli/src/format.ts
 function formatBundle(b) {
-  const status = (code) => b.categories.find((c) => c.code === code)?.status ?? "?";
   const lines = [];
   lines.push(`\u2192 ${b.methodologyVersion} \xB7 ${b.serverRef}`);
   if (b.resolvedVersion) lines.push(`\u2192 version ${b.resolvedVersion}`);
-  lines.push(`\u2192 C-01 ${status("C-01")} \xB7 C-02 ${status("C-02")} \xB7 C-03 ${status("C-03")} \xB7 C-04 ${status("C-04")}`);
+  if (b.selfReportedVersion) lines.push(`\u2192 self-reported ${b.selfReportedVersion} (unverified)`);
+  lines.push("\u2192 checks");
+  const labelWidth = Math.max(0, ...b.categories.map((c) => CATEGORY_META[c.code].label.length));
+  for (const c of b.categories) {
+    const { label, description } = CATEGORY_META[c.code];
+    lines.push(`    ${c.code}  ${label.padEnd(labelWidth)}  ${c.status}`);
+    lines.push(`          ${description}`);
+  }
   const c01 = b.categories.find((c) => c.code === "C-01");
   if (c01?.status === "fail") {
     const highs = c01.probes.flatMap((p) => p.findings).filter((f) => f.severity === "high");
@@ -46,17 +53,37 @@ async function runLitmusCli(args) {
     return 2;
   }
   const input = resolveTarget(target);
-  const guard = checkHostExec(input, unsafeHostExec);
-  if (!guard.allow) {
-    process.stderr.write(`\u2192 litmus: ${guard.refuse}
+  const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
+  const interactive = Boolean(process.stdin.isTTY && process.stdout.isTTY);
+  const probes = await import("./src-ZHTFCKNR.js");
+  const dockerAvailable = isStdio && interactive ? await probes.isDockerAvailable() : false;
+  const decision = checkHostExec(input, { optIn: unsafeHostExec, dockerAvailable, interactive });
+  if (decision.action === "refuse") {
+    process.stderr.write(`\u2192 litmus: ${decision.refuse}
 `);
     return 2;
   }
-  if (guard.warn) process.stderr.write(`\u2192 ${guard.warn}
+  if (decision.action === "confirm" && !await promptYesNo(decision.prompt, decision.defaultYes)) {
+    process.stderr.write("\u2192 litmus: cancelled.\n");
+    return 2;
+  }
+  const isolation = decision.isolation;
+  if (decision.warn) process.stderr.write(`\u2192 ${decision.warn}
 `);
-  const { runLitmus } = await import("./src-I6AGG4CJ.js");
+  if (!json) process.stderr.write(`\u2192 running litmus against ${target} \u2026 (~20\u201360s)
+`);
+  const onProgress = (done, total, label) => {
+    if (!json) process.stderr.write(`  \u2192 [${done}/${total}] ${label}
+`);
+  };
   try {
-    const bundle = await runLitmus(input, { headers, allowStateChanging, timeoutMs });
+    const bundle = await probes.runLitmus(input, {
+      headers,
+      allowStateChanging,
+      timeoutMs,
+      onProgress,
+      ...isolation ? { isolation } : {}
+    });
     process.stdout.write(json ? canonicalStringify(bundle) + "\n" : formatBundle(bundle));
     return bundle.grade === "D" || bundle.grade === "F" ? 1 : 0;
   } catch (err) {
@@ -65,6 +92,15 @@ async function runLitmusCli(args) {
     return 1;
   }
 }
+async function promptYesNo(prompt, defaultYes) {
+  const { createInterface } = await import("readline/promises");
+  const rl = createInterface({ input: process.stdin, output: process.stderr });
+  try {
+    return isAffirmative(await rl.question(prompt), defaultYes);
+  } finally {
+    rl.close();
+  }
+}
 function parseAuthFlags(args, env = process.env) {
   const headers = {};
   const headerArgs = [];
@@ -113,19 +149,44 @@ function timeoutSecondsToMs(v) {
   const sec = Number(v);
   return Number.isFinite(sec) && sec > 0 ? Math.floor(sec * 1e3) : void 0;
 }
-function checkHostExec(input, optIn, optInHint = "--unsafe-host-exec", env = process.env) {
+function checkHostExec(input, gate) {
+  const { optIn, dockerAvailable, interactive, optInHint = "--unsafe-host-exec", env = process.env } = gate;
   const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
-  const dockerIsolated = env.LITMUS_STDIO_ISOLATION === "docker";
-  if (!isStdio || dockerIsolated) return { allow: true };
+  if (!isStdio) return { action: "allow" };
+  if (env.LITMUS_STDIO_ISOLATION === "docker") return { action: "allow", isolation: "docker" };
   const why = "this launches the target's own code; without Docker isolation it runs on THIS host";
-  if (optIn) return { allow: true, warn: `\u26A0 unsafe host execution \u2014 ${why}.` };
+  const warn = `\u26A0 unsafe host execution \u2014 ${why}.`;
+  if (optIn) return { action: "allow", isolation: "none", warn };
+  if (interactive) {
+    if (dockerAvailable) {
+      return {
+        action: "confirm",
+        isolation: "docker",
+        defaultYes: true,
+        prompt: "Docker detected \u2014 the target will run sandboxed (recommended). Proceed? [Y/n] "
+      };
+    }
+    return {
+      action: "confirm",
+      isolation: "none",
+      defaultYes: false,
+      prompt: `No Docker found \u2014 this would run the target's own code on THIS host, unsandboxed.
+  Type "yes" to proceed, or set LITMUS_STDIO_ISOLATION=docker to sandbox: `,
+      warn
+    };
+  }
   return {
-    allow: false,
+    action: "refuse",
     refuse: `refusing host execution \u2014 ${why}.
   \u2022 sandboxed (recommended): set LITMUS_STDIO_ISOLATION=docker (requires Docker)
   \u2022 accept the risk: re-run with ${optInHint}`
   };
 }
+function isAffirmative(answer, defaultYes) {
+  const a = answer.trim().toLowerCase();
+  if (a === "") return defaultYes;
+  return a === "y" || a === "yes";
+}
 function resolveTarget(target) {
   if (/^https?:\/\//i.test(target)) return target;
   if (existsSync(target)) {

package/dist/cli-skill.js CHANGED Viewed

@@ -1,31 +1,18 @@
 #!/usr/bin/env node
 import {
+  SKILL_CATEGORY_META,
   judgeFromEnv,
   runSkillLitmus,
   runSkillQuality,
   runSkillQualityJudged
-} from "./chunk-63OICX66.js";
-import "./chunk-44R4ZYOE.js";
+} from "./chunk-OGOFUBLN.js";
+import "./chunk-CKQZFK77.js";
 // src/cli-skill.ts
 import { statSync } from "fs";
-var HELP = `polygraphso-litmus-skill \u2014 static safety grades for Claude Code skills.
-usage:
-  polygraphso-litmus-skill [--json] <path-to-skill-dir>
-  polygraphso-litmus-skill --help
-The skill dir must contain a SKILL.md. The safety letter is a STATIC scan (no
-execution); an A means the static checks were clean, not that the skill is
-behaviorally safe.
-It also prints a separate, advisory quality signal. The optional LLM-judged
-axes (honesty, coherence) run only if you provide your own key \u2014 set
-LITMUS_LLM_API_KEY and LITMUS_LLM_MODEL (and LITMUS_LLM_BASE_URL for a non-OpenAI
-endpoint). Without a key only the deterministic well-formedness checks run.
-More at https://polygraph.so
-`;
-function render(b) {
+// src/format-skill.ts
+function formatSkillSafety(b) {
   const lines = [
     `grade: ${b.grade}  (${b.methodologyVersion})`,
     `${b.gradeRationale}`,
@@ -34,8 +21,11 @@ function render(b) {
     "",
     "categories:"
   ];
+  const labelWidth = Math.max(0, ...b.categories.map((c) => SKILL_CATEGORY_META[c.code].label.length));
   for (const c of b.categories) {
-    lines.push(`  ${c.code}  ${c.status}${c.reason ? `  (${c.reason})` : ""}`);
+    const { label, description } = SKILL_CATEGORY_META[c.code];
+    lines.push(`  ${c.code}  ${label.padEnd(labelWidth)}  ${c.status}${c.reason ? `  (${c.reason})` : ""}`);
+    lines.push(`        ${description}`);
     if (c.status === "fail") {
       for (const f of c.findings.filter((x) => x.severity === "high").slice(0, 5)) {
         lines.push(`      ! ${f.kind}${f.file ? ` [${f.file}]` : ""}: ${f.match}`);
@@ -51,6 +41,24 @@ function render(b) {
   lines.push("", b.disclaimer);
   return lines.join("\n") + "\n";
 }
+// src/cli-skill.ts
+var HELP = `polygraphso-litmus-skill \u2014 static safety grades for Claude Code skills.
+usage:
+  polygraphso-litmus-skill [--json] <path-to-skill-dir>
+  polygraphso-litmus-skill --help
+The skill dir must contain a SKILL.md. The safety letter is a STATIC scan (no
+execution); an A means the static checks were clean, not that the skill is
+behaviorally safe.
+It also prints a separate, advisory quality signal. The optional LLM-judged
+axes (honesty, coherence) run only if you provide your own key \u2014 set
+LITMUS_LLM_API_KEY and LITMUS_LLM_MODEL (and LITMUS_LLM_BASE_URL for a non-OpenAI
+endpoint). Without a key only the deterministic well-formedness checks run.
+More at https://polygraph.so
+`;
 function renderQuality(q) {
   const lines = ["", `quality (advisory, separate from the grade): ${q.verdict}`];
   for (const c of q.checks) lines.push(`  ${c.status === "pass" ? "\xB7" : "!"} ${c.id}: ${c.detail}`);
@@ -87,7 +95,7 @@ async function main(argv) {
   const judge = judgeFromEnv();
   const quality = judge ? await runSkillQualityJudged(target, judge, { skillRef: target }) : runSkillQuality(target, { skillRef: target });
   process.stdout.write(
-    json ? JSON.stringify({ safety, quality }, null, 2) + "\n" : render(safety) + renderQuality(quality)
+    json ? JSON.stringify({ safety, quality }, null, 2) + "\n" : formatSkillSafety(safety) + renderQuality(quality)
   );
   return 0;
 }

package/dist/cli.js CHANGED Viewed

@@ -1,11 +1,11 @@
 #!/usr/bin/env node
 import {
   runLitmusCli
-} from "./chunk-GNPHHS6I.js";
+} from "./chunk-TTGWSGPC.js";
 import {
   parseServerRef,
   serverKey
-} from "./chunk-44R4ZYOE.js";
+} from "./chunk-CKQZFK77.js";
 // src/cli.ts
 import { readFileSync } from "fs";

package/dist/index.d.ts CHANGED Viewed

@@ -30,13 +30,24 @@ type Registry = "npm" | "pypi" | "github";
  *  not branch on it. */
 declare const METHODOLOGY_VERSION: "litmus-v5";
 /** Evidence-bundle format version (owned by onchain-proof-spec §2).
+ *  1.5.0 adds the optional `selfReportedVersion` field (the server's
+ *  self-asserted `serverInfo.version`, descriptive metadata only);
  *  1.4.0 adds the C-01 probe id `1.3` (second-order injection, litmus-v5);
  *  1.3.0 adds the optional C-04 category and the `internals-leak`/`crash` finding
  *  kinds (litmus-v4); 1.2.0 adds the optional `target.declaredEgress` field and
  *  the `egress-allowed` finding kind (litmus-v3); 1.1.0 adds
  *  `harness.stdioIsolation`; older remain valid. */
-declare const BUNDLE_SCHEMA_VERSION: "1.4.0";
+declare const BUNDLE_SCHEMA_VERSION: "1.5.0";
 type CategoryCode = "C-01" | "C-02" | "C-03" | "C-04";
+/**
+ * Plain-English label + one-line description for each probe category, so CLI and
+ * MCP output is legible without knowing the probe IDs. The single source of these
+ * strings — both renderers and the MCP `run_litmus` summary read from here.
+ */
+declare const CATEGORY_META: Record<CategoryCode, {
+    label: string;
+    description: string;
+}>;
 /** Probe IDs carry their family number (1=injection, 2=permission,
  *  3=adversarial-input, 4=sensitive). 1.3 (second-order injection) added in v5. */
 type ProbeId = "1.1" | "1.2" | "1.3" | "2.1" | "2.2" | "3.1" | "3.2" | "4.1" | "4.2";
@@ -108,8 +119,15 @@ interface EvidenceBundle {
     methodologyVersion: string;
     /** Canonical, versionless identity (serverKey). */
     serverRef: string;
-    /** The exact version actually run. */
+    /** The exact version actually run — a re-fetchable pin (npm/pypi version,
+     *  skill commit). Null when the target has no such identity (remote URL,
+     *  unpinned ref). This is the reproducibility anchor. */
     resolvedVersion: string | null;
+    /** The version the server reports about *itself* in the MCP `initialize`
+     *  handshake (`serverInfo.version`). Self-asserted and operator-controlled —
+     *  descriptive metadata only, never a reproducibility anchor (cf.
+     *  resolvedVersion). Null when the server reports none. */
+    selfReportedVersion: string | null;
     target: TargetDescriptor;
     /** sha256 of the canonical tool surface → `0x` + 64 hex (bytes32). */
     toolDefsFingerprint: string;
@@ -246,6 +264,9 @@ interface ConnectedTarget {
     /** Canonical versionless identity (serverKey), the URL, or the command line. */
     serverRef: string;
     resolvedVersion: string | null;
+    /** The server's self-asserted `serverInfo.version` from the MCP handshake.
+     *  Descriptive metadata only (see EvidenceBundle.selfReportedVersion). */
+    selfReportedVersion: string | null;
     teardown: () => Promise<void>;
 }
 interface ConnectOptions {
@@ -352,6 +373,9 @@ declare function enumerateTools(client: ListToolsClient, opts?: {
     maxBytes?: number;
     listTimeoutMs?: number;
 }): Promise<ListedTool[]>;
+/** True if a Docker daemon is reachable (governs C-02 / probe 4.2, and the CLI's
+ *  detect-and-confirm sandbox prompt). */
+declare function isDockerAvailable(): Promise<boolean>;
 /**
  * Tool-surface fingerprint (litmus-test-v1 §6, technical-design §3).
@@ -409,6 +433,7 @@ declare function gradeFromCategories(categories: readonly CategoryResult[]): Gra
 interface BundleInput {
     serverRef: string;
     resolvedVersion: string | null;
+    selfReportedVersion: string | null;
     target: TargetDescriptor;
     toolDefsFingerprint: string;
     toolDefs: ToolDef[];
@@ -470,6 +495,15 @@ declare function hasHighSeverity(findings: readonly Finding[]): boolean;
  */
 type SkillCategoryCode = "S-01" | "S-03" | "S-04" | "S-05";
+/**
+ * Plain-English label + one-line description for each skill category, so the skill
+ * CLI/MCP output is legible without knowing the S-codes. The single source of these
+ * strings — both the renderer and the MCP `run_skill_litmus` summary read from here.
+ */
+declare const SKILL_CATEGORY_META: Record<SkillCategoryCode, {
+    label: string;
+    description: string;
+}>;
 interface SkillCategoryResult {
     code: SkillCategoryCode;
     status: CategoryStatus;
@@ -1109,4 +1143,4 @@ declare function parseAuthFlags(args: readonly string[], env?: NodeJS.ProcessEnv
 /** A target is an https URL, a local MCP entry file, or a registry ref. */
 declare function resolveTarget(target: string): string | StdioCommand;
-export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type ListToolsClient, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, encodeSkillAttestationFields, enumerateTools, 
exfilInstruction, fingerprintToolDefs, formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, verifySkillInputShape };
+export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_META, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type ListToolsClient, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_CATEGORY_META, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, 
encodeSkillAttestationFields, enumerateTools, exfilInstruction, fingerprintToolDefs, formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, isDockerAvailable, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, verifySkillInputShape };

package/dist/index.js CHANGED Viewed

@@ -31,13 +31,14 @@ import {
   skillAttestationFields,
   skillSchemaUID,
   verifySkillInputShape
-} from "./chunk-VAOQNFW3.js";
+} from "./chunk-PTWDLGI5.js";
 import {
   parseAuthFlags,
   resolveTarget
-} from "./chunk-GNPHHS6I.js";
+} from "./chunk-TTGWSGPC.js";
 import {
   SKILL_BUNDLE_SCHEMA_VERSION,
+  SKILL_CATEGORY_META,
   SKILL_METHODOLOGY_VERSION,
   SKILL_QUALITY_VERSION,
   SkillLoadError,
@@ -55,6 +56,7 @@ import {
   instructionMimicry,
   internalsLeak,
   invisibleUnicode,
+  isDockerAvailable,
   judgeFromEnv,
   judgeSkillQuality,
   loadSkill,
@@ -69,9 +71,10 @@ import {
   skillInjectionFails,
   stateChangingToolNames,
   stripExamples
-} from "./chunk-63OICX66.js";
+} from "./chunk-OGOFUBLN.js";
 import {
   BUNDLE_SCHEMA_VERSION,
+  CATEGORY_META,
   CATEGORY_STATUS_UINT8,
   METHODOLOGY_VERSION,
   ServerRefParseError,
@@ -83,7 +86,7 @@ import {
   parseSkillRef,
   serverKey,
   skillKey
-} from "./chunk-44R4ZYOE.js";
+} from "./chunk-CKQZFK77.js";
 // ../agent/src/gate.ts
 function sameServer(a, b) {
@@ -131,6 +134,7 @@ async function liveFingerprint(target) {
 }
 export {
   BUNDLE_SCHEMA_VERSION,
+  CATEGORY_META,
   CATEGORY_STATUS_UINT8,
   DEFAULT_PASSING,
   LITMUS_SCHEMA,
@@ -144,6 +148,7 @@ export {
   RUN_SKILL_LITMUS_TOOL_NAME,
   RUN_SKILL_LITMUS_TOOL_TITLE,
   SKILL_BUNDLE_SCHEMA_VERSION,
+  SKILL_CATEGORY_META,
   SKILL_METHODOLOGY_VERSION,
   SKILL_QUALITY_VERSION,
   ServerRefParseError,
@@ -178,6 +183,7 @@ export {
   instructionMimicry,
   internalsLeak,
   invisibleUnicode,
+  isDockerAvailable,
   judgeFromEnv,
   judgeSkillQuality,
   litmusFields,

package/dist/mcp.js CHANGED Viewed

@@ -20,12 +20,12 @@ import {
   runSkillLitmusInputShape,
   verifyInputShape,
   verifySkillInputShape
-} from "./chunk-VAOQNFW3.js";
-import "./chunk-GNPHHS6I.js";
+} from "./chunk-PTWDLGI5.js";
+import "./chunk-TTGWSGPC.js";
 import {
   judgeFromEnv
-} from "./chunk-63OICX66.js";
-import "./chunk-44R4ZYOE.js";
+} from "./chunk-OGOFUBLN.js";
+import "./chunk-CKQZFK77.js";
 // src/mcp.ts
 import { realpathSync } from "fs";

package/dist/{src-I6AGG4CJ.js → src-ZHTFCKNR.js} RENAMED Viewed

@@ -1,5 +1,6 @@
 import {
   SKILL_BUNDLE_SCHEMA_VERSION,
+  SKILL_CATEGORY_META,
   SKILL_METHODOLOGY_VERSION,
   SKILL_QUALITY_VERSION,
   SkillLoadError,
@@ -17,6 +18,7 @@ import {
   instructionMimicry,
   internalsLeak,
   invisibleUnicode,
+  isDockerAvailable,
   judgeFromEnv,
   judgeSkillQuality,
   loadSkill,
@@ -31,10 +33,11 @@ import {
   skillInjectionFails,
   stateChangingToolNames,
   stripExamples
-} from "./chunk-63OICX66.js";
-import "./chunk-44R4ZYOE.js";
+} from "./chunk-OGOFUBLN.js";
+import "./chunk-CKQZFK77.js";
 export {
   SKILL_BUNDLE_SCHEMA_VERSION,
+  SKILL_CATEGORY_META,
   SKILL_METHODOLOGY_VERSION,
   SKILL_QUALITY_VERSION,
   SkillLoadError,
@@ -52,6 +55,7 @@ export {
   instructionMimicry,
   internalsLeak,
   invisibleUnicode,
+  isDockerAvailable,
   judgeFromEnv,
   judgeSkillQuality,
   loadSkill,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@polygraphso/litmus",
-  "version": "0.10.0",
+  "version": "0.12.0",
   "description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
   "license": "Apache-2.0",
   "homepage": "https://polygraph.so",
@@ -62,11 +62,11 @@
     "tsup": "^8.3.0",
     "typescript": "^5.9.3",
     "vitest": "^2.1.0",
-    "@polygraph/onchain": "0.0.0",
     "@polygraph/core": "0.0.0",
-    "@polygraph/probes": "0.0.0",
+    "@polygraph/onchain": "0.0.0",
     "@polygraph/agent": "0.0.0",
     "@polygraph/mcp": "0.0.0",
+    "@polygraph/probes": "0.0.0",
     "@polygraph/cli": "0.0.0"
   },
   "publishConfig": {