npm - @polygraphso/litmus - Versions diffs - 0.8.1 → 0.9.1 - Mend

@polygraphso/litmus 0.8.1 → 0.9.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +58 -0
package/dist/{chunk-ZR6XRGMQ.js → chunk-44R4ZYOE.js} +67 -0
package/dist/{chunk-VOPISHBU.js → chunk-BUKDFSDO.js} +2 -2
package/dist/{chunk-35UOPCBW.js → chunk-RYJXVMCT.js} +482 -9
package/dist/chunk-Z66GKAQD.js +692 -0
package/dist/cli-skill.d.ts +1 -0
package/dist/cli-skill.js +98 -0
package/dist/cli.js +2 -2
package/dist/index.d.ts +437 -2
package/dist/index.js +86 -8
package/dist/mcp.js +130 -122
package/dist/src-TMJOIVGB.js +67 -0
package/package.json +4 -3
package/dist/chunk-BPS4YCDL.js +0 -250
package/dist/src-RSTPCEYU.js +0 -31

package/dist/{chunk-35UOPCBW.js → chunk-RYJXVMCT.js} RENAMED Viewed

@@ -3,7 +3,7 @@ import {
   METHODOLOGY_VERSION,
   parseServerRef,
   serverKey
-} from "./chunk-ZR6XRGMQ.js";
+} from "./chunk-44R4ZYOE.js";
 // ../probes/src/harness.ts
 import { execFile as execFile3 } from "child_process";
@@ -1078,15 +1078,29 @@ function instructionMimicry(text) {
   }
   return findings;
 }
-var SENSITIVE_QUERY_KEY = /(?:^|[?&])[^=&]*(?:key|token|secret|password|passwd|auth|session|cookie|canary|api|env|cred)[^=&]*=/i;
+// Lowercase fragments of query-parameter names treated as sensitive:
+// looksExfilQuery returns true as soon as a parameter's lowercased name
+// contains any one of them (substring match, so "api" also hits "rapid").
+var SENSITIVE_QUERY_KEY_TOKENS = [
+  "key",
+  "token",
+  "secret",
+  "password",
+  "passwd",
+  "auth",
+  "session",
+  "cookie",
+  "canary",
+  "api",
+  "env",
+  "cred"
+];
 function looksExfilQuery(url) {
   const q = url.indexOf("?");
   if (q < 0) return false;
   const query = url.slice(q + 1);
-  if (SENSITIVE_QUERY_KEY.test(url)) return true;
   for (const pair of query.split("&")) {
     const eq = pair.indexOf("=");
     if (eq < 0) continue;
+    const key = pair.slice(0, eq).toLowerCase();
+    if (SENSITIVE_QUERY_KEY_TOKENS.some((t) => key.includes(t))) return true;
     let v = pair.slice(eq + 1);
     try {
       v = decodeURIComponent(v);
@@ -1100,7 +1114,7 @@ function looksExfilQuery(url) {
 }
 function markdownTricks(text) {
   const findings = [];
-  const proto = /\b(?:javascript|data):[^\s)"'<>]+/gi;
+  const proto = /\b(?:javascript|data):[^\s)"'<>*`]+/gi;
   for (let m = proto.exec(text); m; m = proto.exec(text)) {
     findings.push({
       kind: "markdown-trick",
@@ -1109,7 +1123,7 @@ function markdownTricks(text) {
       offset: m.index
     });
   }
-  const exfilImg = /!?\[[^\]]*\]\((https?:\/\/[^)\s]*\?[^)\s]*=[^)\s]*)\)/gi;
+  const exfilImg = /!?\[[^\]]{0,200}\]\((https?:\/\/[^)\s?]{0,400}\?[^)\s=]{0,200}=[^)\s]{0,200})\)/gi;
   for (let m = exfilImg.exec(text); m; m = exfilImg.exec(text)) {
     const url = m[1] ?? m[0];
     if (!looksExfilQuery(url)) continue;
@@ -1126,7 +1140,9 @@ var INTERNALS_LEAK = [
   // V8 / Node stack frame: `at fn (/abs/file.js:12:5)` or `at /abs/file.js:12:5`
   // (a leading path/drive/`node:`/`file:` is required, so a "meet at 10:30:45"
   // timestamp can't trip it).
-  /^\s*at\s+(?:.*\s)?\(?(?:\/|[A-Za-z]:[\\/]|node:|file:\/\/)[^\s()]*:\d+:\d+\)?\s*$/m,
+  // Bounded quantifiers ({0,300}) keep this linear: overlapping `.*\s` + `[^\s()]*`
+  // + trailing `\s*$` over untrusted output is otherwise polynomial (js/polynomial-redos).
+  /^\s*at\s+(?:[^\n]{0,300}\s)?\(?(?:\/|[A-Za-z]:[\\/]|node:|file:\/\/)[^\s()]{0,300}:\d+:\d+\)?\s*$/m,
   // Node uncaught-rejection / fatal banners.
   /\b(?:UnhandledPromiseRejection(?:Warning)?|unhandledRejection|FATAL ERROR:|Fatal error:)\b/,
   // Python traceback header + frame.
@@ -1138,8 +1154,9 @@ var INTERNALS_LEAK = [
   // Go panic with its goroutine dump (`panic: … goroutine 1 [running]:`).
   /\bpanic:[\s\S]{0,300}?\bgoroutine\s+\d+\s+\[/,
   // Ruby backtrace frame (`from app.rb:10:in 'method'` / older backtick form);
-  // requires a `.rb` file + `:line:in` so prose can't trip it.
-  /[\w./-]+\.rb:\d+:in\s+['\x60]/,
+  // requires a `.rb` file + `:line:in` so prose can't trip it. The lookbehind +
+  // bounded run keep `[\w./-]+\.rb` linear (the `.`-overlap is otherwise polynomial).
+  /(?<![\w./-])[\w./-]{1,200}\.rb:\d+:in\s+['\x60]/,
   // .NET stack frame (`at NS.Method() in C:\path\File.cs:line 12`).
   /\bat\s+[\w.<>+]+\([^)]*\)\s+in\s+\S+:line\s+\d+/i,
   // Rust panic banner (`thread 'main' panicked at …`).
@@ -2157,6 +2174,444 @@ function checkDocker() {
   });
 }
+// ../probes/src/skills/load-skill.ts
+import { readFileSync, readdirSync, statSync } from "fs";
+import { join as join3, relative, sep } from "path";
+import { createHash as createHash2 } from "crypto";
+/**
+ * Raised when a skill directory cannot be loaded. Callers in this file tell it
+ * apart from unexpected errors with `instanceof` and surface its `message`.
+ */
+var SkillLoadError = class extends Error {
+  /**
+   * @param {string} [message] - human-readable reason.
+   * @param {ErrorOptions} [options] - forwarded to `Error` (e.g. `{ cause }`).
+   */
+  constructor(message, options) {
+    super(message, options);
+    // Without this, instances report the inherited name "Error" in logs.
+    this.name = "SkillLoadError";
+  }
+};
+// Upper bound on how many enumerated paths loadSkill keeps; paths past this
+// count (in sorted order) are dropped before any file is read.
+var MAX_FILES = 4096;
+// File extensions (case-insensitive) that looksExecutable treats as scripts.
+var EXEC_EXT = /\.(?:sh|bash|zsh|py|js|mjs|cjs|ts|rb|pl|php)$/i;
+/**
+ * Hex-encoded SHA-256 digest of `buf`.
+ * @param {string|Buffer} buf - data to hash.
+ * @returns {string} 64-character lowercase hex digest.
+ */
+function sha256hex(buf) {
+  const hasher = createHash2("sha256");
+  hasher.update(buf);
+  return hasher.digest("hex");
+}
+/**
+ * Decide whether a bundled file should be scanned as a script.
+ * @param {string} relPath - path of the file relative to the skill root.
+ * @param {Buffer} bytes - raw file contents.
+ * @returns {boolean} true for a known script extension or a `#!` first two bytes.
+ */
+function looksExecutable(relPath, bytes) {
+  const hasScriptExtension = EXEC_EXT.test(relPath);
+  if (hasScriptExtension) return true;
+  const magic = bytes.subarray(0, 2).toString("latin1");
+  return magic === "#!";
+}
+/**
+ * Recursively list every regular file under `dir`, skipping `node_modules`
+ * and `.git`. Unreadable directories and entries are silently skipped.
+ *
+ * `statSync` follows symlinks, so a link that points back at one of its own
+ * ancestors used to be walked again and again until the OS path limit was
+ * hit. Directories are therefore identified by device:inode and a directory
+ * already on the current descent path is not entered a second time.
+ *
+ * @param {string} dir - root directory to walk.
+ * @returns {string[]} sorted, `/`-separated, NFC-normalized paths relative to `dir`.
+ */
+function enumerateFiles(dir) {
+  const out = [];
+  // Identity of each directory on the current descent path (not every
+  // directory seen), so non-cyclic symlinked directories are still listed.
+  const ancestors = /* @__PURE__ */ new Set();
+  // An inode of 0 means the platform gave no usable identity: skip the guard.
+  const dirKey = (st) => st.ino ? `${st.dev}:${st.ino}` : null;
+  const walk = (d, key) => {
+    let entries;
+    try {
+      entries = readdirSync(d);
+    } catch {
+      return;
+    }
+    if (key !== null) ancestors.add(key);
+    for (const name of entries) {
+      if (name === "node_modules" || name === ".git") continue;
+      const p = join3(d, name);
+      let st;
+      try {
+        st = statSync(p);
+      } catch {
+        continue;
+      }
+      if (st.isDirectory()) {
+        const childKey = dirKey(st);
+        // Symlink cycle: this directory is one of our own ancestors.
+        if (childKey !== null && ancestors.has(childKey)) continue;
+        walk(p, childKey);
+      } else if (st.isFile()) out.push(relative(dir, p).split(sep).join("/").normalize("NFC"));
+    }
+    if (key !== null) ancestors.delete(key);
+  };
+  let rootKey = null;
+  try {
+    rootKey = dirKey(statSync(dir));
+  } catch {
+    // A missing or unreadable root is handled by readdirSync failing in walk().
+  }
+  walk(dir, rootKey);
+  return out.sort();
+}
+/**
+ * Split a SKILL.md source into its `---`-fenced frontmatter and the body.
+ *
+ * A leading UTF-8 BOM no longer hides the opening fence, and CRLF line endings
+ * are normalized to LF in the returned frontmatter so line-based consumers do
+ * not see stray `\r` characters. When no complete fence pair is found the
+ * input is returned untouched as the body.
+ *
+ * @param {string} src - full text of the file.
+ * @returns {{frontmatter: string, body: string}} frontmatter without the
+ *   fence lines, and everything after the closing fence line.
+ */
+function splitFrontmatter(src) {
+  const none = { frontmatter: "", body: src };
+  const text = src.charCodeAt(0) === 65279 ? src.slice(1) : src;
+  if (!text.startsWith("---")) return none;
+  const firstNL = text.indexOf("\n");
+  if (firstNL < 0) return none;
+  // Search from firstNL itself so an empty frontmatter ("---\n---") still closes.
+  const end = text.indexOf("\n---", firstNL);
+  if (end < 0) return none;
+  const close = text.indexOf("\n", end + 1);
+  return {
+    frontmatter: text.slice(firstNL + 1, end).replace(/\r\n/g, "\n").replace(/\r$/, ""),
+    body: close < 0 ? "" : text.slice(close + 1)
+  };
+}
+/**
+ * Pull the `description` value out of raw frontmatter text without a YAML
+ * parser. Handles a plain or quoted single-line value and a `>` / `|` block
+ * scalar (optionally with `+`/`-`), whose indented lines are joined by spaces.
+ *
+ * Lines are split on LF or CRLF: with a trailing `\r` the `(.*)$` pattern
+ * could never match, so CRLF files lost their description. Surrounding quotes
+ * are removed only when they form a matching pair, so `say "hi"` is kept whole.
+ *
+ * @param {string} frontmatter - frontmatter text without the `---` fences.
+ * @returns {string} the description, or "" when there is none.
+ */
+function extractDescription(frontmatter) {
+  const lines = frontmatter.split(/\r?\n/);
+  for (let i = 0; i < lines.length; i++) {
+    const m = /^description\s*:\s*(.*)$/i.exec(lines[i]);
+    if (!m) continue;
+    const v = m[1].trim();
+    if (/^[>|][+-]?$/.test(v)) {
+      const collected = [];
+      for (let j = i + 1; j < lines.length; j++) {
+        const line = lines[j];
+        if (line.trim() === "") continue;
+        // The first non-indented line ends the block scalar.
+        if (/^\s/.test(line)) collected.push(line.trim());
+        else break;
+      }
+      return collected.join(" ");
+    }
+    const quoted = /^(['"])(.*)\1$/.exec(v);
+    return quoted ? quoted[2] : v;
+  }
+  return "";
+}
+/**
+ * Load a skill directory: enumerate its files, read them, hash them and
+ * split SKILL.md into frontmatter and body.
+ *
+ * @param {string} dir - skill root directory.
+ * @returns {{dir: string, frontmatter: string, description: string, body: string,
+ *   files: Array<{relPath: string, bytes: Buffer, isExecutable: boolean}>,
+ *   contentHash: string}} the loaded skill; `contentHash` is "0x" + SHA-256 of
+ *   a manifest of `relPath\0sha256(bytes)` lines.
+ * @throws {SkillLoadError} when no SKILL.md is listed or it cannot be read.
+ */
+function loadSkill(dir) {
+  const relPaths = enumerateFiles(dir);
+  // NOTE(review): paths beyond MAX_FILES are dropped silently, so they are
+  // neither hashed nor scanned — confirm this should not fail the load instead.
+  if (relPaths.length > MAX_FILES) relPaths.length = MAX_FILES;
+  const skillMdRel = relPaths.find((p) => p.toLowerCase() === "skill.md");
+  if (!skillMdRel) throw new SkillLoadError(`no SKILL.md in ${dir}`);
+  const files = [];
+  for (const relPath of relPaths) {
+    let bytes;
+    try {
+      bytes = readFileSync(join3(dir, relPath));
+    } catch {
+      // Best effort: an unreadable file is left out of the bundle.
+      continue;
+    }
+    files.push({ relPath, bytes, isExecutable: looksExecutable(relPath, bytes) });
+  }
+  // SKILL.md was listed but may have been skipped as unreadable above; report
+  // that as a load error rather than a TypeError on `undefined.bytes`.
+  const skillMd = files.find((f) => f.relPath === skillMdRel);
+  if (!skillMd) throw new SkillLoadError(`cannot read ${skillMdRel} in ${dir}`);
+  const manifest = files.map((f) => `${f.relPath}\0${sha256hex(f.bytes)}`).join("\n");
+  const contentHash = "0x" + sha256hex(manifest);
+  const src = skillMd.bytes.toString("utf8");
+  const { frontmatter, body } = splitFrontmatter(src);
+  return {
+    dir,
+    frontmatter,
+    description: extractDescription(frontmatter),
+    body,
+    files,
+    contentHash
+  };
+}
+// ../probes/src/skills/scanners-skill.ts
+/**
+ * Remove illustrative content from markdown before scanning: fenced code
+ * blocks (``` and ~~~) and inline code spans each become a single space, and
+ * block-quote lines (starting with optional whitespace then `>`) are dropped.
+ * @param {string} md - markdown text.
+ * @returns {string} the text with examples removed.
+ */
+function stripExamples(md) {
+  const withoutBacktickFences = md.replace(/```[\s\S]*?```/g, " ");
+  const withoutTildeFences = withoutBacktickFences.replace(/~~~[\s\S]*?~~~/g, " ");
+  const withoutInlineCode = withoutTildeFences.replace(/`[^`\n]*`/g, " ");
+  const kept = [];
+  for (const line of withoutInlineCode.split("\n")) {
+    if (/^\s*>/.test(line)) continue;
+    kept.push(line);
+  }
+  return kept.join("\n");
+}
+/**
+ * True for an instruction-mimicry finding whose match is a bare `system:`
+ * label (at the start, or after whitespace or `>`) and contains no `<`.
+ * @param {{kind: string, match: string}} f - a scanner finding.
+ * @returns {boolean}
+ */
+function isBareSystemColon(f) {
+  if (f.kind !== "instruction-mimicry") return false;
+  if (!/(?:^|[\s>])system\s*:/i.test(f.match)) return false;
+  return !f.match.includes("<");
+}
+/**
+ * Run the injection scanners over skill text after examples are stripped.
+ * Findings come back in scanner order: invisible unicode, instruction mimicry
+ * (minus bare `system:` labels), then markdown tricks.
+ * @param {string} body - markdown text to scan.
+ * @returns {Array<object>} combined findings.
+ */
+function skillInjection(body) {
+  const text = stripExamples(body);
+  const unicodeFindings = invisibleUnicode(text);
+  const mimicryFindings = instructionMimicry(text).filter((f) => !isBareSystemColon(f));
+  const markdownFindings = markdownTricks(text);
+  return [...unicodeFindings, ...mimicryFindings, ...markdownFindings];
+}
+/**
+ * Whether injection findings fail category S-01. Delegates to hasHighSeverity,
+ * which is defined outside this view — presumably true when any finding is
+ * high severity; confirm against its definition.
+ * @param {Array<object>} findings - findings from skillInjection.
+ */
+function skillInjectionFails(findings) {
+  return hasHighSeverity(findings);
+}
+// Verbs that move data somewhere. exfilInstruction needs a verb, a secret
+// noun and a sink in the same sentence before it reports a finding.
+var TRANSMIT_VERB = /\b(?:send|post|put|upload|exfiltrate|transmit|e-?mail|forward|leak|publish|curl|wget|fetch)\b/i;
+// Nouns naming secret material. `.env` sits outside the leading `\b`: a word
+// boundary cannot occur between a space (or line start) and `.`, so the
+// previous `\b\.env\b` never matched a standalone ".env".
+var SECRET_NOUN = /(?:\b(?:credentials?|secrets?|tokens?|api[\s_-]?keys?|access[\s_-]?keys?|passwords?|passwd|private[\s_-]?keys?|ssh[\s_-]?keys?|env(?:ironment)?\s+variables?|aws[\s_-]?(?:secret|access)|bearer\s+token)|\.env)\b/i;
+// Destinations: a literal http(s) URL, or "to [a|the|…] [remote|external|…] server/endpoint/…".
+var SINK = /(?:https?:\/\/\S+|\bto\s+(?:a\s+|an\s+|the\s+|your\s+|our\s+)?(?:remote|external|attacker|third[\s-]?party)?\s*(?:server|endpoint|host|url|webhook|domain))/i;
+/**
+ * Flag sentences that instruct sending secret material to a destination.
+ * Examples are stripped first; the text is then split on newlines and on
+ * whitespace following `.`, `!` or `?`. A sentence is reported when it
+ * matches TRANSMIT_VERB, SECRET_NOUN and SINK together.
+ * @param {string} text - markdown text to scan.
+ * @returns {Array<{kind: string, severity: string, match: string}>} one
+ *   high-severity finding per sentence, `match` capped at 160 characters.
+ */
+function exfilInstruction(text) {
+  const sentences = stripExamples(text).split(/(?<=[.!?])\s+|\n/).map((raw) => raw.trim());
+  return sentences
+    .filter((s) => s && TRANSMIT_VERB.test(s) && SECRET_NOUN.test(s) && SINK.test(s))
+    .map((s) => ({ kind: "exfil-instruction", severity: "high", match: s.slice(0, 160) }));
+}
+// Patterns dangerousCommand runs over bundled script text. Each entry pairs a
+// regex with the severity recorded on a match.
+var DANGEROUS = [
+  // pipe a network fetch straight into a shell — the classic remote-exec.
+  { re: /\b(?:curl|wget|fetch)\b[^\n|]*\|\s*(?:sudo\s+)?(?:ba)?sh\b/i, severity: "high" },
+  // base64 decode piped into sh/bash.
+  { re: /\bbase64\s+(?:--decode|-d|-D)\b[^\n|]*\|\s*(?:ba)?sh\b/i, severity: "high" },
+  // reverse shells.
+  { re: /\b(?:bash|sh)\s+-i\b[^\n]*(?:>&|\d>&)/i, severity: "high" },
+  { re: /\/dev\/tcp\/[^\s/]+\/\d+/i, severity: "high" },
+  // `nc` is listed explicitly: `n(?:et)?cat?` needs at least "nca", so the
+  // most common spelling (`nc -e /bin/sh …`) was never matched.
+  { re: /\b(?:nc|n(?:et)?cat?)\b[^\n]*\s-e\b/i, severity: "high" },
+  // lower-confidence: dynamic exec of strings / blanket destructive fs — MEDIUM,
+  // recorded but does not floor the letter on its own.
+  { re: /\beval\s*\(/i, severity: "medium" },
+  { re: /\bsubprocess\.[A-Za-z]+\([^)]*shell\s*=\s*True/i, severity: "medium" },
+  { re: /\bos\.system\s*\(/i, severity: "medium" },
+  { re: /\brm\s+-rf\s+(?:\/|~|\$)/i, severity: "medium" }
+];
+/**
+ * Scan script text for DANGEROUS patterns, both directly and inside
+ * base64-looking runs (16+ characters) that decode to a shell pipe or a
+ * `/dev/tcp/` path. At most one finding per pattern per scanned string.
+ *
+ * NOTE(review): for "base64-decoded" findings `offset` indexes the decoded
+ * payload, not `text`.
+ *
+ * @param {string} text - file contents.
+ * @param {string} [file] - path recorded on each finding when provided.
+ * @returns {Array<object>} findings with kind "dangerous-command".
+ */
+function dangerousCommand(text, file) {
+  const findings = [];
+  const fileField = file ? { file } : {};
+  function scan(s, label) {
+    const prefix = label ? `${label}: ` : "";
+    for (const rule of DANGEROUS) {
+      const hit = rule.re.exec(s);
+      if (!hit) continue;
+      findings.push({
+        kind: "dangerous-command",
+        severity: rule.severity,
+        match: prefix + hit[0].slice(0, 120),
+        offset: hit.index,
+        ...fileField
+      });
+    }
+  }
+  scan(text);
+  const base64Run = /[A-Za-z0-9+/]{16,}={0,2}/g;
+  for (const blob of text.matchAll(base64Run)) {
+    const decoded = decode(blob[0], "base64");
+    if (!decoded) continue;
+    // Only re-scan payloads that already look like a shell pipe or /dev/tcp use.
+    if (/\|\s*(?:ba)?sh\b|\/dev\/tcp\//i.test(decoded)) scan(decoded, "base64-decoded");
+  }
+  return findings;
+}
+/**
+ * Decode `s` from encoding `enc` into a UTF-8 string.
+ * @param {string} s - encoded text.
+ * @param {string} enc - a Buffer encoding name such as "base64".
+ * @returns {string|null} the decoded text, or null when decoding throws or
+ *   the result contains no printable ASCII character at all.
+ */
+function decode(s, enc) {
+  let decoded;
+  try {
+    decoded = Buffer.from(s, enc).toString("utf8");
+  } catch {
+    return null;
+  }
+  const hasPrintableAscii = /[\x20-\x7e]/.test(decoded);
+  return hasPrintableAscii ? decoded : null;
+}
+// Phrases suggesting a description that would activate the skill for nearly everything.
+var OVER_BROAD = /\b(?:always|every\s+(?:file|request|time|message|prompt)|all\s+(?:requests|files|prompts|messages)|regardless\s+of|no\s+matter\s+what)\b/i;
+/**
+ * Report the first over-broad trigger phrase in a skill description.
+ * @param {string} description - the frontmatter description.
+ * @returns {Array<{kind: string, severity: string, match: string, offset: number}>}
+ *   a single low-severity finding, or an empty array.
+ */
+function overBroadTrigger(description) {
+  const hit = OVER_BROAD.exec(description);
+  if (hit === null) return [];
+  return [{ kind: "over-broad-trigger", severity: "low", match: hit[0], offset: hit.index }];
+}
+// ../probes/src/skills/grade-skill.ts
+// Category codes whose failure forces an F.
+var DISQUALIFYING = /* @__PURE__ */ new Set(["S-01", "S-03"]);
+// Category codes whose failure caps the grade at D.
+var CAPPING = /* @__PURE__ */ new Set(["S-04", "S-05"]);
+/**
+ * Turn per-category results into a letter grade with a rationale.
+ *
+ * Precedence, highest first:
+ *   1. a failed disqualifying category -> F
+ *   2. S-01 missing or not "pass"      -> F (nothing was verified)
+ *   3. a failed capping category       -> D
+ *   4. every category "pass"           -> A
+ *   5. otherwise                       -> B, listing the skipped categories
+ *
+ * Step 2 runs before step 3: the D rationale asserts that there is no
+ * injection or exfil instruction, which cannot be claimed when S-01 did not
+ * complete. Previously such input was graded D.
+ *
+ * @param {Array<{code: string, status: string, reason?: string}>} categories
+ * @returns {{grade: string, rationale: string}}
+ */
+function gradeSkillCategories(categories) {
+  const byCode = (code) => categories.find((c) => c.code === code);
+  const s01 = byCode("S-01");
+  const failed = categories.filter((c) => c.status === "fail").map((c) => c.code);
+  const skipped = categories.filter((c) => c.status === "skipped").map((c) => c.code);
+  if (failed.some((c) => DISQUALIFYING.has(c))) {
+    const which = failed.filter((c) => DISQUALIFYING.has(c)).join(", ");
+    return {
+      grade: "F",
+      rationale: `Disqualifying failure in ${which} \u2014 the skill instructs prompt injection or data exfiltration into an agent that loads it.`
+    };
+  }
+  if (s01?.status !== "pass") {
+    return {
+      grade: "F",
+      rationale: "S-01 did not complete \u2014 the SKILL.md could not be parsed/scanned, so the skill is treated as ungraded/unsafe."
+    };
+  }
+  if (failed.some((c) => CAPPING.has(c))) {
+    const which = failed.filter((c) => CAPPING.has(c)).join(", ");
+    return {
+      grade: "D",
+      rationale: `Overreach in ${which} \u2014 a dangerous bundled command or an undeclared capability. No injection or exfil instruction, so the grade caps at D.`
+    };
+  }
+  const allPass = categories.every((c) => c.status === "pass");
+  if (allPass) {
+    return {
+      grade: "A",
+      rationale: "All skill categories passed: no injection or exfil instruction in the body, and no dangerous bundled command or undeclared capability. A reflects static scanning, not behavioral proof."
+    };
+  }
+  const note = skipped.length ? ` Not verified: ${skipped.join(", ")} (${skipped.map((c) => byCode(c)?.reason).filter(Boolean).join("; ")}).` : "";
+  return {
+    grade: "B",
+    rationale: `Injection and exfil checks passed; some categories not verified.${note}`
+  };
+}
+// ../probes/src/skills/skill-harness.ts
+// Emitted as `methodologyVersion` on every skill bundle; also the default
+// `harness.version` when the caller passes no `harnessVersion`.
+var SKILL_METHODOLOGY_VERSION = "litmus-skill-v1";
+// Emitted as `schemaVersion` on every skill bundle.
+var SKILL_BUNDLE_SCHEMA_VERSION = "0.1.0";
+// Emitted as `disclaimer` on every skill bundle, including load failures.
+var DISCLAIMER2 = "litmus-skill-v1 is a deterministic STATIC scan of the skill's text and bundled files. It is not behavioral proof: a skill's instructions are interpreted by an agent at runtime, bundled scripts are not executed in this version, and a command constructed or fetched at runtime is not detectable by static scanning. An A means the static checks found no injection, exfil instruction, or dangerous bundled command \u2014 not that the skill is safe to run unsupervised.";
+/**
+ * Build one category result record.
+ * @param {string} code - category code such as "S-01".
+ * @param {string} status - "pass", "fail" or "skipped".
+ * @param {Array<object>} findings - findings attached to the category.
+ * @param {string} [reason] - explanatory note; omitted from the record when falsy.
+ * @returns {{code: string, status: string, findings: Array<object>, reason?: string}}
+ */
+function cat(code, status, findings, reason) {
+  const category = { code, status, findings };
+  if (reason) category.reason = reason;
+  return category;
+}
+/**
+ * Run the static skill scan over a directory and assemble the result bundle.
+ *
+ * S-01 scans body and frontmatter for injection, S-03 scans the body for
+ * exfil instructions, and S-04 scans bundled executable files for dangerous
+ * commands (high-severity findings only; the rest become advisories together
+ * with any over-broad trigger in the description). A load failure yields a
+ * bundle with a single skipped S-01 category and contentHash "0x".
+ *
+ * @param {string} dir - skill root directory.
+ * @param {{ranAt?: string, harnessVersion?: string, skillRef?: string}} [opts]
+ * @returns {object} the skill bundle, including `grade` and `gradeRationale`.
+ */
+function runSkillLitmus(dir, opts = {}) {
+  const ranAt = opts.ranAt ?? new Date().toISOString();
+  const harness = {
+    package: "@polygraph/probes",
+    version: opts.harnessVersion ?? SKILL_METHODOLOGY_VERSION,
+    node: process.version
+  };
+  const base = {
+    schemaVersion: SKILL_BUNDLE_SCHEMA_VERSION,
+    methodologyVersion: SKILL_METHODOLOGY_VERSION,
+    ranAt,
+    harness,
+    disclaimer: DISCLAIMER2
+  };
+  const skillRef = opts.skillRef ?? dir;
+  let loaded;
+  try {
+    loaded = loadSkill(dir);
+  } catch (e) {
+    const reason = e instanceof SkillLoadError ? e.message : "failed to load skill";
+    const skippedOnly = [cat("S-01", "skipped", [], reason)];
+    const verdict = gradeSkillCategories(skippedOnly);
+    return {
+      ...base,
+      skillRef,
+      contentHash: "0x",
+      categories: skippedOnly,
+      advisories: [],
+      grade: verdict.grade,
+      gradeRationale: verdict.rationale
+    };
+  }
+  const injFindings = [...skillInjection(loaded.body), ...skillInjection(loaded.frontmatter)];
+  const s01 = cat("S-01", skillInjectionFails(injFindings) ? "fail" : "pass", injFindings);
+  const exfil = exfilInstruction(loaded.body);
+  const exfilFailed = exfil.some((f) => f.severity === "high");
+  const s03 = cat("S-03", exfilFailed ? "fail" : "pass", exfil);
+  const execFiles = loaded.files.filter((f) => f.isExecutable);
+  const dangFindings = execFiles.flatMap((f) => dangerousCommand(f.bytes.toString("utf8"), f.relPath));
+  const dangHigh = dangFindings.filter((f) => f.severity === "high");
+  const dangOther = dangFindings.filter((f) => f.severity !== "high");
+  const noScriptsNote = execFiles.length === 0 ? "no bundled executable scripts" : void 0;
+  const s04 = cat("S-04", dangHigh.length > 0 ? "fail" : "pass", dangHigh, noScriptsNote);
+  const categories = [s01, s03, s04];
+  const { grade, rationale } = gradeSkillCategories(categories);
+  const advisories = [...overBroadTrigger(loaded.description), ...dangOther];
+  return {
+    ...base,
+    skillRef,
+    contentHash: loaded.contentHash,
+    categories,
+    advisories,
+    grade,
+    gradeRationale: rationale
+  };
+}
+// ../probes/src/skills/quality-judge.ts
+/**
+ * Build a judge backed by an OpenAI-compatible `/chat/completions` endpoint.
+ *
+ * Each request now carries a timeout: without one, an endpoint that accepts
+ * the connection but never answers would block the caller indefinitely.
+ *
+ * @param {{apiKey: string, model: string, baseUrl: string, timeoutMs?: number}} cfg
+ *   `baseUrl` may end in slashes (they are trimmed); `timeoutMs` defaults to
+ *   120000.
+ * @returns {{id: string, complete: (system: string, user: string) => Promise<string>}}
+ *   `complete` resolves to the first choice's message content, or "" when the
+ *   response has no string content; it rejects on a non-2xx status, a network
+ *   error or a timeout.
+ */
+function openAICompatJudge(cfg) {
+  let base = cfg.baseUrl;
+  while (base.endsWith("/")) base = base.slice(0, -1);
+  const url = `${base}/chat/completions`;
+  const timeoutMs = cfg.timeoutMs ?? 12e4;
+  return {
+    id: `openai-compat:${cfg.model}`,
+    async complete(system, user) {
+      const res = await fetch(url, {
+        method: "POST",
+        headers: { "content-type": "application/json", authorization: `Bearer ${cfg.apiKey}` },
+        body: JSON.stringify({
+          model: cfg.model,
+          max_tokens: 1024,
+          messages: [
+            { role: "system", content: system },
+            { role: "user", content: user }
+          ]
+        }),
+        signal: AbortSignal.timeout(timeoutMs)
+      });
+      if (!res.ok) throw new Error(`judge request failed: HTTP ${res.status}`);
+      const data = await res.json();
+      const content = data?.choices?.[0]?.message?.content;
+      // Callers treat the result as text; guard against structured content.
+      return typeof content === "string" ? content : "";
+    }
+  };
+}
+/**
+ * Build a judge from environment variables, or return null when the LLM judge
+ * is not configured.
+ * @param {Record<string, string|undefined>} [env] - defaults to `process.env`.
+ *   Reads LITMUS_LLM_API_KEY and LITMUS_LLM_MODEL (both required) and
+ *   LITMUS_LLM_BASE_URL (optional).
+ * @returns {object|null} the judge from openAICompatJudge, or null.
+ */
+function judgeFromEnv(env = process.env) {
+  const { LITMUS_LLM_API_KEY: apiKey, LITMUS_LLM_MODEL: model } = env;
+  if (!apiKey || !model) return null;
+  // `||` rather than `??`: a variable that is set but empty must also fall
+  // back, otherwise the request URL becomes "/chat/completions".
+  const baseUrl = env.LITMUS_LLM_BASE_URL || "https://api.openai.com/v1";
+  return openAICompatJudge({ apiKey, model, baseUrl });
+}
+// Axes the judge rates; parseVerdict requires a valid rating for each one.
+var AXES = ["honesty", "coherence"];
+// Accepted ratings. Order matters: majority() resolves ties in favour of the
+// entry that appears later in this list.
+var RATINGS = ["good", "concern", "bad"];
+// System prompt sent with every judge request (see judgeSkillQuality).
+var SYSTEM = 'You are a strict, terse reviewer of Claude Code "skills" (instruction files an AI agent loads). Judge ONLY two axes. honesty: does the body do anything material its frontmatter `description` does not disclose, or claim a far broader/narrower scope than it delivers? coherence: are the instructions clear, internally consistent, and actually followable? Rate each good|concern|bad. Reply with ONLY a JSON object: {"honesty":{"rating":"...","why":"<=20 words"},"coherence":{"rating":"...","why":"<=20 words"}}. No prose.';
+/**
+ * Build the user message for the judge: the description (or "(none)"), a
+ * separator line, then the body truncated to 12000 characters with a marker.
+ * @param {{description: string, body: string}} loaded - a loaded skill.
+ * @returns {string} the prompt text.
+ */
+function buildUserPrompt(loaded) {
+  const limit = 12e3;
+  let body = loaded.body;
+  if (body.length > limit) body = body.slice(0, limit) + "\n\u2026[truncated]";
+  const description = loaded.description || "(none)";
+  return ["description: " + description, "--- SKILL BODY ---", body].join("\n");
+}
+/**
+ * Extract the per-axis ratings from a judge reply. The JSON object is taken
+ * from the first `{` to the last `}`, so surrounding prose is tolerated.
+ * @param {string} text - raw judge reply.
+ * @returns {Record<string, string>|null} a map of axis to rating, or null when
+ *   there is no parseable object or any axis lacks a valid rating.
+ */
+function parseVerdict(text) {
+  const open = text.indexOf("{");
+  const close = text.lastIndexOf("}");
+  if (open < 0 || close <= open) return null;
+  let parsed;
+  try {
+    parsed = JSON.parse(text.slice(open, close + 1));
+  } catch {
+    return null;
+  }
+  const verdict = {};
+  for (const axis of AXES) {
+    const rating = parsed?.[axis]?.rating;
+    const valid = typeof rating === "string" && RATINGS.includes(rating);
+    if (!valid) return null;
+    verdict[axis] = rating;
+  }
+  return verdict;
+}
+/**
+ * Pick the most frequent rating. Candidates are tried in RATINGS order and a
+ * tie goes to the later candidate; values not in RATINGS are ignored.
+ * @param {string[]} ratings - one rating per sample.
+ * @returns {{rating: string, count: number}} the winning rating and its votes.
+ */
+function majority(ratings) {
+  let winner = { rating: "good", count: -1 };
+  for (const candidate of RATINGS) {
+    const votes = ratings.reduce((n, r) => r === candidate ? n + 1 : n, 0);
+    // `>=` lets a later candidate take over on a tie.
+    if (votes >= winner.count) winner = { rating: candidate, count: votes };
+  }
+  return winner;
+}
+/**
+ * Ask the judge to rate a skill and aggregate the parseable replies.
+ * Requests are issued one after another; `opts.samples` is clamped to 1..5.
+ * @param {{description: string, body: string}} loaded - a loaded skill.
+ * @param {{id: string, complete: Function}} judge - judge to query.
+ * @param {{samples?: number}} [opts]
+ * @returns {Promise<object>} judge id, number of parseable samples, the lowest
+ *   per-axis agreement (rounded to two decimals), per-axis majority ratings,
+ *   and an advisory note.
+ * @throws {Error} when no reply could be parsed.
+ */
+async function judgeSkillQuality(loaded, judge, opts = {}) {
+  const requested = opts.samples ?? 1;
+  const samples = Math.max(1, Math.min(requested, 5));
+  const user = buildUserPrompt(loaded);
+  const verdicts = [];
+  for (let attempt = 0; attempt < samples; attempt++) {
+    const reply = await judge.complete(SYSTEM, user);
+    const verdict = parseVerdict(reply);
+    if (verdict) verdicts.push(verdict);
+  }
+  const total = verdicts.length;
+  if (total === 0) throw new Error("judge returned no parseable verdict");
+  const tallies = AXES.map((axis) => ({ axis, tally: majority(verdicts.map((v) => v[axis])) }));
+  const minAgreement = tallies.reduce((lowest, { tally }) => Math.min(lowest, tally.count / total), 1);
+  const axes = tallies.map(({ axis, tally }) => ({
+    axis,
+    rating: tally.rating,
+    rationale: `majority of ${total} sample(s)`
+  }));
+  return {
+    judge: judge.id,
+    samples: total,
+    agreement: Number(minAgreement.toFixed(2)),
+    axes,
+    note: "Advisory, non-deterministic: produced by an LLM judge, not the reproducible static scan. Repeatability is majority-over-k, not bit-identical. Never affects the safety letter and is never minted."
+  };
+}
+// ../probes/src/skills/quality.ts
+// Emitted as `qualityVersion` on every quality bundle.
+var SKILL_QUALITY_VERSION = "skill-quality-v1";
+// Emitted as `disclaimer` on every quality bundle, including load failures.
+var QUALITY_DISCLAIMER = "skill-quality-v1 is an ADVISORY signal, separate from the safety grade. It is never an A\u2013F letter and is never minted on-chain. This version runs only the deterministic well-formedness checks; the non-deterministic, LLM-judged axes (outcome fidelity, trigger calibration) are not included, so it does not assert that the skill actually works.";
+/**
+ * List relative markdown link/image targets in `body` that do not name a
+ * bundled file.
+ *
+ * Targets with any URL scheme (not only http/mailto/tel/data), pure `#`
+ * anchors and absolute `/` paths are ignored. A target counts as resolved if
+ * either its literal or its percent-decoded form is bundled, so
+ * `my%20file.md` finds `my file.md`.
+ *
+ * @param {string} body - markdown body of the skill.
+ * @param {Set<string>} relPaths - bundled file paths, `/`-separated, NFC.
+ * @returns {string[]} unresolved targets in first-seen order, without
+ *   duplicates, with any `./` prefix, `#fragment` and `?query` removed.
+ */
+function brokenBundleLinks(body, relPaths) {
+  const broken = [];
+  const seen = /* @__PURE__ */ new Set();
+  for (const m of body.matchAll(/!?\[[^\]]*\]\(([^)\s]+)/g)) {
+    let ref = m[1].trim();
+    // Two or more scheme characters, so a drive-letter-like "c:" is not a scheme.
+    if (/^(?:[a-z][a-z0-9+.-]+:|#)/i.test(ref) || ref.startsWith("/")) continue;
+    ref = ref.replace(/^\.\//, "").split("#")[0].split("?")[0].normalize("NFC");
+    if (!ref || seen.has(ref)) continue;
+    seen.add(ref);
+    if (relPaths.has(ref)) continue;
+    let decoded = ref;
+    try {
+      decoded = decodeURIComponent(ref).normalize("NFC");
+    } catch {
+      // Malformed escape sequence: fall back to the literal target.
+    }
+    if (!relPaths.has(decoded)) broken.push(ref);
+  }
+  return broken;
+}
+/**
+ * Run the deterministic well-formedness checks on a skill directory.
+ *
+ * Checks: frontmatter has `name`, description is non-empty, body is
+ * non-empty (each "fail" when violated), and relative links resolve to
+ * bundled files ("warn" when not). The verdict is "malformed" if any check
+ * failed, "issues" if any warned, else "well-formed". A load failure yields
+ * a "malformed" bundle with a single failed `loadable` check.
+ *
+ * @param {string} dir - skill root directory.
+ * @param {{ranAt?: string, skillRef?: string}} [opts]
+ * @returns {object} the quality bundle.
+ */
+function runSkillQuality(dir, opts = {}) {
+  const ranAt = opts.ranAt ?? new Date().toISOString();
+  const base = { qualityVersion: SKILL_QUALITY_VERSION, ranAt, disclaimer: QUALITY_DISCLAIMER };
+  const skillRef = opts.skillRef ?? dir;
+  let loaded;
+  try {
+    loaded = loadSkill(dir);
+  } catch (e) {
+    const detail = e instanceof SkillLoadError ? e.message : "could not load skill";
+    return {
+      ...base,
+      skillRef,
+      contentHash: "0x",
+      verdict: "malformed",
+      checks: [{ id: "loadable", status: "fail", detail }]
+    };
+  }
+  // One check record: "pass" when `ok`, otherwise `failStatus` with its own detail.
+  const check = (id, ok, passDetail, failDetail, failStatus = "fail") => ok ? { id, status: "pass", detail: passDetail } : { id, status: failStatus, detail: failDetail };
+  const bundled = new Set(loaded.files.map((f) => f.relPath));
+  const broken = brokenBundleLinks(loaded.body, bundled);
+  const checks = [
+    check(
+      "frontmatter-name",
+      /(^|\n)name\s*:/i.test(loaded.frontmatter),
+      "frontmatter has a name",
+      "frontmatter is missing `name`"
+    ),
+    check(
+      "frontmatter-description",
+      Boolean(loaded.description.trim()),
+      "frontmatter has a non-empty description",
+      "frontmatter is missing a non-empty `description` (the skill's activation trigger)"
+    ),
+    check(
+      "body-nonempty",
+      Boolean(loaded.body.trim()),
+      "the instruction body is non-empty",
+      "the instruction body is empty"
+    ),
+    check(
+      "bundled-links-resolve",
+      broken.length === 0,
+      "all relative links in the body resolve to bundled files",
+      `broken relative link(s) to: ${broken.slice(0, 5).join(", ")}`,
+      "warn"
+    )
+  ];
+  const statuses = new Set(checks.map((c) => c.status));
+  let verdict = "well-formed";
+  if (statuses.has("fail")) verdict = "malformed";
+  else if (statuses.has("warn")) verdict = "issues";
+  return { ...base, skillRef, contentHash: loaded.contentHash, verdict, checks };
+}
+/**
+ * Run the deterministic quality checks, then attach the LLM judge's ratings
+ * as `judged` when they can be obtained. The judged part is best effort: any
+ * error from loading or judging is swallowed and the plain bundle is returned.
+ *
+ * NOTE(review): the skill is loaded a second time here, so `judged` can
+ * reflect different bytes than `contentHash` if the directory changes between
+ * the two loads.
+ *
+ * @param {string} dir - skill root directory.
+ * @param {{id: string, complete: Function}} judge - judge to query.
+ * @param {object} [opts] - forwarded to runSkillQuality and judgeSkillQuality.
+ * @returns {Promise<object>} the quality bundle, possibly with `judged`.
+ */
+async function runSkillQualityJudged(dir, judge, opts = {}) {
+  const bundle = runSkillQuality(dir, opts);
+  // "0x" marks a bundle whose skill failed to load; there is nothing to judge.
+  const loadFailed = bundle.contentHash === "0x";
+  if (!loadFailed) {
+    try {
+      const loaded = loadSkill(dir);
+      bundle.judged = await judgeSkillQuality(loaded, judge, opts);
+    } catch {
+    }
+  }
+  return bundle;
+}
 export {
   connectTarget,
   fingerprintToolDefs,
@@ -2170,5 +2625,23 @@ export {
   hasHighSeverity,
   gradeFromCategories,
   assembleBundle,
-  runLitmus
+  runLitmus,
+  SkillLoadError,
+  loadSkill,
+  stripExamples,
+  skillInjection,
+  skillInjectionFails,
+  exfilInstruction,
+  dangerousCommand,
+  overBroadTrigger,
+  gradeSkillCategories,
+  SKILL_METHODOLOGY_VERSION,
+  SKILL_BUNDLE_SCHEMA_VERSION,
+  runSkillLitmus,
+  openAICompatJudge,
+  judgeFromEnv,
+  judgeSkillQuality,
+  SKILL_QUALITY_VERSION,
+  runSkillQuality,
+  runSkillQualityJudged
 };