npm - @polygraphso/litmus - Versions diffs - 0.8.1 → 0.9.1 - Mend

@polygraphso/litmus 0.8.1 → 0.9.1

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +58 -0
package/dist/{chunk-ZR6XRGMQ.js → chunk-44R4ZYOE.js} +67 -0
package/dist/{chunk-VOPISHBU.js → chunk-BUKDFSDO.js} +2 -2
package/dist/{chunk-35UOPCBW.js → chunk-RYJXVMCT.js} +482 -9
package/dist/chunk-Z66GKAQD.js +692 -0
package/dist/cli-skill.d.ts +1 -0
package/dist/cli-skill.js +98 -0
package/dist/cli.js +2 -2
package/dist/index.d.ts +437 -2
package/dist/index.js +86 -8
package/dist/mcp.js +130 -122
package/dist/src-TMJOIVGB.js +67 -0
package/package.json +4 -3
package/dist/chunk-BPS4YCDL.js +0 -250
package/dist/src-RSTPCEYU.js +0 -31

package/dist/index.js CHANGED Viewed

@@ -1,49 +1,88 @@
 import {
   LITMUS_SCHEMA,
+  LITMUS_SKILL_SCHEMA,
   NETWORKS,
   RUN_LITMUS_TOOL_DESCRIPTION,
   RUN_LITMUS_TOOL_NAME,
   RUN_LITMUS_TOOL_TITLE,
+  RUN_SKILL_LITMUS_TOOL_DESCRIPTION,
+  RUN_SKILL_LITMUS_TOOL_NAME,
+  RUN_SKILL_LITMUS_TOOL_TITLE,
+  VERIFY_SKILL_TOOL_DESCRIPTION,
+  VERIFY_SKILL_TOOL_NAME,
+  VERIFY_SKILL_TOOL_TITLE,
   decodeLitmusAttestation,
+  decodeSkillAttestation,
   encodeLitmusAttestation,
+  encodeSkillAttestation,
+  encodeSkillAttestationFields,
   handleRunLitmus,
+  handleRunSkillLitmus,
+  handleVerifySkill,
   litmusFields,
   litmusSchemaUID,
   networkConfig,
   readAttestation,
+  readSkillAttestation,
   rpcUrl,
   runLitmusInputShape,
-  selectedNetwork
-} from "./chunk-BPS4YCDL.js";
+  runSkillLitmusInputShape,
+  selectedNetwork,
+  skillAttestationFields,
+  skillSchemaUID,
+  verifySkillInputShape
+} from "./chunk-Z66GKAQD.js";
 import {
   parseAuthFlags,
   resolveTarget
-} from "./chunk-VOPISHBU.js";
+} from "./chunk-BUKDFSDO.js";
 import {
+  SKILL_BUNDLE_SCHEMA_VERSION,
+  SKILL_METHODOLOGY_VERSION,
+  SKILL_QUALITY_VERSION,
+  SkillLoadError,
   assembleBundle,
   canaryMatch,
   classifyTool,
   connectTarget,
+  dangerousCommand,
+  exfilInstruction,
   fingerprintToolDefs,
   gradeFromCategories,
+  gradeSkillCategories,
   hasHighSeverity,
   instructionMimicry,
   internalsLeak,
   invisibleUnicode,
+  judgeFromEnv,
+  judgeSkillQuality,
+  loadSkill,
   markdownTricks,
+  openAICompatJudge,
+  overBroadTrigger,
   runLitmus,
-  stateChangingToolNames
-} from "./chunk-35UOPCBW.js";
+  runSkillLitmus,
+  runSkillQuality,
+  runSkillQualityJudged,
+  skillInjection,
+  skillInjectionFails,
+  stateChangingToolNames,
+  stripExamples
+} from "./chunk-RYJXVMCT.js";
 import {
   BUNDLE_SCHEMA_VERSION,
   CATEGORY_STATUS_UINT8,
   METHODOLOGY_VERSION,
   ServerRefParseError,
+  SkillRefParseError,
   canonicalStringify,
   formatServerRef,
+  formatSkillRef,
   parseServerRef,
-  serverKey
-} from "./chunk-ZR6XRGMQ.js";
+  parseSkillRef,
+  serverKey,
+  skillKey
+} from "./chunk-44R4ZYOE.js";
 // ../agent/src/gate.ts
 function sameServer(a, b) {
@@ -92,41 +131,80 @@ export {
   CATEGORY_STATUS_UINT8,
   DEFAULT_PASSING,
   LITMUS_SCHEMA,
+  LITMUS_SKILL_SCHEMA,
   METHODOLOGY_VERSION,
   NETWORKS,
   RUN_LITMUS_TOOL_DESCRIPTION,
   RUN_LITMUS_TOOL_NAME,
   RUN_LITMUS_TOOL_TITLE,
+  RUN_SKILL_LITMUS_TOOL_DESCRIPTION,
+  RUN_SKILL_LITMUS_TOOL_NAME,
+  RUN_SKILL_LITMUS_TOOL_TITLE,
+  SKILL_BUNDLE_SCHEMA_VERSION,
+  SKILL_METHODOLOGY_VERSION,
+  SKILL_QUALITY_VERSION,
   ServerRefParseError,
+  SkillLoadError,
+  SkillRefParseError,
+  VERIFY_SKILL_TOOL_DESCRIPTION,
+  VERIFY_SKILL_TOOL_NAME,
+  VERIFY_SKILL_TOOL_TITLE,
   assembleBundle,
   canaryMatch,
   canonicalStringify,
   classifyTool,
   connectTarget,
+  dangerousCommand,
   decodeLitmusAttestation,
+  decodeSkillAttestation,
   encodeLitmusAttestation,
+  encodeSkillAttestation,
+  encodeSkillAttestationFields,
+  exfilInstruction,
   fingerprintToolDefs,
   formatServerRef,
+  formatSkillRef,
   gateDecision,
   gradeFromCategories,
+  gradeSkillCategories,
   handleRunLitmus,
+  handleRunSkillLitmus,
+  handleVerifySkill,
   hasHighSeverity,
   instructionMimicry,
   internalsLeak,
   invisibleUnicode,
+  judgeFromEnv,
+  judgeSkillQuality,
   litmusFields,
   litmusSchemaUID,
   liveFingerprint,
+  loadSkill,
   markdownTricks,
   networkConfig,
+  openAICompatJudge,
+  overBroadTrigger,
   parseAuthFlags,
   parseServerRef,
+  parseSkillRef,
   readAttestation,
+  readSkillAttestation,
   resolveTarget,
   rpcUrl,
   runLitmus,
   runLitmusInputShape,
+  runSkillLitmus,
+  runSkillLitmusInputShape,
+  runSkillQuality,
+  runSkillQualityJudged,
   selectedNetwork,
   serverKey,
-  stateChangingToolNames
+  skillAttestationFields,
+  skillInjection,
+  skillInjectionFails,
+  skillKey,
+  skillSchemaUID,
+  stateChangingToolNames,
+  stripExamples,
+  verifySkillInputShape
 };

package/dist/mcp.js CHANGED Viewed

@@ -3,141 +3,61 @@ import {
   RUN_LITMUS_TOOL_DESCRIPTION,
   RUN_LITMUS_TOOL_NAME,
   RUN_LITMUS_TOOL_TITLE,
+  RUN_SKILL_LITMUS_TOOL_DESCRIPTION,
+  RUN_SKILL_LITMUS_TOOL_NAME,
+  RUN_SKILL_LITMUS_TOOL_TITLE,
+  VERIFY_SKILL_TOOL_DESCRIPTION,
+  VERIFY_SKILL_TOOL_NAME,
+  VERIFY_SKILL_TOOL_TITLE,
+  VERIFY_TOOL_DESCRIPTION,
+  VERIFY_TOOL_NAME,
+  VERIFY_TOOL_TITLE,
   handleRunLitmus,
-  readAttestation,
+  handleRunSkillLitmus,
+  handleVerify,
+  handleVerifySkill,
   runLitmusInputShape,
-  selectedNetwork
-} from "./chunk-BPS4YCDL.js";
-import "./chunk-VOPISHBU.js";
-import "./chunk-35UOPCBW.js";
+  runSkillLitmusInputShape,
+  verifyInputShape,
+  verifySkillInputShape
+} from "./chunk-Z66GKAQD.js";
+import "./chunk-BUKDFSDO.js";
 import {
-  parseServerRef,
-  serverKey
-} from "./chunk-ZR6XRGMQ.js";
+  judgeFromEnv
+} from "./chunk-RYJXVMCT.js";
+import "./chunk-44R4ZYOE.js";
 // src/mcp.ts
 import { realpathSync } from "fs";
 import { fileURLToPath } from "url";
-import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js";
-import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
-import { z as z2 } from "zod";
-// ../mcp/src/index.ts
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
-// ../mcp/src/tools/verify-attestation.ts
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import { z } from "zod";
-function canonicalRef(ref) {
-  try {
-    return serverKey(parseServerRef(ref)).toLowerCase();
-  } catch {
-    return ref.trim().toLowerCase();
-  }
+// src/sampling-judge.ts
+function clientSupportsSampling(server) {
+  return Boolean(server.server.getClientCapabilities()?.sampling);
 }
-var VERIFY_TOOL_NAME = "verify_attestation";
-var VERIFY_TOOL_TITLE = "Verify a server's polygraph attestation";
-var VERIFY_TOOL_DESCRIPTION = [
-  "Read a server's already-published polygraph (litmus) grade \u2014 without running",
-  "anything \u2014 before an agent trusts or, in agentic commerce, pays it.",
-  "",
-  "When a grade is published it returns the behavioral grade (A\u2013F), the attestation",
-  "UID, the evidence CID, and the graded tool-surface fingerprint. The caller must",
-  "still recompute the LIVE fingerprint and require it to equal the attested one",
-  "before paying \u2014 a passing attestation can otherwise front for a tool surface the",
-  "server no longer serves (rug pull).",
-  "",
-  "Grade publishing is still rolling out, so this commonly returns not_available",
-  "today: that means UNEVALUATED (neither safe nor unsafe), not a failing grade \u2014 to",
-  "grade the server yourself right now, use `run_litmus`. A `lookup_failed` result",
-  "means the lookup itself failed (the index or chain was unreachable); the grade is",
-  "unknown, which is not the same as unevaluated.",
-  "",
-  "Input: server_ref \u2014 e.g. npm/@modelcontextprotocol/server-filesystem."
-].join("\n");
-var verifyInputShape = {
-  server_ref: z.string().min(1).max(512).describe("Registry-prefixed server identifier, e.g. npm/@scope/server.")
-};
-async function handleVerify({ server_ref }) {
-  const found = await resolveUid(server_ref);
-  if (found.kind === "error") {
-    return {
-      isError: true,
-      content: [
-        {
-          type: "text",
-          text: `lookup_failed \u2014 could not reach the polygraph grade index for ${server_ref} (${found.detail}). The lookup itself failed, so the grade is unknown \u2014 retry or report it as unchecked, NOT as unevaluated.`
-        }
-      ]
-    };
-  }
-  let att = null;
-  if (found.kind === "found") {
-    try {
-      att = await readAttestation(found.uid);
-    } catch (err) {
-      return {
-        isError: true,
-        content: [
-          {
-            type: "text",
-            text: `lookup_failed \u2014 the onchain read failed for ${server_ref} (${err instanceof Error ? err.message : String(err)}). Treat as unchecked (the chain/RPC was unreachable), not as "no grade".`
-          }
-        ]
-      };
+function samplingJudge(server) {
+  return {
+    id: "mcp-sampling",
+    async complete(system, user) {
+      if (!clientSupportsSampling(server)) {
+        throw new Error("MCP client does not support sampling");
+      }
+      const res = await server.server.createMessage({
+        systemPrompt: system,
+        maxTokens: 1024,
+        messages: [{ role: "user", content: { type: "text", text: user } }]
+      });
+      return res.content.type === "text" ? res.content.text : "";
     }
-  }
-  if (!att) {
-    return {
-      content: [
-        {
-          type: "text",
-          text: `not_available \u2014 no published polygraph grade for ${server_ref}. Grade publishing is still rolling out, so this is expected for most servers; it means unevaluated (neither safe nor unsafe), not a failing grade. To grade it now, use run_litmus.`
-        }
-      ]
-    };
-  }
-  if (canonicalRef(att.serverRef) !== canonicalRef(server_ref)) {
-    return {
-      content: [
-        {
-          type: "text",
-          text: `not_available \u2014 the resolved attestation is for ${att.serverRef}, not ${server_ref} (discovery mismatch; treat as unevaluated)`
-        }
-      ]
-    };
-  }
-  const payload = {
-    status: "attested",
-    grade: att.overallGrade,
-    attestationUid: att.uid,
-    serverRef: att.serverRef,
-    // The version the grade was run against (null for HTTP/unresolved targets).
-    // Advisory: the live fingerprint, not this string, is the trust anchor.
-    resolvedVersion: att.resolvedVersion,
-    reportCID: att.reportCID,
-    toolDefsFingerprint: att.toolDefsFingerprint,
-    revoked: att.revoked,
-    network: selectedNetwork(),
-    liveFingerprintCheckRequired: "Recompute the live tool-surface fingerprint and require it to equal toolDefsFingerprint before paying."
   };
-  return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
-}
-async function resolveUid(serverRef) {
-  const base = process.env.POLYGRAPH_API_URL ?? "https://polygraph.so";
-  try {
-    const res = await fetch(`${base}/api/attestations?ref=${encodeURIComponent(serverRef)}`);
-    if (res.status === 404) return { kind: "none" };
-    if (!res.ok) return { kind: "error", detail: `grade index returned HTTP ${res.status}` };
-    const row = await res.json();
-    return row?.attestation_uid ? { kind: "found", uid: row.attestation_uid } : { kind: "none" };
-  } catch (err) {
-    return { kind: "error", detail: err instanceof Error ? err.message : String(err) };
-  }
 }
 // src/mcp.ts
 function buildServer() {
-  const server = new McpServer2(
+  const server = new McpServer(
     { name: "polygraph-litmus", version: "0.1.0" },
     {
       instructions: [
@@ -155,7 +75,12 @@ function buildServer() {
         "server, without running anything. Grade publishing is still rolling out, so",
         "it commonly returns not_available today \u2014 that means unevaluated (neither",
         "safe nor unsafe), not a failing grade; to grade the server yourself, use",
-        "`run_litmus`."
+        "`run_litmus`.",
+        "",
+        "Use `run_skill_litmus` to grade a Claude Code / Agent Skill (a SKILL.md +",
+        "bundle) A/B/D/F. This is a STATIC read of the skill's text and bundled files \u2014",
+        "no execution, no network \u2014 so it is fast but not behavioral proof. Pass",
+        "`skill_ref` as a local path to the skill directory."
       ].join("\n")
     }
   );
@@ -179,6 +104,30 @@ function buildServer() {
     },
     handleRunLitmus
   );
+  server.registerTool(
+    RUN_SKILL_LITMUS_TOOL_NAME,
+    {
+      title: RUN_SKILL_LITMUS_TOOL_TITLE,
+      description: RUN_SKILL_LITMUS_TOOL_DESCRIPTION,
+      inputSchema: runSkillLitmusInputShape,
+      annotations: {
+        title: RUN_SKILL_LITMUS_TOOL_TITLE,
+        readOnlyHint: true,
+        // never mutates: the safety scan reads files; quality judging is host-mediated
+        destructiveHint: false,
+        idempotentHint: false,
+        // the optional LLM-judged quality axes are non-deterministic
+        openWorldHint: true
+        // the optional quality judge may use the host model (sampling) or a configured endpoint
+      }
+    },
+    // Resolve the judge per call (the client connection is known now): the host
+    // agent's model via sampling if it's offered, else an operator-set env key,
+    // else null ⇒ deterministic quality only. The litmus core never needs a key.
+    (args) => handleRunSkillLitmus(args, {
+      judge: clientSupportsSampling(server) ? samplingJudge(server) : judgeFromEnv()
+    })
+  );
   server.registerTool(
     VERIFY_TOOL_NAME,
     {
@@ -195,13 +144,30 @@ function buildServer() {
     },
     handleVerify
   );
+  server.registerTool(
+    VERIFY_SKILL_TOOL_NAME,
+    {
+      title: VERIFY_SKILL_TOOL_TITLE,
+      description: VERIFY_SKILL_TOOL_DESCRIPTION,
+      inputSchema: verifySkillInputShape,
+      annotations: {
+        title: VERIFY_SKILL_TOOL_TITLE,
+        readOnlyHint: true,
+        destructiveHint: false,
+        idempotentHint: true,
+        openWorldHint: true
+        // reads the grade index + chain
+      }
+    },
+    handleVerifySkill
+  );
   server.registerPrompt(
     "grade",
     {
       title: "Grade an MCP server",
       description: "Run the open behavioral litmus against an MCP server and report its grade A\u2013F with the evidence.",
       argsSchema: {
-        server_ref: z2.string().min(1).max(512).describe("npm/@scope/server, an https:// MCP URL, or a local path to an MCP entry file")
+        server_ref: z.string().min(1).max(512).describe("npm/@scope/server, an https:// MCP URL, or a local path to an MCP entry file")
       }
     },
     ({ server_ref }) => ({
@@ -222,7 +188,7 @@ function buildServer() {
       title: "Check a server's published grade",
       description: "Read a server's already-published polygraph grade without running anything.",
       argsSchema: {
-        server_ref: z2.string().min(1).max(512).describe("Registry-prefixed server identifier, e.g. npm/@scope/server")
+        server_ref: z.string().min(1).max(512).describe("Registry-prefixed server identifier, e.g. npm/@scope/server")
       }
     },
     ({ server_ref }) => ({
@@ -237,6 +203,48 @@ function buildServer() {
       ]
     })
   );
+  server.registerPrompt(
+    "grade-skill",
+    {
+      title: "Grade a Claude Code skill",
+      description: "Run the open static safety litmus over a skill (SKILL.md + bundle) and report its grade A/B/D/F with the evidence.",
+      argsSchema: {
+        skill_ref: z.string().min(1).max(1024).describe("Local path to a skill directory containing SKILL.md")
+      }
+    },
+    ({ skill_ref }) => ({
+      messages: [
+        {
+          role: "user",
+          content: {
+            type: "text",
+            text: `Run the polygraph skill litmus on ${skill_ref} using the run_skill_litmus tool. Report the letter grade, the one-line summary, any failed category with its findings, and the contentHash. State plainly that this is a static scan, not behavioral proof.`
+          }
+        }
+      ]
+    })
+  );
+  server.registerPrompt(
+    "check-skill",
+    {
+      title: "Check a skill's published grade",
+      description: "Read a skill's already-published polygraph grade without running anything.",
+      argsSchema: {
+        skill_ref: z.string().min(1).max(1024).describe("Skill identifier, e.g. github/<owner>/<repo>#<path> or marketplace/<owner>/<name>")
+      }
+    },
+    ({ skill_ref }) => ({
+      messages: [
+        {
+          role: "user",
+          content: {
+            type: "text",
+            text: `Use the verify_skill_attestation tool to read the published polygraph grade for ${skill_ref}. If it returns not_available, say the skill is unevaluated (neither safe nor unsafe) and offer to grade a local copy with run_skill_litmus. If a grade is returned, report it and remind the user to recompute the skill's contentHash before installing.`
+          }
+        }
+      ]
+    })
+  );
   return server;
 }
 async function main() {

package/dist/src-TMJOIVGB.js ADDED Viewed

@@ -0,0 +1,67 @@
+import {
+  SKILL_BUNDLE_SCHEMA_VERSION,
+  SKILL_METHODOLOGY_VERSION,
+  SKILL_QUALITY_VERSION,
+  SkillLoadError,
+  assembleBundle,
+  canaryMatch,
+  classifyTool,
+  connectTarget,
+  dangerousCommand,
+  exfilInstruction,
+  fingerprintToolDefs,
+  gradeFromCategories,
+  gradeSkillCategories,
+  hasHighSeverity,
+  instructionMimicry,
+  internalsLeak,
+  invisibleUnicode,
+  judgeFromEnv,
+  judgeSkillQuality,
+  loadSkill,
+  markdownTricks,
+  openAICompatJudge,
+  overBroadTrigger,
+  runLitmus,
+  runSkillLitmus,
+  runSkillQuality,
+  runSkillQualityJudged,
+  skillInjection,
+  skillInjectionFails,
+  stateChangingToolNames,
+  stripExamples
+} from "./chunk-RYJXVMCT.js";
+import "./chunk-44R4ZYOE.js";
+export {
+  SKILL_BUNDLE_SCHEMA_VERSION,
+  SKILL_METHODOLOGY_VERSION,
+  SKILL_QUALITY_VERSION,
+  SkillLoadError,
+  assembleBundle,
+  canaryMatch,
+  classifyTool,
+  connectTarget,
+  dangerousCommand,
+  exfilInstruction,
+  fingerprintToolDefs,
+  gradeFromCategories,
+  gradeSkillCategories,
+  hasHighSeverity,
+  instructionMimicry,
+  internalsLeak,
+  invisibleUnicode,
+  judgeFromEnv,
+  judgeSkillQuality,
+  loadSkill,
+  markdownTricks,
+  openAICompatJudge,
+  overBroadTrigger,
+  runLitmus,
+  runSkillLitmus,
+  runSkillQuality,
+  runSkillQualityJudged,
+  skillInjection,
+  skillInjectionFails,
+  stateChangingToolNames,
+  stripExamples
+};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@polygraphso/litmus",
-  "version": "0.8.1",
+  "version": "0.9.1",
   "description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
   "license": "Apache-2.0",
   "homepage": "https://polygraph.so",
@@ -40,6 +40,7 @@
   },
   "bin": {
     "polygraphso-litmus": "dist/cli.js",
+    "polygraphso-litmus-skill": "dist/cli-skill.js",
     "polygraphso-litmus-mcp": "dist/mcp.js"
   },
   "files": [
@@ -65,8 +66,8 @@
     "@polygraph/onchain": "0.0.0",
     "@polygraph/agent": "0.0.0",
     "@polygraph/probes": "0.0.0",
-    "@polygraph/mcp": "0.0.0",
-    "@polygraph/cli": "0.0.0"
+    "@polygraph/cli": "0.0.0",
+    "@polygraph/mcp": "0.0.0"
   },
   "publishConfig": {
     "access": "public"