@polygraphso/litmus 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -50,14 +50,28 @@ open and deterministic, so a re-run reproduces the grade — or refutes it.
50
50
  The package ships a stdio MCP server, `polygraphso-litmus-mcp`, so it works in any
51
51
  MCP-capable client. It exposes two tools:
52
52
 
53
- - **`run_litmus`** — actively grade a server *now* (runs the harness end-to-end),
54
- and return the grade and the evidence.
53
+ - **`run_litmus`** — actively grade a server *now* (runs the harness end-to-end)
54
+ and return the grade and the evidence. Pass the optional **`bearer`** (and `header`
55
+ entries, each `"Key: Value"`) to grade a token-gated `https://` MCP target — they are
56
+ sent to that origin only and ignored for stdio/local targets, using the same plumbing
57
+ as the CLI's `--bearer` / `--header`.
55
58
  - **`verify_attestation`** — passively read a server's *already-published* grade
56
59
  before trusting or paying it.
57
60
 
61
+ It also registers two **prompts** that show up as slash commands — in Claude Code,
62
+ `/mcp__polygraph-litmus__grade <server_ref>` (run a fresh grade) and
63
+ `/mcp__polygraph-litmus__check <server_ref>` (read a published grade); other
64
+ clients surface the same prompts in their own UI. (Want a bare `/polygraph` in
65
+ Claude Code? Drop a `.claude/commands/polygraph.md` that calls `run_litmus` — a
66
+ Claude-Code-only convenience, not shipped here.)
67
+
58
68
  **Prerequisites:** Node ≥ 18. Docker is optional (without it, C-02 egress is
59
69
  skipped and the grade caps at B). Set `POLYGRAPH_API_URL=https://polygraph.so` so
60
- `verify_attestation` can resolve a server's published grade.
70
+ `verify_attestation` can look up published grades.
71
+
72
+ > **Heads-up:** grade *publishing* is still rolling out, so `verify_attestation`
73
+ > commonly returns `not_available` today — that means *unevaluated*, not a failing
74
+ > grade. To grade a server right now, use `run_litmus`.
61
75
 
62
76
  Add the server once, then just talk to your agent.
63
77
 
@@ -99,6 +113,31 @@ that's already published.
99
113
  `run_litmus` launches the target server's code to exercise it (egress-sandboxed
100
114
  when Docker is present). It needs no wallet or RPC.
101
115
 
116
+ ### ChatGPT and other remote clients
117
+
118
+ ChatGPT's MCP support expects a remote **Streamable-HTTP** server; this package is
119
+ **stdio-only**, so you can't point ChatGPT at it directly. If you self-host, bridge
120
+ stdio over HTTP yourself — e.g.
121
+
122
+ ```bash
123
+ npx -y supergateway --stdio "npx -y -p @polygraphso/litmus polygraphso-litmus-mcp" --port 8000
124
+ ```
125
+
126
+ (or [`mcp-proxy`](https://github.com/sparfenyuk/mcp-proxy)) — then point your client
127
+ at that endpoint. polygraph does not host this for you; the bridge runs on your own
128
+ machine.
129
+
130
+ ### Troubleshooting
131
+
132
+ - **Two bins / `npx`:** `npx` needs `-p @polygraphso/litmus` *plus* the bin name
133
+ (`polygraphso-litmus` or `polygraphso-litmus-mcp`); plain `npx @polygraphso/litmus`
134
+ can't choose which to run. Installed globally? Use the bin name directly, no `-p`.
135
+ - **Docker optional:** without Docker, C-02 (egress) is skipped and the grade caps
136
+ at **B** — the C-02 row reads `skipped` with reason `no sandbox (Docker
137
+ unavailable)`. Not a failure, just unverified.
138
+ - **`verify_attestation` says `lookup_failed`:** the grade index or RPC was
139
+ unreachable — that's *unknown*, not *no grade*. Retry; check `POLYGRAPH_API_URL`.
140
+
102
141
  ## Library
103
142
 
104
143
  ```ts
@@ -2003,6 +2003,7 @@ function assembleBundle(input) {
2003
2003
  }
2004
2004
 
2005
2005
  // ../probes/src/harness.ts
2006
+ var PROGRESS_STEPS = 5;
2006
2007
  async function runLitmus(target, opts = {}) {
2007
2008
  const isolation = opts.isolation ?? (process.env.LITMUS_STDIO_ISOLATION === "docker" ? "docker" : "none");
2008
2009
  const ranAt = (/* @__PURE__ */ new Date()).toISOString();
@@ -2025,6 +2026,7 @@ async function runLitmus(target, opts = {}) {
2025
2026
  });
2026
2027
  try {
2027
2028
  const runProbes = async () => {
2029
+ const step = (done, label) => opts.onProgress?.(done, PROGRESS_STEPS, label);
2028
2030
  const listed = await enumerateTools(conn.client);
2029
2031
  const tools = listed.map((t) => ({
2030
2032
  name: t.name,
@@ -2033,6 +2035,7 @@ async function runLitmus(target, opts = {}) {
2033
2035
  }));
2034
2036
  assertGradableSurface(tools);
2035
2037
  const { fingerprint, canonical } = fingerprintToolDefs(tools);
2038
+ step(1, "fingerprinted tool surface");
2036
2039
  const annotated = listed.map((t) => ({
2037
2040
  name: t.name,
2038
2041
  description: t.description ?? "",
@@ -2056,14 +2059,15 @@ async function runLitmus(target, opts = {}) {
2056
2059
  baselineAllowlist: []
2057
2060
  };
2058
2061
  assertEgressRanUnderIsolation(egress, isolation, isStdio);
2059
- const categories = [
2060
- await c01Injection(ctx),
2061
- c02Permission(probe21Declaration(annotated), egress),
2062
- await c03Sensitive(ctx, egress),
2063
- // C-04 runs LAST: its malformed/oversized inputs may crash the server, so
2064
- // it must not run before the other probes have used the live connection.
2065
- await c04Adversarial(ctx)
2066
- ];
2062
+ const c01 = await c01Injection(ctx);
2063
+ step(2, "C-01 tool-output injection");
2064
+ const c02 = c02Permission(probe21Declaration(annotated), egress);
2065
+ step(3, "C-02 permission / egress");
2066
+ const c03 = await c03Sensitive(ctx, egress);
2067
+ step(4, "C-03 sensitive-data handling");
2068
+ const c04 = await c04Adversarial(ctx);
2069
+ step(5, "C-04 adversarial-input handling");
2070
+ const categories = [c01, c02, c03, c04];
2067
2071
  const grade = gradeFromCategories(categories);
2068
2072
  return assembleBundle({
2069
2073
  serverRef: conn.serverRef,
@@ -1,9 +1,10 @@
1
1
  import {
2
+ parseAuthFlags,
2
3
  resolveTarget
3
- } from "./chunk-RAZNXIE5.js";
4
+ } from "./chunk-VOPISHBU.js";
4
5
  import {
5
6
  runLitmus
6
- } from "./chunk-EWLIQPXF.js";
7
+ } from "./chunk-35UOPCBW.js";
7
8
  import {
8
9
  CATEGORY_STATUS_UINT8,
9
10
  METHODOLOGY_VERSION
@@ -124,27 +125,46 @@ import { z } from "zod";
124
125
  var RUN_LITMUS_TOOL_NAME = "run_litmus";
125
126
  var RUN_LITMUS_TOOL_TITLE = "Run a behavioral litmus on an MCP server";
126
127
  var RUN_LITMUS_TOOL_DESCRIPTION = [
127
- `Run the open behavioral litmus (${METHODOLOGY_VERSION}) against an MCP server and return`,
128
- "its grade. The harness connects like an agent would, fingerprints the tool",
129
- "surface, and runs three probe categories: C-01 tool-output injection, C-02",
130
- "permission overreach (egress in a hardened default-deny Docker sandbox, plus a",
131
- "declared-permission honesty check), and C-03 sensitive-data handling (planted",
132
- "canaries). It grades A\u2013F.",
128
+ `Grade an MCP server A\u2013F against the open behavioral litmus (${METHODOLOGY_VERSION}).`,
129
+ "The harness connects the way an agent would, fingerprints the tool surface, and",
130
+ "runs four checks: C-01 tool-output injection, C-02 permission/egress overreach",
131
+ "(egress in a hardened default-deny Docker sandbox, plus a declared-permission",
132
+ "honesty check), C-03 sensitive-data handling (planted canaries), and C-04",
133
+ "adversarial-input handling (malformed/oversized and jailbreak inputs).",
133
134
  "",
134
- "This is ACTIVE: it launches the target server's code to exercise it (sandboxed",
135
- "for egress when Docker is available). It is not a passive lookup \u2014 for that,",
136
- "use `verify_attestation`. It needs no wallet or RPC.",
135
+ "This is ACTIVE: it launches the target server's code to exercise it (egress-",
136
+ "sandboxed when Docker is available) and takes ~20\u201360s. It is not a lookup \u2014 for",
137
+ "a server's already-published grade, use `verify_attestation`. No wallet or RPC",
138
+ "needed.",
137
139
  "",
138
- "Input: server_ref \u2014 a registry ref (npm/@scope/server), an https:// MCP URL,",
139
- "or a local path to an MCP entry file. If Docker is unavailable, C-02 is",
140
- "skipped and the grade is capped at B for that run."
140
+ "server_ref examples: npm/@modelcontextprotocol/server-filesystem \xB7",
141
+ "https://example.com/mcp \xB7 ./build/index.js. For a token-gated https:// target,",
142
+ "pass `bearer`. If Docker is unavailable, C-02 is skipped and the grade is capped",
143
+ "at B for that run."
141
144
  ].join("\n");
142
145
  var runLitmusInputShape = {
143
- server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file.")
146
+ server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file."),
147
+ bearer: z.string().min(1).max(8192).optional().describe("Bearer token for a token-gated https:// MCP server. Sent as `Authorization: Bearer <token>` to the target origin only. Ignored for stdio/local targets."),
148
+ header: z.array(z.string()).max(20).optional().describe('Extra HTTP headers for a gated https:// target, each "Key: Value" (e.g. "X-Api-Key: \u2026"). Overrides the bearer-derived Authorization for the same key. Ignored for stdio/local targets.')
144
149
  };
145
- async function handleRunLitmus({ server_ref }) {
150
+ var PROGRESS_TOTAL = 5;
151
+ async function handleRunLitmus({ server_ref, bearer, header }, extra) {
146
152
  try {
147
- const bundle = await runLitmus(resolveTarget(server_ref));
153
+ const argv = [
154
+ ...bearer ? ["--bearer", bearer] : [],
155
+ ...(header ?? []).flatMap((h) => ["--header", h])
156
+ ];
157
+ const { headers } = parseAuthFlags(argv, {});
158
+ const progressToken = extra._meta?.progressToken;
159
+ const sendProgress = progressToken !== void 0 ? (progress, message) => void extra.sendNotification({
160
+ method: "notifications/progress",
161
+ params: { progressToken, progress, total: PROGRESS_TOTAL, message }
162
+ }) : void 0;
163
+ sendProgress?.(0, `Connecting to ${server_ref}\u2026`);
164
+ const bundle = await runLitmus(resolveTarget(server_ref), {
165
+ ...Object.keys(headers).length ? { headers } : {},
166
+ ...sendProgress ? { onProgress: (done, _total, label) => sendProgress(done, label) } : {}
167
+ });
148
168
  const payload = summarize(bundle);
149
169
  return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
150
170
  } catch (err) {
@@ -152,24 +172,28 @@ async function handleRunLitmus({ server_ref }) {
152
172
  return { isError: true, content: [{ type: "text", text: `run_litmus failed: ${message}` }] };
153
173
  }
154
174
  }
175
+ var CATEGORY_LABEL = {
176
+ "C-01": "tool-output injection",
177
+ "C-02": "permission / egress overreach",
178
+ "C-03": "sensitive-data handling",
179
+ "C-04": "adversarial-input handling"
180
+ };
155
181
  function summarize(b) {
156
182
  const find = (code) => b.categories.find((c) => c.code === code);
157
183
  const categories = ["C-01", "C-02", "C-03", "C-04"].map((code) => {
158
184
  const c = find(code);
159
185
  const findings = c?.status === "fail" ? c.probes.flatMap((p) => p.findings).filter((f) => f.severity === "high").slice(0, 5).map((f) => ({ tool: f.tool, kind: f.kind, match: truncate(f.match, 120), host: f.host, port: f.port })) : [];
160
- return { code, status: c?.status ?? "unknown", reason: c?.reason ?? null, findings };
186
+ return { code, check: CATEGORY_LABEL[code], status: c?.status ?? "unknown", reason: c?.reason ?? null, findings };
161
187
  });
162
- const dockerSkipped = !b.harness.dockerAvailable || find("C-02")?.status === "skipped";
163
188
  return {
164
189
  grade: b.grade,
165
- gradeRationale: b.gradeRationale,
166
- fingerprint: b.toolDefsFingerprint,
190
+ summary: b.gradeRationale,
167
191
  serverRef: b.serverRef,
168
192
  resolvedVersion: b.resolvedVersion,
193
+ fingerprint: b.toolDefsFingerprint,
169
194
  ranAt: b.ranAt,
170
195
  methodologyVersion: b.methodologyVersion,
171
- categories,
172
- ...dockerSkipped ? { dockerSkipped: "C-02 (egress) was not run because Docker was unavailable; the grade is capped at B for this run." } : {}
196
+ categories
173
197
  };
174
198
  }
175
199
  function truncate(s, n) {
@@ -44,7 +44,7 @@ async function runLitmusCli(args) {
44
44
  );
45
45
  return 2;
46
46
  }
47
- const { runLitmus } = await import("./src-GJ2L6B7K.js");
47
+ const { runLitmus } = await import("./src-RSTPCEYU.js");
48
48
  const input = resolveTarget(target);
49
49
  try {
50
50
  const bundle = await runLitmus(input, { headers, allowStateChanging });
package/dist/cli.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  runLitmusCli
4
- } from "./chunk-RAZNXIE5.js";
4
+ } from "./chunk-VOPISHBU.js";
5
5
  import {
6
6
  parseServerRef,
7
7
  serverKey
@@ -104,7 +104,7 @@ examples:
104
104
  polygraphso-litmus litmus npm/@modelcontextprotocol/server-filesystem
105
105
  polygraphso-litmus litmus --json npm/@modelcontextprotocol/server-filesystem
106
106
 
107
- Set POLYGRAPH_API_URL to pin the evidence and get a mint hand-off link.
107
+ Set POLYGRAPH_API_URL so check/list can look up a server's published grade.
108
108
  More at https://polygraph.so
109
109
  `;
110
110
  function readVersion() {
package/dist/index.d.ts CHANGED
@@ -1,5 +1,7 @@
1
1
  import { Client } from '@modelcontextprotocol/sdk/client/index.js';
2
2
  import { z } from 'zod';
3
+ import { RequestHandlerExtra } from '@modelcontextprotocol/sdk/shared/protocol.js';
4
+ import { ServerRequest, ServerNotification } from '@modelcontextprotocol/sdk/types.js';
3
5
 
4
6
  /**
5
7
  * Shared contract types for the litmus MVP. Web3-free.
@@ -272,6 +274,14 @@ interface RunLitmusOptions {
272
274
  * the `finally` tears the connection down, settling any in-flight calls.
273
275
  */
274
276
  timeoutMs?: number;
277
+ /**
278
+ * Optional progress callback, fired after each completed step of the run (the
279
+ * tool-surface fingerprint, then each probe category). `(done, total, label)` are
280
+ * step counts plus a short human phase name. Purely observational — it never
281
+ * affects the grade or the bundle. The MCP server forwards these as
282
+ * `notifications/progress` so a ~20–60s run isn't a frozen tool call.
283
+ */
284
+ onProgress?: (done: number, total: number, label: string) => void;
275
285
  }
276
286
  declare function runLitmus(target: TargetInput, opts?: RunLitmusOptions): Promise<EvidenceBundle>;
277
287
 
@@ -573,10 +583,14 @@ declare const RUN_LITMUS_TOOL_TITLE = "Run a behavioral litmus on an MCP server"
573
583
  declare const RUN_LITMUS_TOOL_DESCRIPTION: string;
574
584
  declare const runLitmusInputShape: {
575
585
  server_ref: z.ZodString;
586
+ bearer: z.ZodOptional<z.ZodString>;
587
+ header: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
576
588
  };
577
- declare function handleRunLitmus({ server_ref }: {
589
+ declare function handleRunLitmus({ server_ref, bearer, header }: {
578
590
  server_ref: string;
579
- }): Promise<{
591
+ bearer?: string;
592
+ header?: string[];
593
+ }, extra: RequestHandlerExtra<ServerRequest, ServerNotification>): Promise<{
580
594
  content: {
581
595
  type: "text";
582
596
  text: string;
package/dist/index.js CHANGED
@@ -14,11 +14,11 @@ import {
14
14
  rpcUrl,
15
15
  runLitmusInputShape,
16
16
  selectedNetwork
17
- } from "./chunk-GJ7M7C46.js";
17
+ } from "./chunk-LBXHFQN3.js";
18
18
  import {
19
19
  parseAuthFlags,
20
20
  resolveTarget
21
- } from "./chunk-RAZNXIE5.js";
21
+ } from "./chunk-VOPISHBU.js";
22
22
  import {
23
23
  assembleBundle,
24
24
  canaryMatch,
@@ -33,7 +33,7 @@ import {
33
33
  markdownTricks,
34
34
  runLitmus,
35
35
  stateChangingToolNames
36
- } from "./chunk-EWLIQPXF.js";
36
+ } from "./chunk-35UOPCBW.js";
37
37
  import {
38
38
  BUNDLE_SCHEMA_VERSION,
39
39
  CATEGORY_STATUS_UINT8,
package/dist/mcp.d.ts CHANGED
@@ -3,10 +3,11 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
3
3
 
4
4
  /**
5
5
  * `polygraphso-litmus-mcp` — the polygraph litmus MCP server. Stdio transport.
6
- * Exposes two tools to any MCP client (Claude Desktop, Cursor, …):
6
+ * Exposes to any MCP client (Claude Desktop, Cursor, …):
7
7
  *
8
- * • `run_litmus` — actively grade an MCP server A–F, then hand off to mint.
8
+ * • `run_litmus` — actively grade an MCP server A–F against the open harness.
9
9
  * • `verify_attestation` — passively read a server's published onchain grade.
10
+ * • prompts `grade` / `check` — one-line slash-command entry points to the two tools.
10
11
  *
11
12
  * Also exported as `@polygraphso/litmus/mcp` for embedding in a custom server.
12
13
  */
package/dist/mcp.js CHANGED
@@ -7,9 +7,9 @@ import {
7
7
  readAttestation,
8
8
  runLitmusInputShape,
9
9
  selectedNetwork
10
- } from "./chunk-GJ7M7C46.js";
11
- import "./chunk-RAZNXIE5.js";
12
- import "./chunk-EWLIQPXF.js";
10
+ } from "./chunk-LBXHFQN3.js";
11
+ import "./chunk-VOPISHBU.js";
12
+ import "./chunk-35UOPCBW.js";
13
13
  import {
14
14
  parseServerRef,
15
15
  serverKey
@@ -20,6 +20,7 @@ import { realpathSync } from "fs";
20
20
  import { fileURLToPath } from "url";
21
21
  import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js";
22
22
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
23
+ import { z as z2 } from "zod";
23
24
 
24
25
  // ../mcp/src/index.ts
25
26
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
@@ -36,28 +37,63 @@ function canonicalRef(ref) {
36
37
  var VERIFY_TOOL_NAME = "verify_attestation";
37
38
  var VERIFY_TOOL_TITLE = "Verify a server's polygraph attestation";
38
39
  var VERIFY_TOOL_DESCRIPTION = [
39
- "Read the onchain polygraph (litmus) attestation for an MCP server before an",
40
- "agent trusts \u2014 or, in agentic commerce, pays \u2014 it.",
40
+ "Read a server's already-published polygraph (litmus) grade \u2014 without running",
41
+ "anything \u2014 before an agent trusts or, in agentic commerce, pays it.",
41
42
  "",
42
- "Returns the behavioral grade (A\u2013F), the attestation UID, the evidence CID,",
43
- "and the graded tool-surface fingerprint. The caller must still recompute the",
44
- "LIVE fingerprint and require it to equal the attested one before paying \u2014 a",
45
- "passing attestation can otherwise front for a tool surface the server no",
46
- "longer serves (rug pull).",
43
+ "When a grade is published it returns the behavioral grade (A\u2013F), the attestation",
44
+ "UID, the evidence CID, and the graded tool-surface fingerprint. The caller must",
45
+ "still recompute the LIVE fingerprint and require it to equal the attested one",
46
+ "before paying \u2014 a passing attestation can otherwise front for a tool surface the",
47
+ "server no longer serves (rug pull).",
47
48
  "",
48
- "Input: server_ref \u2014 e.g. npm/@modelcontextprotocol/server-filesystem. Returns",
49
- "not_available when there is no attestation: treat that as unevaluated \u2014",
50
- "neither safe nor unsafe."
49
+ "Grade publishing is still rolling out, so this commonly returns not_available",
50
+ "today: that means UNEVALUATED (neither safe nor unsafe), not a failing grade \u2014 to",
51
+ "grade the server yourself right now, use `run_litmus`. A `lookup_failed` result",
52
+ "means the lookup itself failed (the index or chain was unreachable); the grade is",
53
+ "unknown, which is not the same as unevaluated.",
54
+ "",
55
+ "Input: server_ref \u2014 e.g. npm/@modelcontextprotocol/server-filesystem."
51
56
  ].join("\n");
52
57
  var verifyInputShape = {
53
58
  server_ref: z.string().min(1).max(512).describe("Registry-prefixed server identifier, e.g. npm/@scope/server.")
54
59
  };
55
60
  async function handleVerify({ server_ref }) {
56
- const uid = await resolveUid(server_ref);
57
- const att = uid ? await readAttestation(uid) : null;
61
+ const found = await resolveUid(server_ref);
62
+ if (found.kind === "error") {
63
+ return {
64
+ isError: true,
65
+ content: [
66
+ {
67
+ type: "text",
68
+ text: `lookup_failed \u2014 could not reach the polygraph grade index for ${server_ref} (${found.detail}). The lookup itself failed, so the grade is unknown \u2014 retry or report it as unchecked, NOT as unevaluated.`
69
+ }
70
+ ]
71
+ };
72
+ }
73
+ let att = null;
74
+ if (found.kind === "found") {
75
+ try {
76
+ att = await readAttestation(found.uid);
77
+ } catch (err) {
78
+ return {
79
+ isError: true,
80
+ content: [
81
+ {
82
+ type: "text",
83
+ text: `lookup_failed \u2014 the onchain read failed for ${server_ref} (${err instanceof Error ? err.message : String(err)}). Treat as unchecked (the chain/RPC was unreachable), not as "no grade".`
84
+ }
85
+ ]
86
+ };
87
+ }
88
+ }
58
89
  if (!att) {
59
90
  return {
60
- content: [{ type: "text", text: `not_available \u2014 no polygraph attestation for ${server_ref}` }]
91
+ content: [
92
+ {
93
+ type: "text",
94
+ text: `not_available \u2014 no published polygraph grade for ${server_ref}. Grade publishing is still rolling out, so this is expected for most servers; it means unevaluated (neither safe nor unsafe), not a failing grade. To grade it now, use run_litmus.`
95
+ }
96
+ ]
61
97
  };
62
98
  }
63
99
  if (canonicalRef(att.serverRef) !== canonicalRef(server_ref)) {
@@ -90,11 +126,12 @@ async function resolveUid(serverRef) {
90
126
  const base = process.env.POLYGRAPH_API_URL ?? "https://polygraph.so";
91
127
  try {
92
128
  const res = await fetch(`${base}/api/attestations?ref=${encodeURIComponent(serverRef)}`);
93
- if (!res.ok) return null;
129
+ if (res.status === 404) return { kind: "none" };
130
+ if (!res.ok) return { kind: "error", detail: `grade index returned HTTP ${res.status}` };
94
131
  const row = await res.json();
95
- return row?.attestation_uid ?? null;
96
- } catch {
97
- return null;
132
+ return row?.attestation_uid ? { kind: "found", uid: row.attestation_uid } : { kind: "none" };
133
+ } catch (err) {
134
+ return { kind: "error", detail: err instanceof Error ? err.message : String(err) };
98
135
  }
99
136
  }
100
137
 
@@ -104,17 +141,21 @@ function buildServer() {
104
141
  { name: "polygraph-litmus", version: "0.1.0" },
105
142
  {
106
143
  instructions: [
107
- "polygraph issues behavioral litmus grades (A\u2013F) for MCP servers.",
144
+ "polygraph runs an open behavioral test on an MCP server and reports a",
145
+ "letter grade A\u2013F, with the evidence behind it.",
108
146
  "",
109
- "Use `run_litmus` to grade a server now: it runs the open harness against",
110
- "the target and returns the grade, the evidence, and \u2014 when configured \u2014 a",
111
- "mint hand-off URL the human opens to publish the grade onchain. It launches",
112
- "the target's code (egress-sandboxed when Docker is present), so it is not a",
113
- "passive read.",
147
+ "Use `run_litmus` to grade a server now. It connects the way an agent would",
148
+ "and exercises the target \u2014 so it runs the target's code (egress-sandboxed",
149
+ "when Docker is present), not a passive read; ~20\u201360s. No wallet or RPC",
150
+ "needed. Pass `server_ref` as an npm ref (npm/@scope/server), an https:// MCP",
151
+ "URL, or a local path to an MCP entry file; pass `bearer` for a token-gated",
152
+ "https target.",
114
153
  "",
115
- "Use `verify_attestation` to read a server's already-published grade before",
116
- "recommending, installing, or paying it. A server with no attestation is",
117
- "unevaluated \u2014 neither safe nor unsafe; say so."
154
+ "Use `verify_attestation` to read a grade that was already published for a",
155
+ "server, without running anything. Grade publishing is still rolling out, so",
156
+ "it commonly returns not_available today \u2014 that means unevaluated (neither",
157
+ "safe nor unsafe), not a failing grade; to grade the server yourself, use",
158
+ "`run_litmus`."
118
159
  ].join("\n")
119
160
  }
120
161
  );
@@ -154,6 +195,48 @@ function buildServer() {
154
195
  },
155
196
  handleVerify
156
197
  );
198
+ server.registerPrompt(
199
+ "grade",
200
+ {
201
+ title: "Grade an MCP server",
202
+ description: "Run the open behavioral litmus against an MCP server and report its grade A\u2013F with the evidence.",
203
+ argsSchema: {
204
+ server_ref: z2.string().min(1).max(512).describe("npm/@scope/server, an https:// MCP URL, or a local path to an MCP entry file")
205
+ }
206
+ },
207
+ ({ server_ref }) => ({
208
+ messages: [
209
+ {
210
+ role: "user",
211
+ content: {
212
+ type: "text",
213
+ text: `Run the polygraph litmus on ${server_ref} using the run_litmus tool. Report the letter grade, the one-line summary, and any failed category with its findings. If the grade is capped at B because Docker was unavailable, say so plainly.`
214
+ }
215
+ }
216
+ ]
217
+ })
218
+ );
219
+ server.registerPrompt(
220
+ "check",
221
+ {
222
+ title: "Check a server's published grade",
223
+ description: "Read a server's already-published polygraph grade without running anything.",
224
+ argsSchema: {
225
+ server_ref: z2.string().min(1).max(512).describe("Registry-prefixed server identifier, e.g. npm/@scope/server")
226
+ }
227
+ },
228
+ ({ server_ref }) => ({
229
+ messages: [
230
+ {
231
+ role: "user",
232
+ content: {
233
+ type: "text",
234
+ text: `Use the verify_attestation tool to read the published polygraph grade for ${server_ref}. If it returns not_available, say the server is unevaluated (neither safe nor unsafe) and offer to run a live grade with run_litmus. If it returns lookup_failed, say the lookup itself failed so the grade is unknown \u2014 do not call it unevaluated.`
235
+ }
236
+ }
237
+ ]
238
+ })
239
+ );
157
240
  return server;
158
241
  }
159
242
  async function main() {
@@ -12,7 +12,7 @@ import {
12
12
  markdownTricks,
13
13
  runLitmus,
14
14
  stateChangingToolNames
15
- } from "./chunk-EWLIQPXF.js";
15
+ } from "./chunk-35UOPCBW.js";
16
16
  import "./chunk-ZR6XRGMQ.js";
17
17
  export {
18
18
  assembleBundle,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polygraphso/litmus",
3
- "version": "0.7.0",
3
+ "version": "0.8.0",
4
4
  "description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
5
5
  "license": "Apache-2.0",
6
6
  "homepage": "https://polygraph.so",
@@ -62,12 +62,12 @@
62
62
  "tsup": "^8.3.0",
63
63
  "typescript": "^5.9.3",
64
64
  "vitest": "^2.1.0",
65
- "@polygraph/probes": "0.0.0",
66
65
  "@polygraph/core": "0.0.0",
66
+ "@polygraph/probes": "0.0.0",
67
67
  "@polygraph/onchain": "0.0.0",
68
- "@polygraph/agent": "0.0.0",
68
+ "@polygraph/mcp": "0.0.0",
69
69
  "@polygraph/cli": "0.0.0",
70
- "@polygraph/mcp": "0.0.0"
70
+ "@polygraph/agent": "0.0.0"
71
71
  },
72
72
  "publishConfig": {
73
73
  "access": "public"