@polygraphso/litmus 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -30,16 +30,28 @@ and the grade is capped at **B** for that run.
30
30
  ```bash
31
31
  polygraphso-litmus litmus <registry-ref | https-url | path-to-mcp> # grade a server
32
32
  polygraphso-litmus litmus --json <ref> # machine-readable evidence bundle
33
+ polygraphso-litmus litmus --timeout <seconds> <ref> # cap the whole run (default 900s)
33
34
  polygraphso-litmus check <ref> # look up a published grade
34
35
  ```
35
36
 
36
37
  Examples:
37
38
 
38
39
  ```bash
39
- polygraphso-litmus litmus npm/@modelcontextprotocol/server-filesystem
40
+ # a remote https target runs no local code — graded directly
40
41
  polygraphso-litmus litmus https://example.com/mcp
42
+
43
+ # a registry ref or local file launches the TARGET's own code. Grade it sandboxed:
44
+ LITMUS_STDIO_ISOLATION=docker polygraphso-litmus litmus npm/@modelcontextprotocol/server-filesystem
45
+ # …or, without Docker, opt in to running it on this host:
46
+ polygraphso-litmus litmus --unsafe-host-exec npm/@modelcontextprotocol/server-filesystem
41
47
  ```
42
48
 
49
+ **Host-execution safety.** Grading a registry ref (`npm/…`, `pypi/…`) or a local
50
+ path **launches the target's own code**. By default the CLI refuses to do that on
51
+ your host: set `LITMUS_STDIO_ISOLATION=docker` to run the target only inside the
52
+ hardened sandbox, or pass `--unsafe-host-exec` to accept host execution. Remote
53
+ `https://` targets run no local code and need neither.
54
+
43
55
  The `litmus` command exits non-zero on a failing grade (D/F), so it scripts in CI.
44
56
 
45
57
  To dispute a published grade, just re-run `litmus` against the same server: the harness is
@@ -54,7 +66,9 @@ MCP-capable client. It exposes two tools:
54
66
  and return the grade and the evidence. Optional **`bearer`** (and `header`
55
67
  entries, each `"Key: Value"`) grade a token-gated `https://` MCP target — sent
56
68
  to that origin only, ignored for stdio/local targets, the same plumbing as the
57
- CLI's `--bearer` / `--header`.
69
+ CLI's `--bearer` / `--header`. Grading a registry ref or local path launches the
70
+ target's own code, so it requires **`unsafe_host_exec: true`** unless
71
+ `LITMUS_STDIO_ISOLATION=docker` is set (the MCP mirror of `--unsafe-host-exec`).
58
72
  - **`verify_attestation`** — passively read a server's *already-published* grade
59
73
  before trusting or paying it.
60
74
 
@@ -1536,12 +1536,14 @@ async function runEgressProbe(ref, opts) {
1536
1536
  if (staged) await staged.cleanup();
1537
1537
  }
1538
1538
  }
1539
/**
 * Run `exercise` once for every tool returned by `enumerateTools(client)`,
 * sequentially and in listing order.
 *
 * @param client   Passed straight through to `enumerateTools`.
 * @param exercise Async callback receiving `{ name, description, inputSchema }`.
 */
async function exerciseSurface(client, exercise) {
  const listed = await enumerateTools(client);
  for (const tool of listed) {
    const name = tool.name;
    // Optional fields are normalised so the callback always gets a string
    // description and an explicit `null` when no schema was provided.
    const description = tool.description ?? "";
    const inputSchema = tool.inputSchema ?? null;
    await exercise({ name, description, inputSchema });
  }
}
1539
1544
  async function collectEgress(conn, sink, declaredEgress, baselineAllowlist) {
1540
1545
  try {
1541
- const { tools } = await conn.client.listTools();
1542
- for (const t of tools) {
1543
- await exerciseTool(conn.client, { name: t.name, description: t.description ?? "", inputSchema: t.inputSchema ?? null });
1544
- }
1546
+ await exerciseSurface(conn.client, (def) => exerciseTool(conn.client, def));
1545
1547
  } finally {
1546
1548
  await conn.teardown();
1547
1549
  }
@@ -2626,6 +2628,7 @@ export {
2626
2628
  gradeFromCategories,
2627
2629
  assembleBundle,
2628
2630
  runLitmus,
2631
+ enumerateTools,
2629
2632
  SkillLoadError,
2630
2633
  loadSkill,
2631
2634
  stripExamples,
@@ -34,20 +34,29 @@ function truncate(s, n) {
34
34
  }
35
35
 
36
36
  // ../cli/src/litmus.ts
37
+ var DEFAULT_RUN_TIMEOUT_MS = 15 * 60 * 1e3;
37
38
  async function runLitmusCli(args) {
38
39
  const json = args.includes("--json");
39
- const { headers, allowStateChanging, positionals } = parseAuthFlags(args);
40
+ const { headers, allowStateChanging, unsafeHostExec, timeoutMs, positionals } = parseAuthFlags(args);
40
41
  const target = positionals[0];
41
42
  if (!target) {
42
43
  process.stderr.write(
43
- 'usage: polygraphso litmus [--json] [--bearer <token>] [--header "Key: Value"] [--allow-state-changing] <registry-ref | https-url | path-to-mcp>\n'
44
+ 'usage: polygraphso litmus [--json] [--bearer <token>] [--header "Key: Value"] [--allow-state-changing] [--unsafe-host-exec] [--timeout <seconds>] <registry-ref | https-url | path-to-mcp>\n'
44
45
  );
45
46
  return 2;
46
47
  }
47
- const { runLitmus } = await import("./src-TMJOIVGB.js");
48
48
  const input = resolveTarget(target);
49
+ const guard = checkHostExec(input, unsafeHostExec);
50
+ if (!guard.allow) {
51
+ process.stderr.write(`\u2192 litmus: ${guard.refuse}
52
+ `);
53
+ return 2;
54
+ }
55
+ if (guard.warn) process.stderr.write(`\u2192 ${guard.warn}
56
+ `);
57
+ const { runLitmus } = await import("./src-I6AGG4CJ.js");
49
58
  try {
50
- const bundle = await runLitmus(input, { headers, allowStateChanging });
59
+ const bundle = await runLitmus(input, { headers, allowStateChanging, timeoutMs });
51
60
  process.stdout.write(json ? canonicalStringify(bundle) + "\n" : formatBundle(bundle));
52
61
  return bundle.grade === "D" || bundle.grade === "F" ? 1 : 0;
53
62
  } catch (err) {
@@ -60,6 +69,8 @@ function parseAuthFlags(args, env = process.env) {
60
69
  const headers = {};
61
70
  const headerArgs = [];
62
71
  let allowStateChanging = false;
72
+ let unsafeHostExec = false;
73
+ let timeoutMs = DEFAULT_RUN_TIMEOUT_MS;
63
74
  let bearer = env.LITMUS_BEARER || void 0;
64
75
  const positionals = [];
65
76
  for (let i = 0; i < args.length; i++) {
@@ -67,6 +78,12 @@ function parseAuthFlags(args, env = process.env) {
67
78
  if (a === "--json") continue;
68
79
  if (a === "--allow-state-changing") {
69
80
  allowStateChanging = true;
81
+ } else if (a === "--unsafe-host-exec") {
82
+ unsafeHostExec = true;
83
+ } else if (a === "--timeout") {
84
+ timeoutMs = timeoutSecondsToMs(args[++i]) ?? timeoutMs;
85
+ } else if (a.startsWith("--timeout=")) {
86
+ timeoutMs = timeoutSecondsToMs(a.slice("--timeout=".length)) ?? timeoutMs;
70
87
  } else if (a === "--bearer") {
71
88
  bearer = args[++i] ?? bearer;
72
89
  } else if (a.startsWith("--bearer=")) {
@@ -89,7 +106,25 @@ function parseAuthFlags(args, env = process.env) {
89
106
  const value = h.slice(idx + 1).trim();
90
107
  if (key) headers[key] = value;
91
108
  }
92
- return { headers, allowStateChanging, positionals };
109
+ return { headers, allowStateChanging, unsafeHostExec, timeoutMs, positionals };
110
+ }
111
/**
 * Convert a `--timeout` value expressed in seconds into whole milliseconds.
 *
 * @param {string | undefined} v Raw flag value; may be missing or empty.
 * @returns {number | undefined} A positive integer millisecond count, or
 *   `undefined` when `v` is missing, empty, non-numeric, non-finite, or not
 *   greater than zero (callers then keep their existing timeout).
 */
function timeoutSecondsToMs(v) {
  if (!v) return void 0;
  const sec = Number(v);
  if (!Number.isFinite(sec) || sec <= 0) return void 0;
  // A positive sub-millisecond value used to floor to 0, yielding a "valid"
  // timeout of 0 ms. Clamp so every accepted input maps to a positive delay.
  // The upper bound is the largest delay Node timers honour; larger values
  // are coerced to 1 ms by `setTimeout`.
  // NOTE(review): assumes the result ultimately feeds a timer — confirm.
  const maxDelayMs = 2 ** 31 - 1;
  const ms = Math.floor(sec * 1e3);
  return Math.min(maxDelayMs, Math.max(1, ms));
}
116
/**
 * Decide whether grading `input` may proceed, given that a non-URL target
 * launches the target's own code locally.
 *
 * @param input     Resolved target: an http(s) URL string, or anything else
 *                  (another string or a non-string value), which is treated as stdio.
 * @param optIn     Truthy when the caller explicitly accepted host execution.
 * @param optInHint Text shown in the refusal telling the user how to opt in.
 * @param env       Environment to read `LITMUS_STDIO_ISOLATION` from.
 * @returns `{ allow: true }`, `{ allow: true, warn }`, or `{ allow: false, refuse }`.
 */
+ function checkHostExec(input, optIn, optInHint = "--unsafe-host-exec", env = process.env) {
117
// Anything that is not a string starting with http:// or https:// counts as stdio.
+ const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
118
+ const dockerIsolated = env.LITMUS_STDIO_ISOLATION === "docker";
119
// URL targets, and stdio targets under Docker isolation, need no opt-in.
+ if (!isStdio || dockerIsolated) return { allow: true };
120
+ const why = "this launches the target's own code; without Docker isolation it runs on THIS host";
121
// Explicit opt-in: allow, but hand back a warning for the caller to surface.
+ if (optIn) return { allow: true, warn: `\u26A0 unsafe host execution \u2014 ${why}.` };
122
// Otherwise refuse, listing both ways to proceed.
+ return {
123
+ allow: false,
124
+ refuse: `refusing host execution \u2014 ${why}.
125
+ \u2022 sandboxed (recommended): set LITMUS_STDIO_ISOLATION=docker (requires Docker)
126
+ \u2022 accept the risk: re-run with ${optInHint}`
127
+ };
93
128
  }
94
129
  function resolveTarget(target) {
95
130
  if (/^https?:\/\//i.test(target)) return target;
@@ -112,7 +147,9 @@ function tsxCli() {
112
147
  }
113
148
 
114
149
  export {
150
+ DEFAULT_RUN_TIMEOUT_MS,
115
151
  runLitmusCli,
116
152
  parseAuthFlags,
153
+ checkHostExec,
117
154
  resolveTarget
118
155
  };
@@ -1,14 +1,16 @@
1
1
  import {
2
+ DEFAULT_RUN_TIMEOUT_MS,
3
+ checkHostExec,
2
4
  parseAuthFlags,
3
5
  resolveTarget
4
- } from "./chunk-BUKDFSDO.js";
6
+ } from "./chunk-GNPHHS6I.js";
5
7
  import {
6
8
  SKILL_METHODOLOGY_VERSION,
7
9
  runLitmus,
8
10
  runSkillLitmus,
9
11
  runSkillQuality,
10
12
  runSkillQualityJudged
11
- } from "./chunk-RYJXVMCT.js";
13
+ } from "./chunk-63OICX66.js";
12
14
  import {
13
15
  CATEGORY_STATUS_UINT8,
14
16
  METHODOLOGY_VERSION,
@@ -299,24 +301,32 @@ var RUN_LITMUS_TOOL_DESCRIPTION = [
299
301
  var runLitmusInputShape = {
300
302
  server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file."),
301
303
  bearer: z.string().min(1).max(8192).optional().describe("Bearer token for a token-gated https:// MCP server. Sent as `Authorization: Bearer <token>` to the target origin only. Ignored for stdio/local targets."),
302
- header: z.array(z.string()).max(20).optional().describe('Extra HTTP headers for a gated https:// target, each "Key: Value" (e.g. "X-Api-Key: \u2026"). Overrides the bearer-derived Authorization for the same key. Ignored for stdio/local targets.')
304
+ header: z.array(z.string()).max(20).optional().describe('Extra HTTP headers for a gated https:// target, each "Key: Value" (e.g. "X-Api-Key: \u2026"). Overrides the bearer-derived Authorization for the same key. Ignored for stdio/local targets.'),
305
+ unsafe_host_exec: z.boolean().optional().describe("Required to grade a registry ref or local path: it launches the target's own code, and without Docker isolation that runs on THIS host. Set true to accept host execution. Ignored for https:// targets or when LITMUS_STDIO_ISOLATION=docker."),
306
+ timeout_seconds: z.number().int().positive().max(3600).optional().describe("Aggregate wall-clock ceiling for the whole run, in seconds (default 900). Bounds a hostile server that stretches the run across many tools/probes.")
303
307
  };
304
308
  var PROGRESS_TOTAL = 5;
305
- async function handleRunLitmus({ server_ref, bearer, header }, extra) {
309
+ async function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, timeout_seconds }, extra) {
306
310
  try {
307
311
  const argv = [
308
312
  ...bearer ? ["--bearer", bearer] : [],
309
313
  ...(header ?? []).flatMap((h) => ["--header", h])
310
314
  ];
311
315
  const { headers } = parseAuthFlags(argv, {});
316
+ const input = resolveTarget(server_ref);
317
+ const guard = checkHostExec(input, unsafe_host_exec ?? false, 'set "unsafe_host_exec": true');
318
+ if (!guard.allow) {
319
+ return { isError: true, content: [{ type: "text", text: `run_litmus refused: ${guard.refuse}` }] };
320
+ }
312
321
  const progressToken = extra._meta?.progressToken;
313
322
  const sendProgress = progressToken !== void 0 ? (progress, message) => void extra.sendNotification({
314
323
  method: "notifications/progress",
315
324
  params: { progressToken, progress, total: PROGRESS_TOTAL, message }
316
325
  }) : void 0;
317
326
  sendProgress?.(0, `Connecting to ${server_ref}\u2026`);
318
- const bundle = await runLitmus(resolveTarget(server_ref), {
327
+ const bundle = await runLitmus(input, {
319
328
  ...Object.keys(headers).length ? { headers } : {},
329
+ timeoutMs: timeout_seconds ? timeout_seconds * 1e3 : DEFAULT_RUN_TIMEOUT_MS,
320
330
  ...sendProgress ? { onProgress: (done, _total, label) => sendProgress(done, label) } : {}
321
331
  });
322
332
  const payload = summarize(bundle);
package/dist/cli-skill.js CHANGED
@@ -4,7 +4,7 @@ import {
4
4
  runSkillLitmus,
5
5
  runSkillQuality,
6
6
  runSkillQualityJudged
7
- } from "./chunk-RYJXVMCT.js";
7
+ } from "./chunk-63OICX66.js";
8
8
  import "./chunk-44R4ZYOE.js";
9
9
 
10
10
  // src/cli-skill.ts
package/dist/cli.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  runLitmusCli
4
- } from "./chunk-BUKDFSDO.js";
4
+ } from "./chunk-GNPHHS6I.js";
5
5
  import {
6
6
  parseServerRef,
7
7
  serverKey
package/dist/index.d.ts CHANGED
@@ -323,6 +323,35 @@ interface RunLitmusOptions {
323
323
  onProgress?: (done: number, total: number, label: string) => void;
324
324
  }
325
325
  declare function runLitmus(target: TargetInput, opts?: RunLitmusOptions): Promise<EvidenceBundle>;
326
+ /** The fields of a `tools/list` entry the harness reads. */
327
+ interface ListedTool {
328
// The only required field.
+ name: string;
329
// Optional; consumers visible in this diff substitute "" when it is absent.
+ description?: string;
330
// Optional; consumers visible in this diff substitute null when it is absent.
+ inputSchema?: unknown;
331
// Typed as unknown; no consumer of this field is visible in this diff.
+ annotations?: unknown;
332
+ }
333
// Minimal client shape accepted by `enumerateTools`: a single `listTools`
// method that takes an optional `cursor` and resolves to an object with an
// optional `tools` array and an optional `nextCursor`.
+ interface ListToolsClient {
334
+ listTools(params?: {
335
+ cursor?: string;
336
+ }): Promise<{
337
+ tools?: ListedTool[];
338
+ nextCursor?: string;
339
+ }>;
340
+ }
341
+ /**
342
+ * Follow `tools/list` pagination to the end, accumulating the full tool surface.
343
+ * The MCP SDK's `listTools()` returns a single page and does not auto-paginate,
344
+ * so a server can park a tool (e.g. `transfer_funds`) or a poisoned description
345
+ * behind a `nextCursor` — invisible to a one-page lister, yet served to a real
346
+ * agent. We enumerate every page so the fingerprint and grade cover what the
347
+ * agent actually gets, and **fail closed**: if the server is still paginating
348
+ * past the gradable cap, we refuse rather than grade a partial surface.
349
+ */
350
+ declare function enumerateTools(client: ListToolsClient, opts?: {
351
// NOTE(review): presumably the cap on the number of tools accumulated — confirm against the implementation.
+ maxTools?: number;
352
// NOTE(review): presumably a cap on the total size of the accumulated listing — confirm.
+ maxBytes?: number;
353
// NOTE(review): presumably a timeout in milliseconds for listing calls — confirm scope (per page vs. overall).
+ listTimeoutMs?: number;
354
+ }): Promise<ListedTool[]>;
326
355
 
327
356
  /**
328
357
  * Tool-surface fingerprint (litmus-test-v1 §6, technical-design §3).
@@ -961,23 +990,27 @@ declare const runLitmusInputShape: {
961
990
  server_ref: z.ZodString;
962
991
  bearer: z.ZodOptional<z.ZodString>;
963
992
  header: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
993
+ unsafe_host_exec: z.ZodOptional<z.ZodBoolean>;
994
+ timeout_seconds: z.ZodOptional<z.ZodNumber>;
964
995
  };
965
- declare function handleRunLitmus({ server_ref, bearer, header }: {
996
+ declare function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, timeout_seconds }: {
966
997
  server_ref: string;
967
998
  bearer?: string;
968
999
  header?: string[];
1000
+ unsafe_host_exec?: boolean;
1001
+ timeout_seconds?: number;
969
1002
  }, extra: RequestHandlerExtra<ServerRequest, ServerNotification>): Promise<{
1003
+ isError: true;
970
1004
  content: {
971
1005
  type: "text";
972
1006
  text: string;
973
1007
  }[];
974
- isError?: undefined;
975
1008
  } | {
976
- isError: true;
977
1009
  content: {
978
1010
  type: "text";
979
1011
  text: string;
980
1012
  }[];
1013
+ isError?: undefined;
981
1014
  }>;
982
1015
 
983
1016
  /**
@@ -1057,6 +1090,10 @@ interface ParsedLitmusFlags {
1057
1090
  headers: Record<string, string>;
1058
1091
  /** Whether to actively call state-changing tools (opt-in). */
1059
1092
  allowStateChanging: boolean;
1093
+ /** Opt-in to run a stdio target's code on the host without Docker isolation. */
1094
+ unsafeHostExec: boolean;
1095
+ /** Aggregate wall-clock ceiling (ms) — `--timeout <seconds>`, else the default. */
1096
+ timeoutMs: number;
1060
1097
  /** Non-flag arguments, in order (positionals[0] is the target). */
1061
1098
  positionals: string[];
1062
1099
  }
@@ -1072,4 +1109,4 @@ declare function parseAuthFlags(args: readonly string[], env?: NodeJS.ProcessEnv
1072
1109
  /** A target is an https URL, a local MCP entry file, or a registry ref. */
1073
1110
  declare function resolveTarget(target: string): string | StdioCommand;
1074
1111
 
1075
- export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, encodeSkillAttestationFields, exfilInstruction, fingerprintToolDefs, 
formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, verifySkillInputShape };
1112
+ export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type ListToolsClient, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, encodeSkillAttestationFields, enumerateTools, 
exfilInstruction, fingerprintToolDefs, formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, verifySkillInputShape };
package/dist/index.js CHANGED
@@ -31,11 +31,11 @@ import {
31
31
  skillAttestationFields,
32
32
  skillSchemaUID,
33
33
  verifySkillInputShape
34
- } from "./chunk-Z66GKAQD.js";
34
+ } from "./chunk-VAOQNFW3.js";
35
35
  import {
36
36
  parseAuthFlags,
37
37
  resolveTarget
38
- } from "./chunk-BUKDFSDO.js";
38
+ } from "./chunk-GNPHHS6I.js";
39
39
  import {
40
40
  SKILL_BUNDLE_SCHEMA_VERSION,
41
41
  SKILL_METHODOLOGY_VERSION,
@@ -46,6 +46,7 @@ import {
46
46
  classifyTool,
47
47
  connectTarget,
48
48
  dangerousCommand,
49
+ enumerateTools,
49
50
  exfilInstruction,
50
51
  fingerprintToolDefs,
51
52
  gradeFromCategories,
@@ -68,7 +69,7 @@ import {
68
69
  skillInjectionFails,
69
70
  stateChangingToolNames,
70
71
  stripExamples
71
- } from "./chunk-RYJXVMCT.js";
72
+ } from "./chunk-63OICX66.js";
72
73
  import {
73
74
  BUNDLE_SCHEMA_VERSION,
74
75
  CATEGORY_STATUS_UINT8,
@@ -112,16 +113,18 @@ function gateDecision(attestation, live, passing = DEFAULT_PASSING, now = BigInt
112
113
  const versionNote = attestation.resolvedVersion ? ` (graded version ${attestation.resolvedVersion})` : "";
113
114
  return { action: "pay", reason: `grade ${attestation.overallGrade}; live fingerprint matches${versionNote}` };
114
115
  }
116
/**
 * Compute the fingerprint of the tool surface a connected client reports.
 *
 * Each tool returned by `enumerateTools(client)` is reduced to
 * `{ name, description, inputSchema }` before being handed to
 * `fingerprintToolDefs`; only the resulting `fingerprint` is returned.
 *
 * @param client Passed straight through to `enumerateTools`.
 */
async function fingerprintLiveSurface(client) {
  // Missing optional fields are normalised to "" / null before fingerprinting.
  const toDef = (tool) => ({
    name: tool.name,
    description: tool.description ?? "",
    inputSchema: tool.inputSchema ?? null
  });
  const listed = await enumerateTools(client);
  const { fingerprint } = fingerprintToolDefs(listed.map(toDef));
  return fingerprint;
}
115
124
  async function liveFingerprint(target) {
116
125
  const conn = await connectTarget(target);
117
126
  try {
118
- const { tools } = await conn.client.listTools();
119
- const defs = (tools ?? []).map((t) => ({
120
- name: t.name,
121
- description: t.description ?? "",
122
- inputSchema: t.inputSchema ?? null
123
- }));
124
- return { fingerprint: fingerprintToolDefs(defs).fingerprint, serverRef: conn.serverRef };
127
+ return { fingerprint: await fingerprintLiveSurface(conn.client), serverRef: conn.serverRef };
125
128
  } finally {
126
129
  await conn.teardown();
127
130
  }
@@ -160,6 +163,7 @@ export {
160
163
  encodeLitmusAttestation,
161
164
  encodeSkillAttestation,
162
165
  encodeSkillAttestationFields,
166
+ enumerateTools,
163
167
  exfilInstruction,
164
168
  fingerprintToolDefs,
165
169
  formatServerRef,
package/dist/mcp.js CHANGED
@@ -20,11 +20,11 @@ import {
20
20
  runSkillLitmusInputShape,
21
21
  verifyInputShape,
22
22
  verifySkillInputShape
23
- } from "./chunk-Z66GKAQD.js";
24
- import "./chunk-BUKDFSDO.js";
23
+ } from "./chunk-VAOQNFW3.js";
24
+ import "./chunk-GNPHHS6I.js";
25
25
  import {
26
26
  judgeFromEnv
27
- } from "./chunk-RYJXVMCT.js";
27
+ } from "./chunk-63OICX66.js";
28
28
  import "./chunk-44R4ZYOE.js";
29
29
 
30
30
  // src/mcp.ts
@@ -8,6 +8,7 @@ import {
8
8
  classifyTool,
9
9
  connectTarget,
10
10
  dangerousCommand,
11
+ enumerateTools,
11
12
  exfilInstruction,
12
13
  fingerprintToolDefs,
13
14
  gradeFromCategories,
@@ -30,7 +31,7 @@ import {
30
31
  skillInjectionFails,
31
32
  stateChangingToolNames,
32
33
  stripExamples
33
- } from "./chunk-RYJXVMCT.js";
34
+ } from "./chunk-63OICX66.js";
34
35
  import "./chunk-44R4ZYOE.js";
35
36
  export {
36
37
  SKILL_BUNDLE_SCHEMA_VERSION,
@@ -42,6 +43,7 @@ export {
42
43
  classifyTool,
43
44
  connectTarget,
44
45
  dangerousCommand,
46
+ enumerateTools,
45
47
  exfilInstruction,
46
48
  fingerprintToolDefs,
47
49
  gradeFromCategories,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polygraphso/litmus",
3
- "version": "0.9.1",
3
+ "version": "0.10.0",
4
4
  "description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
5
5
  "license": "Apache-2.0",
6
6
  "homepage": "https://polygraph.so",
@@ -62,12 +62,12 @@
62
62
  "tsup": "^8.3.0",
63
63
  "typescript": "^5.9.3",
64
64
  "vitest": "^2.1.0",
65
- "@polygraph/core": "0.0.0",
66
65
  "@polygraph/onchain": "0.0.0",
67
- "@polygraph/agent": "0.0.0",
66
+ "@polygraph/core": "0.0.0",
68
67
  "@polygraph/probes": "0.0.0",
69
- "@polygraph/cli": "0.0.0",
70
- "@polygraph/mcp": "0.0.0"
68
+ "@polygraph/agent": "0.0.0",
69
+ "@polygraph/mcp": "0.0.0",
70
+ "@polygraph/cli": "0.0.0"
71
71
  },
72
72
  "publishConfig": {
73
73
  "access": "public"