@polygraphso/litmus 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -2
- package/dist/{chunk-RYJXVMCT.js → chunk-63OICX66.js} +7 -4
- package/dist/{chunk-BUKDFSDO.js → chunk-GNPHHS6I.js} +42 -5
- package/dist/{chunk-Z66GKAQD.js → chunk-VAOQNFW3.js} +15 -5
- package/dist/cli-skill.js +1 -1
- package/dist/cli.js +1 -1
- package/dist/index.d.ts +41 -4
- package/dist/index.js +14 -10
- package/dist/mcp.js +3 -3
- package/dist/{src-TMJOIVGB.js → src-I6AGG4CJ.js} +3 -1
- package/package.json +5 -5
package/README.md
CHANGED
|
@@ -30,16 +30,28 @@ and the grade is capped at **B** for that run.
|
|
|
30
30
|
```bash
|
|
31
31
|
polygraphso-litmus litmus <registry-ref | https-url | path-to-mcp> # grade a server
|
|
32
32
|
polygraphso-litmus litmus --json <ref> # machine-readable evidence bundle
|
|
33
|
+
polygraphso-litmus litmus --timeout <seconds> <ref> # cap the whole run (default 900s)
|
|
33
34
|
polygraphso-litmus check <ref> # look up a published grade
|
|
34
35
|
```
|
|
35
36
|
|
|
36
37
|
Examples:
|
|
37
38
|
|
|
38
39
|
```bash
|
|
39
|
-
|
|
40
|
+
# a remote https target runs no local code — graded directly
|
|
40
41
|
polygraphso-litmus litmus https://example.com/mcp
|
|
42
|
+
|
|
43
|
+
# a registry ref or local file launches the TARGET's own code. Grade it sandboxed:
|
|
44
|
+
LITMUS_STDIO_ISOLATION=docker polygraphso-litmus litmus npm/@modelcontextprotocol/server-filesystem
|
|
45
|
+
# …or, without Docker, opt in to running it on this host:
|
|
46
|
+
polygraphso-litmus litmus --unsafe-host-exec npm/@modelcontextprotocol/server-filesystem
|
|
41
47
|
```
|
|
42
48
|
|
|
49
|
+
**Host-execution safety.** Grading a registry ref (`npm/…`, `pypi/…`) or a local
|
|
50
|
+
path **launches the target's own code**. By default the CLI refuses to do that on
|
|
51
|
+
your host: set `LITMUS_STDIO_ISOLATION=docker` to run the target only inside the
|
|
52
|
+
hardened sandbox, or pass `--unsafe-host-exec` to accept host execution. Remote
|
|
53
|
+
`https://` targets run no local code and need neither.
|
|
54
|
+
|
|
43
55
|
The `litmus` command exits non-zero on a failing grade (D/F), so it scripts in CI.
|
|
44
56
|
|
|
45
57
|
To dispute a published grade, just re-run `litmus` against the same server: the harness is
|
|
@@ -54,7 +66,9 @@ MCP-capable client. It exposes two tools:
|
|
|
54
66
|
and return the grade and the evidence. Optional **`bearer`** (and `header`
|
|
55
67
|
entries, each `"Key: Value"`) grade a token-gated `https://` MCP target — sent
|
|
56
68
|
to that origin only, ignored for stdio/local targets, the same plumbing as the
|
|
57
|
-
CLI's `--bearer` / `--header`.
|
|
69
|
+
CLI's `--bearer` / `--header`. Grading a registry ref or local path launches the
|
|
70
|
+
target's own code, so it requires **`unsafe_host_exec: true`** unless
|
|
71
|
+
`LITMUS_STDIO_ISOLATION=docker` is set (the MCP mirror of `--unsafe-host-exec`).
|
|
58
72
|
- **`verify_attestation`** — passively read a server's *already-published* grade
|
|
59
73
|
before trusting or paying it.
|
|
60
74
|
|
|
@@ -1536,12 +1536,14 @@ async function runEgressProbe(ref, opts) {
|
|
|
1536
1536
|
if (staged) await staged.cleanup();
|
|
1537
1537
|
}
|
|
1538
1538
|
}
|
|
1539
|
+
async function exerciseSurface(client, exercise) {
|
|
1540
|
+
for (const t of await enumerateTools(client)) {
|
|
1541
|
+
await exercise({ name: t.name, description: t.description ?? "", inputSchema: t.inputSchema ?? null });
|
|
1542
|
+
}
|
|
1543
|
+
}
|
|
1539
1544
|
async function collectEgress(conn, sink, declaredEgress, baselineAllowlist) {
|
|
1540
1545
|
try {
|
|
1541
|
-
|
|
1542
|
-
for (const t of tools) {
|
|
1543
|
-
await exerciseTool(conn.client, { name: t.name, description: t.description ?? "", inputSchema: t.inputSchema ?? null });
|
|
1544
|
-
}
|
|
1546
|
+
await exerciseSurface(conn.client, (def) => exerciseTool(conn.client, def));
|
|
1545
1547
|
} finally {
|
|
1546
1548
|
await conn.teardown();
|
|
1547
1549
|
}
|
|
@@ -2626,6 +2628,7 @@ export {
|
|
|
2626
2628
|
gradeFromCategories,
|
|
2627
2629
|
assembleBundle,
|
|
2628
2630
|
runLitmus,
|
|
2631
|
+
enumerateTools,
|
|
2629
2632
|
SkillLoadError,
|
|
2630
2633
|
loadSkill,
|
|
2631
2634
|
stripExamples,
|
|
@@ -34,20 +34,29 @@ function truncate(s, n) {
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
// ../cli/src/litmus.ts
|
|
37
|
+
var DEFAULT_RUN_TIMEOUT_MS = 15 * 60 * 1e3;
|
|
37
38
|
async function runLitmusCli(args) {
|
|
38
39
|
const json = args.includes("--json");
|
|
39
|
-
const { headers, allowStateChanging, positionals } = parseAuthFlags(args);
|
|
40
|
+
const { headers, allowStateChanging, unsafeHostExec, timeoutMs, positionals } = parseAuthFlags(args);
|
|
40
41
|
const target = positionals[0];
|
|
41
42
|
if (!target) {
|
|
42
43
|
process.stderr.write(
|
|
43
|
-
'usage: polygraphso litmus [--json] [--bearer <token>] [--header "Key: Value"] [--allow-state-changing] <registry-ref | https-url | path-to-mcp>\n'
|
|
44
|
+
'usage: polygraphso litmus [--json] [--bearer <token>] [--header "Key: Value"] [--allow-state-changing] [--unsafe-host-exec] [--timeout <seconds>] <registry-ref | https-url | path-to-mcp>\n'
|
|
44
45
|
);
|
|
45
46
|
return 2;
|
|
46
47
|
}
|
|
47
|
-
const { runLitmus } = await import("./src-TMJOIVGB.js");
|
|
48
48
|
const input = resolveTarget(target);
|
|
49
|
+
const guard = checkHostExec(input, unsafeHostExec);
|
|
50
|
+
if (!guard.allow) {
|
|
51
|
+
process.stderr.write(`\u2192 litmus: ${guard.refuse}
|
|
52
|
+
`);
|
|
53
|
+
return 2;
|
|
54
|
+
}
|
|
55
|
+
if (guard.warn) process.stderr.write(`\u2192 ${guard.warn}
|
|
56
|
+
`);
|
|
57
|
+
const { runLitmus } = await import("./src-I6AGG4CJ.js");
|
|
49
58
|
try {
|
|
50
|
-
const bundle = await runLitmus(input, { headers, allowStateChanging });
|
|
59
|
+
const bundle = await runLitmus(input, { headers, allowStateChanging, timeoutMs });
|
|
51
60
|
process.stdout.write(json ? canonicalStringify(bundle) + "\n" : formatBundle(bundle));
|
|
52
61
|
return bundle.grade === "D" || bundle.grade === "F" ? 1 : 0;
|
|
53
62
|
} catch (err) {
|
|
@@ -60,6 +69,8 @@ function parseAuthFlags(args, env = process.env) {
|
|
|
60
69
|
const headers = {};
|
|
61
70
|
const headerArgs = [];
|
|
62
71
|
let allowStateChanging = false;
|
|
72
|
+
let unsafeHostExec = false;
|
|
73
|
+
let timeoutMs = DEFAULT_RUN_TIMEOUT_MS;
|
|
63
74
|
let bearer = env.LITMUS_BEARER || void 0;
|
|
64
75
|
const positionals = [];
|
|
65
76
|
for (let i = 0; i < args.length; i++) {
|
|
@@ -67,6 +78,12 @@ function parseAuthFlags(args, env = process.env) {
|
|
|
67
78
|
if (a === "--json") continue;
|
|
68
79
|
if (a === "--allow-state-changing") {
|
|
69
80
|
allowStateChanging = true;
|
|
81
|
+
} else if (a === "--unsafe-host-exec") {
|
|
82
|
+
unsafeHostExec = true;
|
|
83
|
+
} else if (a === "--timeout") {
|
|
84
|
+
timeoutMs = timeoutSecondsToMs(args[++i]) ?? timeoutMs;
|
|
85
|
+
} else if (a.startsWith("--timeout=")) {
|
|
86
|
+
timeoutMs = timeoutSecondsToMs(a.slice("--timeout=".length)) ?? timeoutMs;
|
|
70
87
|
} else if (a === "--bearer") {
|
|
71
88
|
bearer = args[++i] ?? bearer;
|
|
72
89
|
} else if (a.startsWith("--bearer=")) {
|
|
@@ -89,7 +106,25 @@ function parseAuthFlags(args, env = process.env) {
|
|
|
89
106
|
const value = h.slice(idx + 1).trim();
|
|
90
107
|
if (key) headers[key] = value;
|
|
91
108
|
}
|
|
92
|
-
return { headers, allowStateChanging, positionals };
|
|
109
|
+
return { headers, allowStateChanging, unsafeHostExec, timeoutMs, positionals };
|
|
110
|
+
}
|
|
111
|
+
function timeoutSecondsToMs(v) {
|
|
112
|
+
if (!v) return void 0;
|
|
113
|
+
const sec = Number(v);
|
|
114
|
+
return Number.isFinite(sec) && sec > 0 ? Math.floor(sec * 1e3) : void 0;
|
|
115
|
+
}
|
|
116
|
+
function checkHostExec(input, optIn, optInHint = "--unsafe-host-exec", env = process.env) {
|
|
117
|
+
const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
|
|
118
|
+
const dockerIsolated = env.LITMUS_STDIO_ISOLATION === "docker";
|
|
119
|
+
if (!isStdio || dockerIsolated) return { allow: true };
|
|
120
|
+
const why = "this launches the target's own code; without Docker isolation it runs on THIS host";
|
|
121
|
+
if (optIn) return { allow: true, warn: `\u26A0 unsafe host execution \u2014 ${why}.` };
|
|
122
|
+
return {
|
|
123
|
+
allow: false,
|
|
124
|
+
refuse: `refusing host execution \u2014 ${why}.
|
|
125
|
+
\u2022 sandboxed (recommended): set LITMUS_STDIO_ISOLATION=docker (requires Docker)
|
|
126
|
+
\u2022 accept the risk: re-run with ${optInHint}`
|
|
127
|
+
};
|
|
93
128
|
}
|
|
94
129
|
function resolveTarget(target) {
|
|
95
130
|
if (/^https?:\/\//i.test(target)) return target;
|
|
@@ -112,7 +147,9 @@ function tsxCli() {
|
|
|
112
147
|
}
|
|
113
148
|
|
|
114
149
|
export {
|
|
150
|
+
DEFAULT_RUN_TIMEOUT_MS,
|
|
115
151
|
runLitmusCli,
|
|
116
152
|
parseAuthFlags,
|
|
153
|
+
checkHostExec,
|
|
117
154
|
resolveTarget
|
|
118
155
|
};
|
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
import {
|
|
2
|
+
DEFAULT_RUN_TIMEOUT_MS,
|
|
3
|
+
checkHostExec,
|
|
2
4
|
parseAuthFlags,
|
|
3
5
|
resolveTarget
|
|
4
|
-
} from "./chunk-BUKDFSDO.js";
|
|
6
|
+
} from "./chunk-GNPHHS6I.js";
|
|
5
7
|
import {
|
|
6
8
|
SKILL_METHODOLOGY_VERSION,
|
|
7
9
|
runLitmus,
|
|
8
10
|
runSkillLitmus,
|
|
9
11
|
runSkillQuality,
|
|
10
12
|
runSkillQualityJudged
|
|
11
|
-
} from "./chunk-RYJXVMCT.js";
|
|
13
|
+
} from "./chunk-63OICX66.js";
|
|
12
14
|
import {
|
|
13
15
|
CATEGORY_STATUS_UINT8,
|
|
14
16
|
METHODOLOGY_VERSION,
|
|
@@ -299,24 +301,32 @@ var RUN_LITMUS_TOOL_DESCRIPTION = [
|
|
|
299
301
|
var runLitmusInputShape = {
|
|
300
302
|
server_ref: z.string().min(1).max(512).describe("What to grade: a registry ref (npm/@scope/server), an https:// MCP URL, or a local path to an MCP entry file."),
|
|
301
303
|
bearer: z.string().min(1).max(8192).optional().describe("Bearer token for a token-gated https:// MCP server. Sent as `Authorization: Bearer <token>` to the target origin only. Ignored for stdio/local targets."),
|
|
302
|
-
header: z.array(z.string()).max(20).optional().describe('Extra HTTP headers for a gated https:// target, each "Key: Value" (e.g. "X-Api-Key: \u2026"). Overrides the bearer-derived Authorization for the same key. Ignored for stdio/local targets.')
|
|
304
|
+
header: z.array(z.string()).max(20).optional().describe('Extra HTTP headers for a gated https:// target, each "Key: Value" (e.g. "X-Api-Key: \u2026"). Overrides the bearer-derived Authorization for the same key. Ignored for stdio/local targets.'),
|
|
305
|
+
unsafe_host_exec: z.boolean().optional().describe("Required to grade a registry ref or local path: it launches the target's own code, and without Docker isolation that runs on THIS host. Set true to accept host execution. Ignored for https:// targets or when LITMUS_STDIO_ISOLATION=docker."),
|
|
306
|
+
timeout_seconds: z.number().int().positive().max(3600).optional().describe("Aggregate wall-clock ceiling for the whole run, in seconds (default 900). Bounds a hostile server that stretches the run across many tools/probes.")
|
|
303
307
|
};
|
|
304
308
|
var PROGRESS_TOTAL = 5;
|
|
305
|
-
async function handleRunLitmus({ server_ref, bearer, header }, extra) {
|
|
309
|
+
async function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, timeout_seconds }, extra) {
|
|
306
310
|
try {
|
|
307
311
|
const argv = [
|
|
308
312
|
...bearer ? ["--bearer", bearer] : [],
|
|
309
313
|
...(header ?? []).flatMap((h) => ["--header", h])
|
|
310
314
|
];
|
|
311
315
|
const { headers } = parseAuthFlags(argv, {});
|
|
316
|
+
const input = resolveTarget(server_ref);
|
|
317
|
+
const guard = checkHostExec(input, unsafe_host_exec ?? false, 'set "unsafe_host_exec": true');
|
|
318
|
+
if (!guard.allow) {
|
|
319
|
+
return { isError: true, content: [{ type: "text", text: `run_litmus refused: ${guard.refuse}` }] };
|
|
320
|
+
}
|
|
312
321
|
const progressToken = extra._meta?.progressToken;
|
|
313
322
|
const sendProgress = progressToken !== void 0 ? (progress, message) => void extra.sendNotification({
|
|
314
323
|
method: "notifications/progress",
|
|
315
324
|
params: { progressToken, progress, total: PROGRESS_TOTAL, message }
|
|
316
325
|
}) : void 0;
|
|
317
326
|
sendProgress?.(0, `Connecting to ${server_ref}\u2026`);
|
|
318
|
-
const bundle = await runLitmus(
|
|
327
|
+
const bundle = await runLitmus(input, {
|
|
319
328
|
...Object.keys(headers).length ? { headers } : {},
|
|
329
|
+
timeoutMs: timeout_seconds ? timeout_seconds * 1e3 : DEFAULT_RUN_TIMEOUT_MS,
|
|
320
330
|
...sendProgress ? { onProgress: (done, _total, label) => sendProgress(done, label) } : {}
|
|
321
331
|
});
|
|
322
332
|
const payload = summarize(bundle);
|
package/dist/cli-skill.js
CHANGED
package/dist/cli.js
CHANGED
package/dist/index.d.ts
CHANGED
|
@@ -323,6 +323,35 @@ interface RunLitmusOptions {
|
|
|
323
323
|
onProgress?: (done: number, total: number, label: string) => void;
|
|
324
324
|
}
|
|
325
325
|
declare function runLitmus(target: TargetInput, opts?: RunLitmusOptions): Promise<EvidenceBundle>;
|
|
326
|
+
/** The fields of a `tools/list` entry the harness reads. */
|
|
327
|
+
interface ListedTool {
|
|
328
|
+
name: string;
|
|
329
|
+
description?: string;
|
|
330
|
+
inputSchema?: unknown;
|
|
331
|
+
annotations?: unknown;
|
|
332
|
+
}
|
|
333
|
+
interface ListToolsClient {
|
|
334
|
+
listTools(params?: {
|
|
335
|
+
cursor?: string;
|
|
336
|
+
}): Promise<{
|
|
337
|
+
tools?: ListedTool[];
|
|
338
|
+
nextCursor?: string;
|
|
339
|
+
}>;
|
|
340
|
+
}
|
|
341
|
+
/**
|
|
342
|
+
* Follow `tools/list` pagination to the end, accumulating the full tool surface.
|
|
343
|
+
* The MCP SDK's `listTools()` returns a single page and does not auto-paginate,
|
|
344
|
+
* so a server can park a tool (e.g. `transfer_funds`) or a poisoned description
|
|
345
|
+
* behind a `nextCursor` — invisible to a one-page lister, yet served to a real
|
|
346
|
+
* agent. We enumerate every page so the fingerprint and grade cover what the
|
|
347
|
+
* agent actually gets, and **fail closed**: if the server is still paginating
|
|
348
|
+
* past the gradable cap, we refuse rather than grade a partial surface.
|
|
349
|
+
*/
|
|
350
|
+
declare function enumerateTools(client: ListToolsClient, opts?: {
|
|
351
|
+
maxTools?: number;
|
|
352
|
+
maxBytes?: number;
|
|
353
|
+
listTimeoutMs?: number;
|
|
354
|
+
}): Promise<ListedTool[]>;
|
|
326
355
|
|
|
327
356
|
/**
|
|
328
357
|
* Tool-surface fingerprint (litmus-test-v1 §6, technical-design §3).
|
|
@@ -961,23 +990,27 @@ declare const runLitmusInputShape: {
|
|
|
961
990
|
server_ref: z.ZodString;
|
|
962
991
|
bearer: z.ZodOptional<z.ZodString>;
|
|
963
992
|
header: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
993
|
+
unsafe_host_exec: z.ZodOptional<z.ZodBoolean>;
|
|
994
|
+
timeout_seconds: z.ZodOptional<z.ZodNumber>;
|
|
964
995
|
};
|
|
965
|
-
declare function handleRunLitmus({ server_ref, bearer, header }: {
|
|
996
|
+
declare function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, timeout_seconds }: {
|
|
966
997
|
server_ref: string;
|
|
967
998
|
bearer?: string;
|
|
968
999
|
header?: string[];
|
|
1000
|
+
unsafe_host_exec?: boolean;
|
|
1001
|
+
timeout_seconds?: number;
|
|
969
1002
|
}, extra: RequestHandlerExtra<ServerRequest, ServerNotification>): Promise<{
|
|
1003
|
+
isError: true;
|
|
970
1004
|
content: {
|
|
971
1005
|
type: "text";
|
|
972
1006
|
text: string;
|
|
973
1007
|
}[];
|
|
974
|
-
isError?: undefined;
|
|
975
1008
|
} | {
|
|
976
|
-
isError: true;
|
|
977
1009
|
content: {
|
|
978
1010
|
type: "text";
|
|
979
1011
|
text: string;
|
|
980
1012
|
}[];
|
|
1013
|
+
isError?: undefined;
|
|
981
1014
|
}>;
|
|
982
1015
|
|
|
983
1016
|
/**
|
|
@@ -1057,6 +1090,10 @@ interface ParsedLitmusFlags {
|
|
|
1057
1090
|
headers: Record<string, string>;
|
|
1058
1091
|
/** Whether to actively call state-changing tools (opt-in). */
|
|
1059
1092
|
allowStateChanging: boolean;
|
|
1093
|
+
/** Opt-in to run a stdio target's code on the host without Docker isolation. */
|
|
1094
|
+
unsafeHostExec: boolean;
|
|
1095
|
+
/** Aggregate wall-clock ceiling (ms) — `--timeout <seconds>`, else the default. */
|
|
1096
|
+
timeoutMs: number;
|
|
1060
1097
|
/** Non-flag arguments, in order (positionals[0] is the target). */
|
|
1061
1098
|
positionals: string[];
|
|
1062
1099
|
}
|
|
@@ -1072,4 +1109,4 @@ declare function parseAuthFlags(args: readonly string[], env?: NodeJS.ProcessEnv
|
|
|
1072
1109
|
/** A target is an https URL, a local MCP entry file, or a registry ref. */
|
|
1073
1110
|
declare function resolveTarget(target: string): string | StdioCommand;
|
|
1074
1111
|
|
|
1075
|
-
export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, encodeSkillAttestationFields, exfilInstruction, fingerprintToolDefs, 
formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, verifySkillInputShape };
|
|
1112
|
+
export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type ListToolsClient, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, encodeSkillAttestationFields, enumerateTools, 
exfilInstruction, fingerprintToolDefs, formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, verifySkillInputShape };
|
package/dist/index.js
CHANGED
|
@@ -31,11 +31,11 @@ import {
|
|
|
31
31
|
skillAttestationFields,
|
|
32
32
|
skillSchemaUID,
|
|
33
33
|
verifySkillInputShape
|
|
34
|
-
} from "./chunk-Z66GKAQD.js";
|
|
34
|
+
} from "./chunk-VAOQNFW3.js";
|
|
35
35
|
import {
|
|
36
36
|
parseAuthFlags,
|
|
37
37
|
resolveTarget
|
|
38
|
-
} from "./chunk-BUKDFSDO.js";
|
|
38
|
+
} from "./chunk-GNPHHS6I.js";
|
|
39
39
|
import {
|
|
40
40
|
SKILL_BUNDLE_SCHEMA_VERSION,
|
|
41
41
|
SKILL_METHODOLOGY_VERSION,
|
|
@@ -46,6 +46,7 @@ import {
|
|
|
46
46
|
classifyTool,
|
|
47
47
|
connectTarget,
|
|
48
48
|
dangerousCommand,
|
|
49
|
+
enumerateTools,
|
|
49
50
|
exfilInstruction,
|
|
50
51
|
fingerprintToolDefs,
|
|
51
52
|
gradeFromCategories,
|
|
@@ -68,7 +69,7 @@ import {
|
|
|
68
69
|
skillInjectionFails,
|
|
69
70
|
stateChangingToolNames,
|
|
70
71
|
stripExamples
|
|
71
|
-
} from "./chunk-RYJXVMCT.js";
|
|
72
|
+
} from "./chunk-63OICX66.js";
|
|
72
73
|
import {
|
|
73
74
|
BUNDLE_SCHEMA_VERSION,
|
|
74
75
|
CATEGORY_STATUS_UINT8,
|
|
@@ -112,16 +113,18 @@ function gateDecision(attestation, live, passing = DEFAULT_PASSING, now = BigInt
|
|
|
112
113
|
const versionNote = attestation.resolvedVersion ? ` (graded version ${attestation.resolvedVersion})` : "";
|
|
113
114
|
return { action: "pay", reason: `grade ${attestation.overallGrade}; live fingerprint matches${versionNote}` };
|
|
114
115
|
}
|
|
116
|
+
async function fingerprintLiveSurface(client) {
|
|
117
|
+
const defs = (await enumerateTools(client)).map((t) => ({
|
|
118
|
+
name: t.name,
|
|
119
|
+
description: t.description ?? "",
|
|
120
|
+
inputSchema: t.inputSchema ?? null
|
|
121
|
+
}));
|
|
122
|
+
return fingerprintToolDefs(defs).fingerprint;
|
|
123
|
+
}
|
|
115
124
|
async function liveFingerprint(target) {
|
|
116
125
|
const conn = await connectTarget(target);
|
|
117
126
|
try {
|
|
118
|
-
|
|
119
|
-
const defs = (tools ?? []).map((t) => ({
|
|
120
|
-
name: t.name,
|
|
121
|
-
description: t.description ?? "",
|
|
122
|
-
inputSchema: t.inputSchema ?? null
|
|
123
|
-
}));
|
|
124
|
-
return { fingerprint: fingerprintToolDefs(defs).fingerprint, serverRef: conn.serverRef };
|
|
127
|
+
return { fingerprint: await fingerprintLiveSurface(conn.client), serverRef: conn.serverRef };
|
|
125
128
|
} finally {
|
|
126
129
|
await conn.teardown();
|
|
127
130
|
}
|
|
@@ -160,6 +163,7 @@ export {
|
|
|
160
163
|
encodeLitmusAttestation,
|
|
161
164
|
encodeSkillAttestation,
|
|
162
165
|
encodeSkillAttestationFields,
|
|
166
|
+
enumerateTools,
|
|
163
167
|
exfilInstruction,
|
|
164
168
|
fingerprintToolDefs,
|
|
165
169
|
formatServerRef,
|
package/dist/mcp.js
CHANGED
|
@@ -20,11 +20,11 @@ import {
|
|
|
20
20
|
runSkillLitmusInputShape,
|
|
21
21
|
verifyInputShape,
|
|
22
22
|
verifySkillInputShape
|
|
23
|
-
} from "./chunk-Z66GKAQD.js";
|
|
24
|
-
import "./chunk-BUKDFSDO.js";
|
|
23
|
+
} from "./chunk-VAOQNFW3.js";
|
|
24
|
+
import "./chunk-GNPHHS6I.js";
|
|
25
25
|
import {
|
|
26
26
|
judgeFromEnv
|
|
27
|
-
} from "./chunk-RYJXVMCT.js";
|
|
27
|
+
} from "./chunk-63OICX66.js";
|
|
28
28
|
import "./chunk-44R4ZYOE.js";
|
|
29
29
|
|
|
30
30
|
// src/mcp.ts
|
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
classifyTool,
|
|
9
9
|
connectTarget,
|
|
10
10
|
dangerousCommand,
|
|
11
|
+
enumerateTools,
|
|
11
12
|
exfilInstruction,
|
|
12
13
|
fingerprintToolDefs,
|
|
13
14
|
gradeFromCategories,
|
|
@@ -30,7 +31,7 @@ import {
|
|
|
30
31
|
skillInjectionFails,
|
|
31
32
|
stateChangingToolNames,
|
|
32
33
|
stripExamples
|
|
33
|
-
} from "./chunk-RYJXVMCT.js";
|
|
34
|
+
} from "./chunk-63OICX66.js";
|
|
34
35
|
import "./chunk-44R4ZYOE.js";
|
|
35
36
|
export {
|
|
36
37
|
SKILL_BUNDLE_SCHEMA_VERSION,
|
|
@@ -42,6 +43,7 @@ export {
|
|
|
42
43
|
classifyTool,
|
|
43
44
|
connectTarget,
|
|
44
45
|
dangerousCommand,
|
|
46
|
+
enumerateTools,
|
|
45
47
|
exfilInstruction,
|
|
46
48
|
fingerprintToolDefs,
|
|
47
49
|
gradeFromCategories,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@polygraphso/litmus",
|
|
3
|
-
"version": "0.9.1",
|
|
3
|
+
"version": "0.10.0",
|
|
4
4
|
"description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"homepage": "https://polygraph.so",
|
|
@@ -62,12 +62,12 @@
|
|
|
62
62
|
"tsup": "^8.3.0",
|
|
63
63
|
"typescript": "^5.9.3",
|
|
64
64
|
"vitest": "^2.1.0",
|
|
65
|
-
"@polygraph/core": "0.0.0",
|
|
66
65
|
"@polygraph/onchain": "0.0.0",
|
|
67
|
-
"@polygraph/
|
|
66
|
+
"@polygraph/core": "0.0.0",
|
|
68
67
|
"@polygraph/probes": "0.0.0",
|
|
69
|
-
"@polygraph/
|
|
70
|
-
"@polygraph/mcp": "0.0.0"
|
|
68
|
+
"@polygraph/agent": "0.0.0",
|
|
69
|
+
"@polygraph/mcp": "0.0.0",
|
|
70
|
+
"@polygraph/cli": "0.0.0"
|
|
71
71
|
},
|
|
72
72
|
"publishConfig": {
|
|
73
73
|
"access": "public"
|