@polygraphso/litmus 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-44R4ZYOE.js → chunk-CKQZFK77.js} +8 -1
- package/dist/{chunk-63OICX66.js → chunk-OGOFUBLN.js} +28 -6
- package/dist/{chunk-VAOQNFW3.js → chunk-PTWDLGI5.js} +26 -19
- package/dist/{chunk-GNPHHS6I.js → chunk-TTGWSGPC.js} +75 -14
- package/dist/cli-skill.js +28 -20
- package/dist/cli.js +2 -2
- package/dist/index.d.ts +37 -3
- package/dist/index.js +10 -4
- package/dist/mcp.js +4 -4
- package/dist/{src-I6AGG4CJ.js → src-ZHTFCKNR.js} +6 -2
- package/package.json +3 -3
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
// ../core/src/types.ts
|
|
2
2
|
var METHODOLOGY_VERSION = "litmus-v5";
|
|
3
|
-
var BUNDLE_SCHEMA_VERSION = "1.
|
|
3
|
+
var BUNDLE_SCHEMA_VERSION = "1.5.0";
|
|
4
|
+
var CATEGORY_META = {
|
|
5
|
+
"C-01": { label: "tool-output injection", description: "whether it tries to hijack the caller through tool output" },
|
|
6
|
+
"C-02": { label: "permission / egress overreach", description: "whether it reaches the network beyond what it declares" },
|
|
7
|
+
"C-03": { label: "sensitive-data handling", description: "whether it leaks planted secrets it was handed" },
|
|
8
|
+
"C-04": { label: "adversarial-input handling", description: "whether it stays stable on malformed or hostile input" }
|
|
9
|
+
};
|
|
4
10
|
var CATEGORY_STATUS_UINT8 = {
|
|
5
11
|
pass: 0,
|
|
6
12
|
fail: 1,
|
|
@@ -174,6 +180,7 @@ function sortDeep(value, depth = 0) {
|
|
|
174
180
|
export {
|
|
175
181
|
METHODOLOGY_VERSION,
|
|
176
182
|
BUNDLE_SCHEMA_VERSION,
|
|
183
|
+
CATEGORY_META,
|
|
177
184
|
CATEGORY_STATUS_UINT8,
|
|
178
185
|
ServerRefParseError,
|
|
179
186
|
parseServerRef,
|
|
@@ -3,7 +3,7 @@ import {
|
|
|
3
3
|
METHODOLOGY_VERSION,
|
|
4
4
|
parseServerRef,
|
|
5
5
|
serverKey
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-CKQZFK77.js";
|
|
7
7
|
|
|
8
8
|
// ../probes/src/harness.ts
|
|
9
9
|
import { execFile as execFile3 } from "child_process";
|
|
@@ -451,6 +451,10 @@ import { execFile as execFile2 } from "child_process";
|
|
|
451
451
|
import { promisify } from "util";
|
|
452
452
|
import { randomUUID as randomUUID3 } from "crypto";
|
|
453
453
|
var execFileP = promisify(execFile2);
|
|
454
|
+
var TARGET_STDERR = process.env.LITMUS_DEBUG ? "inherit" : "pipe";
|
|
455
|
+
function discardStderr(transport) {
|
|
456
|
+
transport.stderr?.resume?.();
|
|
457
|
+
}
|
|
454
458
|
var CLIENT_INFO = { name: "polygraph-litmus", version: "0.0.0" };
|
|
455
459
|
async function connectTarget(input, opts = {}) {
|
|
456
460
|
const isolated = opts.isolation === "docker";
|
|
@@ -464,6 +468,7 @@ async function connectTarget(input, opts = {}) {
|
|
|
464
468
|
command: input.command,
|
|
465
469
|
args: input.args ?? [],
|
|
466
470
|
env: { ...getDefaultEnvironment(), ...opts.seedEnv ?? {}, ...input.env ?? {} },
|
|
471
|
+
stderr: TARGET_STDERR,
|
|
467
472
|
...input.cwd ?? opts.seedCwd ? { cwd: input.cwd ?? opts.seedCwd } : {}
|
|
468
473
|
});
|
|
469
474
|
const cmdline = [input.command, ...input.args ?? []].join(" ");
|
|
@@ -497,6 +502,7 @@ async function connectTarget(input, opts = {}) {
|
|
|
497
502
|
command: launch.command,
|
|
498
503
|
args: launch.args,
|
|
499
504
|
env: { ...getDefaultEnvironment(), ...opts.seedEnv ?? {} },
|
|
505
|
+
stderr: TARGET_STDERR,
|
|
500
506
|
...opts.seedCwd ? { cwd: opts.seedCwd } : {}
|
|
501
507
|
});
|
|
502
508
|
const client = await connectOrThrow(transport);
|
|
@@ -518,14 +524,14 @@ async function connectHostNpm(ref, parsed, opts) {
|
|
|
518
524
|
const binNames = await fetchNpmBins(spec, parsed.name);
|
|
519
525
|
if (!binNames || binNames.length === 0) {
|
|
520
526
|
const args = ["-y", spec];
|
|
521
|
-
const transport = new StdioClientTransport({ command: "npx", args, env, ...cwd });
|
|
527
|
+
const transport = new StdioClientTransport({ command: "npx", args, env, stderr: TARGET_STDERR, ...cwd });
|
|
522
528
|
const client = await connectOrThrow(transport);
|
|
523
529
|
return makeResult(client, "stdio", { kind: "stdio", command: ["npx", ...args].join(" "), url: null }, serverRefVal, resolvedVersion, []);
|
|
524
530
|
}
|
|
525
531
|
const candidates = orderBinCandidates(binNames, parsed.name);
|
|
526
532
|
const { result } = await probeForMcpBin(ref, candidates, async (bin) => {
|
|
527
533
|
const args = ["-y", "-p", spec, bin];
|
|
528
|
-
const transport = new StdioClientTransport({ command: "npx", args, env, ...cwd });
|
|
534
|
+
const transport = new StdioClientTransport({ command: "npx", args, env, stderr: TARGET_STDERR, ...cwd });
|
|
529
535
|
const client = await tryConnect(transport);
|
|
530
536
|
return client ? { client, descriptor: { kind: "stdio", command: ["npx", ...args].join(" "), url: null } } : null;
|
|
531
537
|
});
|
|
@@ -562,8 +568,9 @@ async function connectIsolatedNpm(ref, parsed, opts) {
|
|
|
562
568
|
const transport = new StdioClientTransport({
|
|
563
569
|
command: launch.command,
|
|
564
570
|
args: namedArgs,
|
|
565
|
-
env: getDefaultEnvironment()
|
|
571
|
+
env: getDefaultEnvironment(),
|
|
566
572
|
// default env only: no host secrets, no canaries
|
|
573
|
+
stderr: TARGET_STDERR
|
|
567
574
|
});
|
|
568
575
|
const client = await tryConnect(transport);
|
|
569
576
|
if (!client) {
|
|
@@ -608,6 +615,7 @@ async function tryConnect(transport) {
|
|
|
608
615
|
const client = new Client(CLIENT_INFO, { capabilities: {} });
|
|
609
616
|
try {
|
|
610
617
|
await withConnectTimeout(client.connect(transport), transport);
|
|
618
|
+
discardStderr(transport);
|
|
611
619
|
return client;
|
|
612
620
|
} catch {
|
|
613
621
|
try {
|
|
@@ -620,6 +628,7 @@ async function tryConnect(transport) {
|
|
|
620
628
|
async function connectOrThrow(transport) {
|
|
621
629
|
const client = new Client(CLIENT_INFO, { capabilities: {} });
|
|
622
630
|
await withConnectTimeout(client.connect(transport), transport);
|
|
631
|
+
discardStderr(transport);
|
|
623
632
|
return client;
|
|
624
633
|
}
|
|
625
634
|
function makeResult(client, kind, descriptor, serverRef, resolvedVersion, teardownExtra) {
|
|
@@ -629,6 +638,9 @@ function makeResult(client, kind, descriptor, serverRef, resolvedVersion, teardo
|
|
|
629
638
|
descriptor,
|
|
630
639
|
serverRef,
|
|
631
640
|
resolvedVersion,
|
|
641
|
+
// The server's self-reported identity from the initialize handshake. The SDK
|
|
642
|
+
// exposes it post-connect via getServerVersion(); absent/blank → null.
|
|
643
|
+
selfReportedVersion: client.getServerVersion()?.version ?? null,
|
|
632
644
|
teardown: async () => {
|
|
633
645
|
try {
|
|
634
646
|
await client.close();
|
|
@@ -2009,6 +2021,7 @@ function assembleBundle(input) {
|
|
|
2009
2021
|
methodologyVersion: METHODOLOGY_VERSION,
|
|
2010
2022
|
serverRef: input.serverRef,
|
|
2011
2023
|
resolvedVersion: input.resolvedVersion,
|
|
2024
|
+
selfReportedVersion: input.selfReportedVersion,
|
|
2012
2025
|
target: input.target,
|
|
2013
2026
|
toolDefsFingerprint: input.toolDefsFingerprint,
|
|
2014
2027
|
toolDefs: input.toolDefs,
|
|
@@ -2027,7 +2040,7 @@ async function runLitmus(target, opts = {}) {
|
|
|
2027
2040
|
const isolation = opts.isolation ?? (process.env.LITMUS_STDIO_ISOLATION === "docker" ? "docker" : "none");
|
|
2028
2041
|
const ranAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
2029
2042
|
const baselineAllowlist = [...DEFAULT_EGRESS_BASELINE, ...parseAllowlistEnv(process.env.LITMUS_EGRESS_ALLOWLIST)];
|
|
2030
|
-
const dockerAvailable = await
|
|
2043
|
+
const dockerAvailable = await isDockerAvailable();
|
|
2031
2044
|
const canaries = mintCanaries();
|
|
2032
2045
|
const seedEnv = canaryEnv(canaries);
|
|
2033
2046
|
const isHttp = typeof target === "string" && /^https?:\/\//i.test(target);
|
|
@@ -2091,6 +2104,7 @@ async function runLitmus(target, opts = {}) {
|
|
|
2091
2104
|
return assembleBundle({
|
|
2092
2105
|
serverRef: conn.serverRef,
|
|
2093
2106
|
resolvedVersion: conn.resolvedVersion,
|
|
2107
|
+
selfReportedVersion: conn.selfReportedVersion,
|
|
2094
2108
|
// Surface the server's declared egress in the bundle (disclosure: a
|
|
2095
2109
|
// declaration is not exoneration — the consumer/agent-gate can judge).
|
|
2096
2110
|
target: egress.declaredEgress.length ? { ...conn.descriptor, declaredEgress: egress.declaredEgress } : conn.descriptor,
|
|
@@ -2169,7 +2183,7 @@ function withTimeout(p, ms, label) {
|
|
|
2169
2183
|
})
|
|
2170
2184
|
]);
|
|
2171
2185
|
}
|
|
2172
|
-
function
|
|
2186
|
+
function isDockerAvailable() {
|
|
2173
2187
|
return new Promise((resolve) => {
|
|
2174
2188
|
const child = execFile3("docker", ["info"], { timeout: 4e3 }, (err) => resolve(!err));
|
|
2175
2189
|
child.on("error", () => resolve(false));
|
|
@@ -2364,6 +2378,12 @@ function overBroadTrigger(description) {
|
|
|
2364
2378
|
}
|
|
2365
2379
|
|
|
2366
2380
|
// ../probes/src/skills/grade-skill.ts
|
|
2381
|
+
var SKILL_CATEGORY_META = {
|
|
2382
|
+
"S-01": { label: "prompt injection / context poisoning", description: "whether the skill body tries to hijack the agent" },
|
|
2383
|
+
"S-03": { label: "data-exfiltration instructions", description: "whether it instructs the agent to leak secrets" },
|
|
2384
|
+
"S-04": { label: "dangerous bundled commands", description: "whether it ships dangerous executable commands" },
|
|
2385
|
+
"S-05": { label: "tool / permission overreach", description: "whether it claims more capability than it needs" }
|
|
2386
|
+
};
|
|
2367
2387
|
var DISQUALIFYING = /* @__PURE__ */ new Set(["S-01", "S-03"]);
|
|
2368
2388
|
var CAPPING = /* @__PURE__ */ new Set(["S-04", "S-05"]);
|
|
2369
2389
|
function gradeSkillCategories(categories) {
|
|
@@ -2629,6 +2649,7 @@ export {
|
|
|
2629
2649
|
assembleBundle,
|
|
2630
2650
|
runLitmus,
|
|
2631
2651
|
enumerateTools,
|
|
2652
|
+
isDockerAvailable,
|
|
2632
2653
|
SkillLoadError,
|
|
2633
2654
|
loadSkill,
|
|
2634
2655
|
stripExamples,
|
|
@@ -2637,6 +2658,7 @@ export {
|
|
|
2637
2658
|
exfilInstruction,
|
|
2638
2659
|
dangerousCommand,
|
|
2639
2660
|
overBroadTrigger,
|
|
2661
|
+
SKILL_CATEGORY_META,
|
|
2640
2662
|
gradeSkillCategories,
|
|
2641
2663
|
SKILL_METHODOLOGY_VERSION,
|
|
2642
2664
|
SKILL_BUNDLE_SCHEMA_VERSION,
|
|
@@ -3,22 +3,24 @@ import {
|
|
|
3
3
|
checkHostExec,
|
|
4
4
|
parseAuthFlags,
|
|
5
5
|
resolveTarget
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-TTGWSGPC.js";
|
|
7
7
|
import {
|
|
8
|
+
SKILL_CATEGORY_META,
|
|
8
9
|
SKILL_METHODOLOGY_VERSION,
|
|
9
10
|
runLitmus,
|
|
10
11
|
runSkillLitmus,
|
|
11
12
|
runSkillQuality,
|
|
12
13
|
runSkillQualityJudged
|
|
13
|
-
} from "./chunk-
|
|
14
|
+
} from "./chunk-OGOFUBLN.js";
|
|
14
15
|
import {
|
|
16
|
+
CATEGORY_META,
|
|
15
17
|
CATEGORY_STATUS_UINT8,
|
|
16
18
|
METHODOLOGY_VERSION,
|
|
17
19
|
parseServerRef,
|
|
18
20
|
parseSkillRef,
|
|
19
21
|
serverKey,
|
|
20
22
|
skillKey
|
|
21
|
-
} from "./chunk-
|
|
23
|
+
} from "./chunk-CKQZFK77.js";
|
|
22
24
|
|
|
23
25
|
// ../onchain/src/networks.ts
|
|
24
26
|
var NETWORKS = {
|
|
@@ -314,9 +316,14 @@ async function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, t
|
|
|
314
316
|
];
|
|
315
317
|
const { headers } = parseAuthFlags(argv, {});
|
|
316
318
|
const input = resolveTarget(server_ref);
|
|
317
|
-
const
|
|
318
|
-
|
|
319
|
-
|
|
319
|
+
const decision = checkHostExec(input, {
|
|
320
|
+
optIn: unsafe_host_exec ?? false,
|
|
321
|
+
dockerAvailable: false,
|
|
322
|
+
interactive: false,
|
|
323
|
+
optInHint: 'set "unsafe_host_exec": true'
|
|
324
|
+
});
|
|
325
|
+
if (decision.action === "refuse") {
|
|
326
|
+
return { isError: true, content: [{ type: "text", text: `run_litmus refused: ${decision.refuse}` }] };
|
|
320
327
|
}
|
|
321
328
|
const progressToken = extra._meta?.progressToken;
|
|
322
329
|
const sendProgress = progressToken !== void 0 ? (progress, message) => void extra.sendNotification({
|
|
@@ -336,24 +343,28 @@ async function handleRunLitmus({ server_ref, bearer, header, unsafe_host_exec, t
|
|
|
336
343
|
return { isError: true, content: [{ type: "text", text: `run_litmus failed: ${message}` }] };
|
|
337
344
|
}
|
|
338
345
|
}
|
|
339
|
-
var CATEGORY_LABEL = {
|
|
340
|
-
"C-01": "tool-output injection",
|
|
341
|
-
"C-02": "permission / egress overreach",
|
|
342
|
-
"C-03": "sensitive-data handling",
|
|
343
|
-
"C-04": "adversarial-input handling"
|
|
344
|
-
};
|
|
345
346
|
function summarize(b) {
|
|
346
347
|
const find = (code) => b.categories.find((c) => c.code === code);
|
|
347
348
|
const categories = ["C-01", "C-02", "C-03", "C-04"].map((code) => {
|
|
348
349
|
const c = find(code);
|
|
349
350
|
const findings = c?.status === "fail" ? c.probes.flatMap((p) => p.findings).filter((f) => f.severity === "high").slice(0, 5).map((f) => ({ tool: f.tool, kind: f.kind, match: truncate(f.match, 120), host: f.host, port: f.port })) : [];
|
|
350
|
-
return {
|
|
351
|
+
return {
|
|
352
|
+
code,
|
|
353
|
+
check: CATEGORY_META[code].label,
|
|
354
|
+
description: CATEGORY_META[code].description,
|
|
355
|
+
status: c?.status ?? "unknown",
|
|
356
|
+
reason: c?.reason ?? null,
|
|
357
|
+
findings
|
|
358
|
+
};
|
|
351
359
|
});
|
|
352
360
|
return {
|
|
353
361
|
grade: b.grade,
|
|
354
362
|
summary: b.gradeRationale,
|
|
355
363
|
serverRef: b.serverRef,
|
|
356
364
|
resolvedVersion: b.resolvedVersion,
|
|
365
|
+
// The server's self-asserted serverInfo.version — descriptive only, not a
|
|
366
|
+
// re-fetchable pin (cf. resolvedVersion). Null when the server reports none.
|
|
367
|
+
selfReportedVersion: b.selfReportedVersion,
|
|
357
368
|
fingerprint: b.toolDefsFingerprint,
|
|
358
369
|
ranAt: b.ranAt,
|
|
359
370
|
methodologyVersion: b.methodologyVersion,
|
|
@@ -417,15 +428,11 @@ async function handleRunSkillLitmus({ skill_ref }, ctx = {}) {
|
|
|
417
428
|
function errorResult(message) {
|
|
418
429
|
return { isError: true, content: [{ type: "text", text: `run_skill_litmus failed: ${message}` }] };
|
|
419
430
|
}
|
|
420
|
-
var CATEGORY_LABEL2 = {
|
|
421
|
-
"S-01": "prompt injection / context poisoning",
|
|
422
|
-
"S-03": "data-exfiltration instructions",
|
|
423
|
-
"S-04": "dangerous bundled commands"
|
|
424
|
-
};
|
|
425
431
|
function summarize2(b) {
|
|
426
432
|
const categories = b.categories.map((c) => ({
|
|
427
433
|
code: c.code,
|
|
428
|
-
check:
|
|
434
|
+
check: SKILL_CATEGORY_META[c.code]?.label ?? c.code,
|
|
435
|
+
description: SKILL_CATEGORY_META[c.code]?.description ?? null,
|
|
429
436
|
status: c.status,
|
|
430
437
|
reason: c.reason ?? null,
|
|
431
438
|
findings: c.status === "fail" ? c.findings.filter((f) => f.severity === "high").slice(0, 5).map((f) => ({ kind: f.kind, match: truncate2(f.match, 120), file: f.file })) : []
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
|
+
CATEGORY_META,
|
|
2
3
|
canonicalStringify
|
|
3
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-CKQZFK77.js";
|
|
4
5
|
|
|
5
6
|
// ../cli/src/litmus.ts
|
|
6
7
|
import { existsSync } from "fs";
|
|
@@ -9,11 +10,17 @@ import * as path from "path";
|
|
|
9
10
|
|
|
10
11
|
// ../cli/src/format.ts
|
|
11
12
|
function formatBundle(b) {
|
|
12
|
-
const status = (code) => b.categories.find((c) => c.code === code)?.status ?? "?";
|
|
13
13
|
const lines = [];
|
|
14
14
|
lines.push(`\u2192 ${b.methodologyVersion} \xB7 ${b.serverRef}`);
|
|
15
15
|
if (b.resolvedVersion) lines.push(`\u2192 version ${b.resolvedVersion}`);
|
|
16
|
-
lines.push(`\u2192
|
|
16
|
+
if (b.selfReportedVersion) lines.push(`\u2192 self-reported ${b.selfReportedVersion} (unverified)`);
|
|
17
|
+
lines.push("\u2192 checks");
|
|
18
|
+
const labelWidth = Math.max(0, ...b.categories.map((c) => CATEGORY_META[c.code].label.length));
|
|
19
|
+
for (const c of b.categories) {
|
|
20
|
+
const { label, description } = CATEGORY_META[c.code];
|
|
21
|
+
lines.push(` ${c.code} ${label.padEnd(labelWidth)} ${c.status}`);
|
|
22
|
+
lines.push(` ${description}`);
|
|
23
|
+
}
|
|
17
24
|
const c01 = b.categories.find((c) => c.code === "C-01");
|
|
18
25
|
if (c01?.status === "fail") {
|
|
19
26
|
const highs = c01.probes.flatMap((p) => p.findings).filter((f) => f.severity === "high");
|
|
@@ -46,17 +53,37 @@ async function runLitmusCli(args) {
|
|
|
46
53
|
return 2;
|
|
47
54
|
}
|
|
48
55
|
const input = resolveTarget(target);
|
|
49
|
-
const
|
|
50
|
-
|
|
51
|
-
|
|
56
|
+
const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
|
|
57
|
+
const interactive = Boolean(process.stdin.isTTY && process.stdout.isTTY);
|
|
58
|
+
const probes = await import("./src-ZHTFCKNR.js");
|
|
59
|
+
const dockerAvailable = isStdio && interactive ? await probes.isDockerAvailable() : false;
|
|
60
|
+
const decision = checkHostExec(input, { optIn: unsafeHostExec, dockerAvailable, interactive });
|
|
61
|
+
if (decision.action === "refuse") {
|
|
62
|
+
process.stderr.write(`\u2192 litmus: ${decision.refuse}
|
|
52
63
|
`);
|
|
53
64
|
return 2;
|
|
54
65
|
}
|
|
55
|
-
if (
|
|
66
|
+
if (decision.action === "confirm" && !await promptYesNo(decision.prompt, decision.defaultYes)) {
|
|
67
|
+
process.stderr.write("\u2192 litmus: cancelled.\n");
|
|
68
|
+
return 2;
|
|
69
|
+
}
|
|
70
|
+
const isolation = decision.isolation;
|
|
71
|
+
if (decision.warn) process.stderr.write(`\u2192 ${decision.warn}
|
|
56
72
|
`);
|
|
57
|
-
|
|
73
|
+
if (!json) process.stderr.write(`\u2192 running litmus against ${target} \u2026 (~20\u201360s)
|
|
74
|
+
`);
|
|
75
|
+
const onProgress = (done, total, label) => {
|
|
76
|
+
if (!json) process.stderr.write(` \u2192 [${done}/${total}] ${label}
|
|
77
|
+
`);
|
|
78
|
+
};
|
|
58
79
|
try {
|
|
59
|
-
const bundle = await runLitmus(input, {
|
|
80
|
+
const bundle = await probes.runLitmus(input, {
|
|
81
|
+
headers,
|
|
82
|
+
allowStateChanging,
|
|
83
|
+
timeoutMs,
|
|
84
|
+
onProgress,
|
|
85
|
+
...isolation ? { isolation } : {}
|
|
86
|
+
});
|
|
60
87
|
process.stdout.write(json ? canonicalStringify(bundle) + "\n" : formatBundle(bundle));
|
|
61
88
|
return bundle.grade === "D" || bundle.grade === "F" ? 1 : 0;
|
|
62
89
|
} catch (err) {
|
|
@@ -65,6 +92,15 @@ async function runLitmusCli(args) {
|
|
|
65
92
|
return 1;
|
|
66
93
|
}
|
|
67
94
|
}
|
|
95
|
+
async function promptYesNo(prompt, defaultYes) {
|
|
96
|
+
const { createInterface } = await import("readline/promises");
|
|
97
|
+
const rl = createInterface({ input: process.stdin, output: process.stderr });
|
|
98
|
+
try {
|
|
99
|
+
return isAffirmative(await rl.question(prompt), defaultYes);
|
|
100
|
+
} finally {
|
|
101
|
+
rl.close();
|
|
102
|
+
}
|
|
103
|
+
}
|
|
68
104
|
function parseAuthFlags(args, env = process.env) {
|
|
69
105
|
const headers = {};
|
|
70
106
|
const headerArgs = [];
|
|
@@ -113,19 +149,44 @@ function timeoutSecondsToMs(v) {
|
|
|
113
149
|
const sec = Number(v);
|
|
114
150
|
return Number.isFinite(sec) && sec > 0 ? Math.floor(sec * 1e3) : void 0;
|
|
115
151
|
}
|
|
116
|
-
function checkHostExec(input,
|
|
152
|
+
function checkHostExec(input, gate) {
|
|
153
|
+
const { optIn, dockerAvailable, interactive, optInHint = "--unsafe-host-exec", env = process.env } = gate;
|
|
117
154
|
const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
|
|
118
|
-
|
|
119
|
-
if (
|
|
155
|
+
if (!isStdio) return { action: "allow" };
|
|
156
|
+
if (env.LITMUS_STDIO_ISOLATION === "docker") return { action: "allow", isolation: "docker" };
|
|
120
157
|
const why = "this launches the target's own code; without Docker isolation it runs on THIS host";
|
|
121
|
-
|
|
158
|
+
const warn = `\u26A0 unsafe host execution \u2014 ${why}.`;
|
|
159
|
+
if (optIn) return { action: "allow", isolation: "none", warn };
|
|
160
|
+
if (interactive) {
|
|
161
|
+
if (dockerAvailable) {
|
|
162
|
+
return {
|
|
163
|
+
action: "confirm",
|
|
164
|
+
isolation: "docker",
|
|
165
|
+
defaultYes: true,
|
|
166
|
+
prompt: "Docker detected \u2014 the target will run sandboxed (recommended). Proceed? [Y/n] "
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
return {
|
|
170
|
+
action: "confirm",
|
|
171
|
+
isolation: "none",
|
|
172
|
+
defaultYes: false,
|
|
173
|
+
prompt: `No Docker found \u2014 this would run the target's own code on THIS host, unsandboxed.
|
|
174
|
+
Type "yes" to proceed, or set LITMUS_STDIO_ISOLATION=docker to sandbox: `,
|
|
175
|
+
warn
|
|
176
|
+
};
|
|
177
|
+
}
|
|
122
178
|
return {
|
|
123
|
-
|
|
179
|
+
action: "refuse",
|
|
124
180
|
refuse: `refusing host execution \u2014 ${why}.
|
|
125
181
|
\u2022 sandboxed (recommended): set LITMUS_STDIO_ISOLATION=docker (requires Docker)
|
|
126
182
|
\u2022 accept the risk: re-run with ${optInHint}`
|
|
127
183
|
};
|
|
128
184
|
}
|
|
185
|
+
function isAffirmative(answer, defaultYes) {
|
|
186
|
+
const a = answer.trim().toLowerCase();
|
|
187
|
+
if (a === "") return defaultYes;
|
|
188
|
+
return a === "y" || a === "yes";
|
|
189
|
+
}
|
|
129
190
|
function resolveTarget(target) {
|
|
130
191
|
if (/^https?:\/\//i.test(target)) return target;
|
|
131
192
|
if (existsSync(target)) {
|
package/dist/cli-skill.js
CHANGED
|
@@ -1,31 +1,18 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
|
+
SKILL_CATEGORY_META,
|
|
3
4
|
judgeFromEnv,
|
|
4
5
|
runSkillLitmus,
|
|
5
6
|
runSkillQuality,
|
|
6
7
|
runSkillQualityJudged
|
|
7
|
-
} from "./chunk-
|
|
8
|
-
import "./chunk-
|
|
8
|
+
} from "./chunk-OGOFUBLN.js";
|
|
9
|
+
import "./chunk-CKQZFK77.js";
|
|
9
10
|
|
|
10
11
|
// src/cli-skill.ts
|
|
11
12
|
import { statSync } from "fs";
|
|
12
|
-
var HELP = `polygraphso-litmus-skill \u2014 static safety grades for Claude Code skills.
|
|
13
|
-
|
|
14
|
-
usage:
|
|
15
|
-
polygraphso-litmus-skill [--json] <path-to-skill-dir>
|
|
16
|
-
polygraphso-litmus-skill --help
|
|
17
|
-
|
|
18
|
-
The skill dir must contain a SKILL.md. The safety letter is a STATIC scan (no
|
|
19
|
-
execution); an A means the static checks were clean, not that the skill is
|
|
20
|
-
behaviorally safe.
|
|
21
13
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
LITMUS_LLM_API_KEY and LITMUS_LLM_MODEL (and LITMUS_LLM_BASE_URL for a non-OpenAI
|
|
25
|
-
endpoint). Without a key only the deterministic well-formedness checks run.
|
|
26
|
-
More at https://polygraph.so
|
|
27
|
-
`;
|
|
28
|
-
function render(b) {
|
|
14
|
+
// src/format-skill.ts
|
|
15
|
+
function formatSkillSafety(b) {
|
|
29
16
|
const lines = [
|
|
30
17
|
`grade: ${b.grade} (${b.methodologyVersion})`,
|
|
31
18
|
`${b.gradeRationale}`,
|
|
@@ -34,8 +21,11 @@ function render(b) {
|
|
|
34
21
|
"",
|
|
35
22
|
"categories:"
|
|
36
23
|
];
|
|
24
|
+
const labelWidth = Math.max(0, ...b.categories.map((c) => SKILL_CATEGORY_META[c.code].label.length));
|
|
37
25
|
for (const c of b.categories) {
|
|
38
|
-
|
|
26
|
+
const { label, description } = SKILL_CATEGORY_META[c.code];
|
|
27
|
+
lines.push(` ${c.code} ${label.padEnd(labelWidth)} ${c.status}${c.reason ? ` (${c.reason})` : ""}`);
|
|
28
|
+
lines.push(` ${description}`);
|
|
39
29
|
if (c.status === "fail") {
|
|
40
30
|
for (const f of c.findings.filter((x) => x.severity === "high").slice(0, 5)) {
|
|
41
31
|
lines.push(` ! ${f.kind}${f.file ? ` [${f.file}]` : ""}: ${f.match}`);
|
|
@@ -51,6 +41,24 @@ function render(b) {
|
|
|
51
41
|
lines.push("", b.disclaimer);
|
|
52
42
|
return lines.join("\n") + "\n";
|
|
53
43
|
}
|
|
44
|
+
|
|
45
|
+
// src/cli-skill.ts
|
|
46
|
+
var HELP = `polygraphso-litmus-skill \u2014 static safety grades for Claude Code skills.
|
|
47
|
+
|
|
48
|
+
usage:
|
|
49
|
+
polygraphso-litmus-skill [--json] <path-to-skill-dir>
|
|
50
|
+
polygraphso-litmus-skill --help
|
|
51
|
+
|
|
52
|
+
The skill dir must contain a SKILL.md. The safety letter is a STATIC scan (no
|
|
53
|
+
execution); an A means the static checks were clean, not that the skill is
|
|
54
|
+
behaviorally safe.
|
|
55
|
+
|
|
56
|
+
It also prints a separate, advisory quality signal. The optional LLM-judged
|
|
57
|
+
axes (honesty, coherence) run only if you provide your own key \u2014 set
|
|
58
|
+
LITMUS_LLM_API_KEY and LITMUS_LLM_MODEL (and LITMUS_LLM_BASE_URL for a non-OpenAI
|
|
59
|
+
endpoint). Without a key only the deterministic well-formedness checks run.
|
|
60
|
+
More at https://polygraph.so
|
|
61
|
+
`;
|
|
54
62
|
function renderQuality(q) {
|
|
55
63
|
const lines = ["", `quality (advisory, separate from the grade): ${q.verdict}`];
|
|
56
64
|
for (const c of q.checks) lines.push(` ${c.status === "pass" ? "\xB7" : "!"} ${c.id}: ${c.detail}`);
|
|
@@ -87,7 +95,7 @@ async function main(argv) {
|
|
|
87
95
|
const judge = judgeFromEnv();
|
|
88
96
|
const quality = judge ? await runSkillQualityJudged(target, judge, { skillRef: target }) : runSkillQuality(target, { skillRef: target });
|
|
89
97
|
process.stdout.write(
|
|
90
|
-
json ? JSON.stringify({ safety, quality }, null, 2) + "\n" :
|
|
98
|
+
json ? JSON.stringify({ safety, quality }, null, 2) + "\n" : formatSkillSafety(safety) + renderQuality(quality)
|
|
91
99
|
);
|
|
92
100
|
return 0;
|
|
93
101
|
}
|
package/dist/cli.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
runLitmusCli
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-TTGWSGPC.js";
|
|
5
5
|
import {
|
|
6
6
|
parseServerRef,
|
|
7
7
|
serverKey
|
|
8
|
-
} from "./chunk-
|
|
8
|
+
} from "./chunk-CKQZFK77.js";
|
|
9
9
|
|
|
10
10
|
// src/cli.ts
|
|
11
11
|
import { readFileSync } from "fs";
|
package/dist/index.d.ts
CHANGED
|
@@ -30,13 +30,24 @@ type Registry = "npm" | "pypi" | "github";
|
|
|
30
30
|
* not branch on it. */
|
|
31
31
|
declare const METHODOLOGY_VERSION: "litmus-v5";
|
|
32
32
|
/** Evidence-bundle format version (owned by onchain-proof-spec §2).
|
|
33
|
+
* 1.5.0 adds the optional `selfReportedVersion` field (the server's
|
|
34
|
+
* self-asserted `serverInfo.version`, descriptive metadata only);
|
|
33
35
|
* 1.4.0 adds the C-01 probe id `1.3` (second-order injection, litmus-v5);
|
|
34
36
|
* 1.3.0 adds the optional C-04 category and the `internals-leak`/`crash` finding
|
|
35
37
|
* kinds (litmus-v4); 1.2.0 adds the optional `target.declaredEgress` field and
|
|
36
38
|
* the `egress-allowed` finding kind (litmus-v3); 1.1.0 adds
|
|
37
39
|
* `harness.stdioIsolation`; older remain valid. */
|
|
38
|
-
declare const BUNDLE_SCHEMA_VERSION: "1.
|
|
40
|
+
declare const BUNDLE_SCHEMA_VERSION: "1.5.0";
|
|
39
41
|
type CategoryCode = "C-01" | "C-02" | "C-03" | "C-04";
|
|
42
|
+
/**
|
|
43
|
+
* Plain-English label + one-line description for each probe category, so CLI and
|
|
44
|
+
* MCP output is legible without knowing the probe IDs. The single source of these
|
|
45
|
+
* strings — both renderers and the MCP `run_litmus` summary read from here.
|
|
46
|
+
*/
|
|
47
|
+
declare const CATEGORY_META: Record<CategoryCode, {
|
|
48
|
+
label: string;
|
|
49
|
+
description: string;
|
|
50
|
+
}>;
|
|
40
51
|
/** Probe IDs carry their family number (1=injection, 2=permission,
|
|
41
52
|
* 3=adversarial-input, 4=sensitive). 1.3 (second-order injection) added in v5. */
|
|
42
53
|
type ProbeId = "1.1" | "1.2" | "1.3" | "2.1" | "2.2" | "3.1" | "3.2" | "4.1" | "4.2";
|
|
@@ -108,8 +119,15 @@ interface EvidenceBundle {
|
|
|
108
119
|
methodologyVersion: string;
|
|
109
120
|
/** Canonical, versionless identity (serverKey). */
|
|
110
121
|
serverRef: string;
|
|
111
|
-
/** The exact version actually run
|
|
122
|
+
/** The exact version actually run — a re-fetchable pin (npm/pypi version,
|
|
123
|
+
* skill commit). Null when the target has no such identity (remote URL,
|
|
124
|
+
* unpinned ref). This is the reproducibility anchor. */
|
|
112
125
|
resolvedVersion: string | null;
|
|
126
|
+
/** The version the server reports about *itself* in the MCP `initialize`
|
|
127
|
+
* handshake (`serverInfo.version`). Self-asserted and operator-controlled —
|
|
128
|
+
* descriptive metadata only, never a reproducibility anchor (cf.
|
|
129
|
+
* resolvedVersion). Null when the server reports none. */
|
|
130
|
+
selfReportedVersion: string | null;
|
|
113
131
|
target: TargetDescriptor;
|
|
114
132
|
/** sha256 of the canonical tool surface → `0x` + 64 hex (bytes32). */
|
|
115
133
|
toolDefsFingerprint: string;
|
|
@@ -246,6 +264,9 @@ interface ConnectedTarget {
|
|
|
246
264
|
/** Canonical versionless identity (serverKey), the URL, or the command line. */
|
|
247
265
|
serverRef: string;
|
|
248
266
|
resolvedVersion: string | null;
|
|
267
|
+
/** The server's self-asserted `serverInfo.version` from the MCP handshake.
|
|
268
|
+
* Descriptive metadata only (see EvidenceBundle.selfReportedVersion). */
|
|
269
|
+
selfReportedVersion: string | null;
|
|
249
270
|
teardown: () => Promise<void>;
|
|
250
271
|
}
|
|
251
272
|
interface ConnectOptions {
|
|
@@ -352,6 +373,9 @@ declare function enumerateTools(client: ListToolsClient, opts?: {
|
|
|
352
373
|
maxBytes?: number;
|
|
353
374
|
listTimeoutMs?: number;
|
|
354
375
|
}): Promise<ListedTool[]>;
|
|
376
|
+
/** True if a Docker daemon is reachable (governs C-02 / probe 4.2, and the CLI's
|
|
377
|
+
* detect-and-confirm sandbox prompt). */
|
|
378
|
+
declare function isDockerAvailable(): Promise<boolean>;
|
|
355
379
|
|
|
356
380
|
/**
|
|
357
381
|
* Tool-surface fingerprint (litmus-test-v1 §6, technical-design §3).
|
|
@@ -409,6 +433,7 @@ declare function gradeFromCategories(categories: readonly CategoryResult[]): Gra
|
|
|
409
433
|
interface BundleInput {
|
|
410
434
|
serverRef: string;
|
|
411
435
|
resolvedVersion: string | null;
|
|
436
|
+
selfReportedVersion: string | null;
|
|
412
437
|
target: TargetDescriptor;
|
|
413
438
|
toolDefsFingerprint: string;
|
|
414
439
|
toolDefs: ToolDef[];
|
|
@@ -470,6 +495,15 @@ declare function hasHighSeverity(findings: readonly Finding[]): boolean;
|
|
|
470
495
|
*/
|
|
471
496
|
|
|
472
497
|
type SkillCategoryCode = "S-01" | "S-03" | "S-04" | "S-05";
|
|
498
|
+
/**
|
|
499
|
+
* Plain-English label + one-line description for each skill category, so the skill
|
|
500
|
+
* CLI/MCP output is legible without knowing the S-codes. The single source of these
|
|
501
|
+
* strings — both the renderer and the MCP `run_skill_litmus` summary read from here.
|
|
502
|
+
*/
|
|
503
|
+
declare const SKILL_CATEGORY_META: Record<SkillCategoryCode, {
|
|
504
|
+
label: string;
|
|
505
|
+
description: string;
|
|
506
|
+
}>;
|
|
473
507
|
interface SkillCategoryResult {
|
|
474
508
|
code: SkillCategoryCode;
|
|
475
509
|
status: CategoryStatus;
|
|
@@ -1109,4 +1143,4 @@ declare function parseAuthFlags(args: readonly string[], env?: NodeJS.ProcessEnv
|
|
|
1109
1143
|
/** A target is an https URL, a local MCP entry file, or a registry ref. */
|
|
1110
1144
|
declare function resolveTarget(target: string): string | StdioCommand;
|
|
1111
1145
|
|
|
1112
|
-
export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type ListToolsClient, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, encodeSkillAttestationFields, enumerateTools, 
exfilInstruction, fingerprintToolDefs, formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, verifySkillInputShape };
|
|
1146
|
+
export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_META, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type ListToolsClient, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_CATEGORY_META, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, 
encodeSkillAttestationFields, enumerateTools, exfilInstruction, fingerprintToolDefs, formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, isDockerAvailable, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, verifySkillInputShape };
|
package/dist/index.js
CHANGED
|
@@ -31,13 +31,14 @@ import {
|
|
|
31
31
|
skillAttestationFields,
|
|
32
32
|
skillSchemaUID,
|
|
33
33
|
verifySkillInputShape
|
|
34
|
-
} from "./chunk-VAOQNFW3.js";
|
|
34
|
+
} from "./chunk-PTWDLGI5.js";
|
|
35
35
|
import {
|
|
36
36
|
parseAuthFlags,
|
|
37
37
|
resolveTarget
|
|
38
|
-
} from "./chunk-GNPHHS6I.js";
|
|
38
|
+
} from "./chunk-TTGWSGPC.js";
|
|
39
39
|
import {
|
|
40
40
|
SKILL_BUNDLE_SCHEMA_VERSION,
|
|
41
|
+
SKILL_CATEGORY_META,
|
|
41
42
|
SKILL_METHODOLOGY_VERSION,
|
|
42
43
|
SKILL_QUALITY_VERSION,
|
|
43
44
|
SkillLoadError,
|
|
@@ -55,6 +56,7 @@ import {
|
|
|
55
56
|
instructionMimicry,
|
|
56
57
|
internalsLeak,
|
|
57
58
|
invisibleUnicode,
|
|
59
|
+
isDockerAvailable,
|
|
58
60
|
judgeFromEnv,
|
|
59
61
|
judgeSkillQuality,
|
|
60
62
|
loadSkill,
|
|
@@ -69,9 +71,10 @@ import {
|
|
|
69
71
|
skillInjectionFails,
|
|
70
72
|
stateChangingToolNames,
|
|
71
73
|
stripExamples
|
|
72
|
-
} from "./chunk-63OICX66.js";
|
|
74
|
+
} from "./chunk-OGOFUBLN.js";
|
|
73
75
|
import {
|
|
74
76
|
BUNDLE_SCHEMA_VERSION,
|
|
77
|
+
CATEGORY_META,
|
|
75
78
|
CATEGORY_STATUS_UINT8,
|
|
76
79
|
METHODOLOGY_VERSION,
|
|
77
80
|
ServerRefParseError,
|
|
@@ -83,7 +86,7 @@ import {
|
|
|
83
86
|
parseSkillRef,
|
|
84
87
|
serverKey,
|
|
85
88
|
skillKey
|
|
86
|
-
} from "./chunk-44R4ZYOE.js";
|
|
89
|
+
} from "./chunk-CKQZFK77.js";
|
|
87
90
|
|
|
88
91
|
// ../agent/src/gate.ts
|
|
89
92
|
function sameServer(a, b) {
|
|
@@ -131,6 +134,7 @@ async function liveFingerprint(target) {
|
|
|
131
134
|
}
|
|
132
135
|
export {
|
|
133
136
|
BUNDLE_SCHEMA_VERSION,
|
|
137
|
+
CATEGORY_META,
|
|
134
138
|
CATEGORY_STATUS_UINT8,
|
|
135
139
|
DEFAULT_PASSING,
|
|
136
140
|
LITMUS_SCHEMA,
|
|
@@ -144,6 +148,7 @@ export {
|
|
|
144
148
|
RUN_SKILL_LITMUS_TOOL_NAME,
|
|
145
149
|
RUN_SKILL_LITMUS_TOOL_TITLE,
|
|
146
150
|
SKILL_BUNDLE_SCHEMA_VERSION,
|
|
151
|
+
SKILL_CATEGORY_META,
|
|
147
152
|
SKILL_METHODOLOGY_VERSION,
|
|
148
153
|
SKILL_QUALITY_VERSION,
|
|
149
154
|
ServerRefParseError,
|
|
@@ -178,6 +183,7 @@ export {
|
|
|
178
183
|
instructionMimicry,
|
|
179
184
|
internalsLeak,
|
|
180
185
|
invisibleUnicode,
|
|
186
|
+
isDockerAvailable,
|
|
181
187
|
judgeFromEnv,
|
|
182
188
|
judgeSkillQuality,
|
|
183
189
|
litmusFields,
|
package/dist/mcp.js
CHANGED
|
@@ -20,12 +20,12 @@ import {
|
|
|
20
20
|
runSkillLitmusInputShape,
|
|
21
21
|
verifyInputShape,
|
|
22
22
|
verifySkillInputShape
|
|
23
|
-
} from "./chunk-VAOQNFW3.js";
|
|
24
|
-
import "./chunk-GNPHHS6I.js";
|
|
23
|
+
} from "./chunk-PTWDLGI5.js";
|
|
24
|
+
import "./chunk-TTGWSGPC.js";
|
|
25
25
|
import {
|
|
26
26
|
judgeFromEnv
|
|
27
|
-
} from "./chunk-63OICX66.js";
|
|
28
|
-
import "./chunk-44R4ZYOE.js";
|
|
27
|
+
} from "./chunk-OGOFUBLN.js";
|
|
28
|
+
import "./chunk-CKQZFK77.js";
|
|
29
29
|
|
|
30
30
|
// src/mcp.ts
|
|
31
31
|
import { realpathSync } from "fs";
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
SKILL_BUNDLE_SCHEMA_VERSION,
|
|
3
|
+
SKILL_CATEGORY_META,
|
|
3
4
|
SKILL_METHODOLOGY_VERSION,
|
|
4
5
|
SKILL_QUALITY_VERSION,
|
|
5
6
|
SkillLoadError,
|
|
@@ -17,6 +18,7 @@ import {
|
|
|
17
18
|
instructionMimicry,
|
|
18
19
|
internalsLeak,
|
|
19
20
|
invisibleUnicode,
|
|
21
|
+
isDockerAvailable,
|
|
20
22
|
judgeFromEnv,
|
|
21
23
|
judgeSkillQuality,
|
|
22
24
|
loadSkill,
|
|
@@ -31,10 +33,11 @@ import {
|
|
|
31
33
|
skillInjectionFails,
|
|
32
34
|
stateChangingToolNames,
|
|
33
35
|
stripExamples
|
|
34
|
-
} from "./chunk-63OICX66.js";
|
|
35
|
-
import "./chunk-44R4ZYOE.js";
|
|
36
|
+
} from "./chunk-OGOFUBLN.js";
|
|
37
|
+
import "./chunk-CKQZFK77.js";
|
|
36
38
|
export {
|
|
37
39
|
SKILL_BUNDLE_SCHEMA_VERSION,
|
|
40
|
+
SKILL_CATEGORY_META,
|
|
38
41
|
SKILL_METHODOLOGY_VERSION,
|
|
39
42
|
SKILL_QUALITY_VERSION,
|
|
40
43
|
SkillLoadError,
|
|
@@ -52,6 +55,7 @@ export {
|
|
|
52
55
|
instructionMimicry,
|
|
53
56
|
internalsLeak,
|
|
54
57
|
invisibleUnicode,
|
|
58
|
+
isDockerAvailable,
|
|
55
59
|
judgeFromEnv,
|
|
56
60
|
judgeSkillQuality,
|
|
57
61
|
loadSkill,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@polygraphso/litmus",
|
|
3
|
-
"version": "0.10.0",
|
|
3
|
+
"version": "0.12.0",
|
|
4
4
|
"description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"homepage": "https://polygraph.so",
|
|
@@ -62,11 +62,11 @@
|
|
|
62
62
|
"tsup": "^8.3.0",
|
|
63
63
|
"typescript": "^5.9.3",
|
|
64
64
|
"vitest": "^2.1.0",
|
|
65
|
-
"@polygraph/onchain": "0.0.0",
|
|
66
65
|
"@polygraph/core": "0.0.0",
|
|
67
|
-
"@polygraph/probes": "0.0.0",
|
|
66
|
+
"@polygraph/onchain": "0.0.0",
|
|
68
67
|
"@polygraph/agent": "0.0.0",
|
|
69
68
|
"@polygraph/mcp": "0.0.0",
|
|
69
|
+
"@polygraph/probes": "0.0.0",
|
|
70
70
|
"@polygraph/cli": "0.0.0"
|
|
71
71
|
},
|
|
72
72
|
"publishConfig": {
|