@polygraphso/litmus 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -5
- package/dist/{chunk-M5HXKZVN.js → chunk-BUKDFSDO.js} +1 -1
- package/dist/{chunk-DN2OX4RT.js → chunk-RYJXVMCT.js} +28 -9
- package/dist/{chunk-AVF3GYCS.js → chunk-Z66GKAQD.js} +2 -2
- package/dist/cli-skill.js +1 -1
- package/dist/cli.js +1 -1
- package/dist/index.js +3 -3
- package/dist/mcp.js +3 -3
- package/dist/{src-TG44QXFV.js → src-TMJOIVGB.js} +1 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -180,9 +180,12 @@ It also prints a separate, advisory **quality** signal (`well-formed` / `issues`
|
|
|
180
180
|
- **Standalone:** bring your own key for any OpenAI-compatible endpoint:
|
|
181
181
|
|
|
182
182
|
```bash
|
|
183
|
-
export LITMUS_LLM_API_KEY=… # your key
|
|
184
|
-
export LITMUS_LLM_MODEL=gpt-4o #
|
|
183
|
+
export LITMUS_LLM_API_KEY=… # your key (any OpenAI-compatible endpoint)
|
|
184
|
+
export LITMUS_LLM_MODEL=gpt-4o # a model the endpoint serves
|
|
185
185
|
export LITMUS_LLM_BASE_URL=https://api.openai.com/v1 # optional; defaults to OpenAI
|
|
186
|
+
# Other providers via their OpenAI-compatible endpoint, e.g.:
|
|
187
|
+
# Claude: LITMUS_LLM_BASE_URL=https://api.anthropic.com/v1 LITMUS_LLM_MODEL=claude-sonnet-4-6
|
|
188
|
+
# Gemini: LITMUS_LLM_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai LITMUS_LLM_MODEL=gemini-2.5-flash
|
|
186
189
|
```
|
|
187
190
|
|
|
188
191
|
- With neither, the judged axes are skipped — the grade and deterministic quality
|
|
@@ -195,9 +198,10 @@ The same `polygraphso-litmus-mcp` server exposes two skill tools (plus `grade-sk
|
|
|
195
198
|
|
|
196
199
|
- **`run_skill_litmus`** — grade a local skill directory now (static; uses the host
|
|
197
200
|
model via sampling for the quality axes, no key).
|
|
198
|
-
- **`verify_skill_attestation`** — read a skill's *already-published* grade
|
|
199
|
-
|
|
200
|
-
|
|
201
|
+
- **`verify_skill_attestation`** — read a skill's *already-published* grade by its
|
|
202
|
+
`skill_ref` (`source/owner/repo#path`, e.g. `github/anthropics/skills#skills/pdf`). It
|
|
203
|
+
returns the attested `contentHash`; recompute the skill's hash and require equality
|
|
204
|
+
before installing — the content hash, not the version, is the trust anchor.
|
|
201
205
|
|
|
202
206
|
## Library
|
|
203
207
|
|
|
@@ -44,7 +44,7 @@ async function runLitmusCli(args) {
|
|
|
44
44
|
);
|
|
45
45
|
return 2;
|
|
46
46
|
}
|
|
47
|
-
const { runLitmus } = await import("./src-TG44QXFV.js");
|
|
47
|
+
const { runLitmus } = await import("./src-TMJOIVGB.js");
|
|
48
48
|
const input = resolveTarget(target);
|
|
49
49
|
try {
|
|
50
50
|
const bundle = await runLitmus(input, { headers, allowStateChanging });
|
|
@@ -1078,15 +1078,29 @@ function instructionMimicry(text) {
|
|
|
1078
1078
|
}
|
|
1079
1079
|
return findings;
|
|
1080
1080
|
}
|
|
1081
|
-
var
|
|
1081
|
+
var SENSITIVE_QUERY_KEY_TOKENS = [
|
|
1082
|
+
"key",
|
|
1083
|
+
"token",
|
|
1084
|
+
"secret",
|
|
1085
|
+
"password",
|
|
1086
|
+
"passwd",
|
|
1087
|
+
"auth",
|
|
1088
|
+
"session",
|
|
1089
|
+
"cookie",
|
|
1090
|
+
"canary",
|
|
1091
|
+
"api",
|
|
1092
|
+
"env",
|
|
1093
|
+
"cred"
|
|
1094
|
+
];
|
|
1082
1095
|
function looksExfilQuery(url) {
|
|
1083
1096
|
const q = url.indexOf("?");
|
|
1084
1097
|
if (q < 0) return false;
|
|
1085
1098
|
const query = url.slice(q + 1);
|
|
1086
|
-
if (SENSITIVE_QUERY_KEY.test(url)) return true;
|
|
1087
1099
|
for (const pair of query.split("&")) {
|
|
1088
1100
|
const eq = pair.indexOf("=");
|
|
1089
1101
|
if (eq < 0) continue;
|
|
1102
|
+
const key = pair.slice(0, eq).toLowerCase();
|
|
1103
|
+
if (SENSITIVE_QUERY_KEY_TOKENS.some((t) => key.includes(t))) return true;
|
|
1090
1104
|
let v = pair.slice(eq + 1);
|
|
1091
1105
|
try {
|
|
1092
1106
|
v = decodeURIComponent(v);
|
|
@@ -1100,7 +1114,7 @@ function looksExfilQuery(url) {
|
|
|
1100
1114
|
}
|
|
1101
1115
|
function markdownTricks(text) {
|
|
1102
1116
|
const findings = [];
|
|
1103
|
-
const proto = /\b(?:javascript|data):[^\s)"'
|
|
1117
|
+
const proto = /\b(?:javascript|data):[^\s)"'<>*`]+/gi;
|
|
1104
1118
|
for (let m = proto.exec(text); m; m = proto.exec(text)) {
|
|
1105
1119
|
findings.push({
|
|
1106
1120
|
kind: "markdown-trick",
|
|
@@ -1109,7 +1123,7 @@ function markdownTricks(text) {
|
|
|
1109
1123
|
offset: m.index
|
|
1110
1124
|
});
|
|
1111
1125
|
}
|
|
1112
|
-
const exfilImg = /!?\[[^\]]
|
|
1126
|
+
const exfilImg = /!?\[[^\]]{0,200}\]\((https?:\/\/[^)\s?]{0,400}\?[^)\s=]{0,200}=[^)\s]{0,200})\)/gi;
|
|
1113
1127
|
for (let m = exfilImg.exec(text); m; m = exfilImg.exec(text)) {
|
|
1114
1128
|
const url = m[1] ?? m[0];
|
|
1115
1129
|
if (!looksExfilQuery(url)) continue;
|
|
@@ -1126,7 +1140,9 @@ var INTERNALS_LEAK = [
|
|
|
1126
1140
|
// V8 / Node stack frame: `at fn (/abs/file.js:12:5)` or `at /abs/file.js:12:5`
|
|
1127
1141
|
// (a leading path/drive/`node:`/`file:` is required, so a "meet at 10:30:45"
|
|
1128
1142
|
// timestamp can't trip it).
|
|
1129
|
-
|
|
1143
|
+
// Bounded quantifiers ({0,300}) keep this linear: overlapping `.*\s` + `[^\s()]*`
|
|
1144
|
+
// + trailing `\s*$` over untrusted output is otherwise polynomial (js/polynomial-redos).
|
|
1145
|
+
/^\s*at\s+(?:[^\n]{0,300}\s)?\(?(?:\/|[A-Za-z]:[\\/]|node:|file:\/\/)[^\s()]{0,300}:\d+:\d+\)?\s*$/m,
|
|
1130
1146
|
// Node uncaught-rejection / fatal banners.
|
|
1131
1147
|
/\b(?:UnhandledPromiseRejection(?:Warning)?|unhandledRejection|FATAL ERROR:|Fatal error:)\b/,
|
|
1132
1148
|
// Python traceback header + frame.
|
|
@@ -1138,8 +1154,9 @@ var INTERNALS_LEAK = [
|
|
|
1138
1154
|
// Go panic with its goroutine dump (`panic: … goroutine 1 [running]:`).
|
|
1139
1155
|
/\bpanic:[\s\S]{0,300}?\bgoroutine\s+\d+\s+\[/,
|
|
1140
1156
|
// Ruby backtrace frame (`from app.rb:10:in 'method'` / older backtick form);
|
|
1141
|
-
// requires a `.rb` file + `:line:in` so prose can't trip it.
|
|
1142
|
-
|
|
1157
|
+
// requires a `.rb` file + `:line:in` so prose can't trip it. The lookbehind +
|
|
1158
|
+
// bounded run keep `[\w./-]+\.rb` linear (the `.`-overlap is otherwise polynomial).
|
|
1159
|
+
/(?<![\w./-])[\w./-]{1,200}\.rb:\d+:in\s+['\x60]/,
|
|
1143
1160
|
// .NET stack frame (`at NS.Method() in C:\path\File.cs:line 12`).
|
|
1144
1161
|
/\bat\s+[\w.<>+]+\([^)]*\)\s+in\s+\S+:line\s+\d+/i,
|
|
1145
1162
|
// Rust panic banner (`thread 'main' panicked at …`).
|
|
@@ -2282,7 +2299,7 @@ var SINK = /(?:https?:\/\/\S+|\bto\s+(?:a\s+|an\s+|the\s+|your\s+|our\s+)?(?:rem
|
|
|
2282
2299
|
function exfilInstruction(text) {
|
|
2283
2300
|
const findings = [];
|
|
2284
2301
|
const stripped = stripExamples(text);
|
|
2285
|
-
for (const raw of stripped.split(/(?<=[
|
|
2302
|
+
for (const raw of stripped.split(/(?<=[.!?])\s+|\n/)) {
|
|
2286
2303
|
const sentence = raw.trim();
|
|
2287
2304
|
if (!sentence) continue;
|
|
2288
2305
|
if (TRANSMIT_VERB.test(sentence) && SECRET_NOUN.test(sentence) && SINK.test(sentence)) {
|
|
@@ -2439,7 +2456,9 @@ function runSkillLitmus(dir, opts = {}) {
|
|
|
2439
2456
|
|
|
2440
2457
|
// ../probes/src/skills/quality-judge.ts
|
|
2441
2458
|
function openAICompatJudge(cfg) {
|
|
2442
|
-
|
|
2459
|
+
let base = cfg.baseUrl;
|
|
2460
|
+
while (base.endsWith("/")) base = base.slice(0, -1);
|
|
2461
|
+
const url = `${base}/chat/completions`;
|
|
2443
2462
|
return {
|
|
2444
2463
|
id: `openai-compat:${cfg.model}`,
|
|
2445
2464
|
async complete(system, user) {
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
import {
|
|
2
2
|
parseAuthFlags,
|
|
3
3
|
resolveTarget
|
|
4
|
-
} from "./chunk-M5HXKZVN.js";
|
|
4
|
+
} from "./chunk-BUKDFSDO.js";
|
|
5
5
|
import {
|
|
6
6
|
SKILL_METHODOLOGY_VERSION,
|
|
7
7
|
runLitmus,
|
|
8
8
|
runSkillLitmus,
|
|
9
9
|
runSkillQuality,
|
|
10
10
|
runSkillQualityJudged
|
|
11
|
-
} from "./chunk-DN2OX4RT.js";
|
|
11
|
+
} from "./chunk-RYJXVMCT.js";
|
|
12
12
|
import {
|
|
13
13
|
CATEGORY_STATUS_UINT8,
|
|
14
14
|
METHODOLOGY_VERSION,
|
package/dist/cli-skill.js
CHANGED
package/dist/cli.js
CHANGED
package/dist/index.js
CHANGED
|
@@ -31,11 +31,11 @@ import {
|
|
|
31
31
|
skillAttestationFields,
|
|
32
32
|
skillSchemaUID,
|
|
33
33
|
verifySkillInputShape
|
|
34
|
-
} from "./chunk-AVF3GYCS.js";
|
|
34
|
+
} from "./chunk-Z66GKAQD.js";
|
|
35
35
|
import {
|
|
36
36
|
parseAuthFlags,
|
|
37
37
|
resolveTarget
|
|
38
|
-
} from "./chunk-M5HXKZVN.js";
|
|
38
|
+
} from "./chunk-BUKDFSDO.js";
|
|
39
39
|
import {
|
|
40
40
|
SKILL_BUNDLE_SCHEMA_VERSION,
|
|
41
41
|
SKILL_METHODOLOGY_VERSION,
|
|
@@ -68,7 +68,7 @@ import {
|
|
|
68
68
|
skillInjectionFails,
|
|
69
69
|
stateChangingToolNames,
|
|
70
70
|
stripExamples
|
|
71
|
-
} from "./chunk-DN2OX4RT.js";
|
|
71
|
+
} from "./chunk-RYJXVMCT.js";
|
|
72
72
|
import {
|
|
73
73
|
BUNDLE_SCHEMA_VERSION,
|
|
74
74
|
CATEGORY_STATUS_UINT8,
|
package/dist/mcp.js
CHANGED
|
@@ -20,11 +20,11 @@ import {
|
|
|
20
20
|
runSkillLitmusInputShape,
|
|
21
21
|
verifyInputShape,
|
|
22
22
|
verifySkillInputShape
|
|
23
|
-
} from "./chunk-AVF3GYCS.js";
|
|
24
|
-
import "./chunk-M5HXKZVN.js";
|
|
23
|
+
} from "./chunk-Z66GKAQD.js";
|
|
24
|
+
import "./chunk-BUKDFSDO.js";
|
|
25
25
|
import {
|
|
26
26
|
judgeFromEnv
|
|
27
|
-
} from "./chunk-DN2OX4RT.js";
|
|
27
|
+
} from "./chunk-RYJXVMCT.js";
|
|
28
28
|
import "./chunk-44R4ZYOE.js";
|
|
29
29
|
|
|
30
30
|
// src/mcp.ts
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@polygraphso/litmus",
|
|
3
|
-
"version": "0.9.0",
|
|
3
|
+
"version": "0.9.1",
|
|
4
4
|
"description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"homepage": "https://polygraph.so",
|
|
@@ -63,9 +63,9 @@
|
|
|
63
63
|
"typescript": "^5.9.3",
|
|
64
64
|
"vitest": "^2.1.0",
|
|
65
65
|
"@polygraph/core": "0.0.0",
|
|
66
|
-
"@polygraph/probes": "0.0.0",
|
|
67
66
|
"@polygraph/onchain": "0.0.0",
|
|
68
67
|
"@polygraph/agent": "0.0.0",
|
|
68
|
+
"@polygraph/probes": "0.0.0",
|
|
69
69
|
"@polygraph/cli": "0.0.0",
|
|
70
70
|
"@polygraph/mcp": "0.0.0"
|
|
71
71
|
},
|