@polygraphso/litmus 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -180,9 +180,12 @@ It also prints a separate, advisory **quality** signal (`well-formed` / `issues`
180
180
  - **Standalone:** bring your own key for any OpenAI-compatible endpoint:
181
181
 
182
182
  ```bash
183
- export LITMUS_LLM_API_KEY=… # your key
184
- export LITMUS_LLM_MODEL=gpt-4o # any model the endpoint serves
183
+ export LITMUS_LLM_API_KEY=… # your key (any OpenAI-compatible endpoint)
184
+ export LITMUS_LLM_MODEL=gpt-4o # a model the endpoint serves
185
185
  export LITMUS_LLM_BASE_URL=https://api.openai.com/v1 # optional; defaults to OpenAI
186
+ # Other providers via their OpenAI-compatible endpoint, e.g.:
187
+ # Claude: LITMUS_LLM_BASE_URL=https://api.anthropic.com/v1 LITMUS_LLM_MODEL=claude-sonnet-4-6
188
+ # Gemini: LITMUS_LLM_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai LITMUS_LLM_MODEL=gemini-2.5-flash
186
189
  ```
187
190
 
188
191
  - With neither, the judged axes are skipped — the grade and deterministic quality
@@ -195,9 +198,10 @@ The same `polygraphso-litmus-mcp` server exposes two skill tools (plus `grade-sk
195
198
 
196
199
  - **`run_skill_litmus`** — grade a local skill directory now (static; uses the host
197
200
  model via sampling for the quality axes, no key).
198
- - **`verify_skill_attestation`** — read a skill's *already-published* grade. It returns
199
- the attested `contentHash`; recompute the skill's hash and require equality before
200
- installing the content hash, not the version, is the trust anchor.
201
+ - **`verify_skill_attestation`** — read a skill's *already-published* grade by its
202
+ `skill_ref` (`source/owner/repo#path`, e.g. `github/anthropics/skills#skills/pdf`). It
203
+ returns the attested `contentHash`; recompute the skill's hash and require equality
204
+ before installing — the content hash, not the version, is the trust anchor.
201
205
 
202
206
  ## Library
203
207
 
@@ -44,7 +44,7 @@ async function runLitmusCli(args) {
44
44
  );
45
45
  return 2;
46
46
  }
47
- const { runLitmus } = await import("./src-TG44QXFV.js");
47
+ const { runLitmus } = await import("./src-TMJOIVGB.js");
48
48
  const input = resolveTarget(target);
49
49
  try {
50
50
  const bundle = await runLitmus(input, { headers, allowStateChanging });
@@ -1078,15 +1078,29 @@ function instructionMimicry(text) {
1078
1078
  }
1079
1079
  return findings;
1080
1080
  }
1081
- var SENSITIVE_QUERY_KEY = /(?:^|[?&])[^=&]*(?:key|token|secret|password|passwd|auth|session|cookie|canary|api|env|cred)[^=&]*=/i;
1081
+ var SENSITIVE_QUERY_KEY_TOKENS = [
1082
+ "key",
1083
+ "token",
1084
+ "secret",
1085
+ "password",
1086
+ "passwd",
1087
+ "auth",
1088
+ "session",
1089
+ "cookie",
1090
+ "canary",
1091
+ "api",
1092
+ "env",
1093
+ "cred"
1094
+ ];
1082
1095
  function looksExfilQuery(url) {
1083
1096
  const q = url.indexOf("?");
1084
1097
  if (q < 0) return false;
1085
1098
  const query = url.slice(q + 1);
1086
- if (SENSITIVE_QUERY_KEY.test(url)) return true;
1087
1099
  for (const pair of query.split("&")) {
1088
1100
  const eq = pair.indexOf("=");
1089
1101
  if (eq < 0) continue;
1102
+ const key = pair.slice(0, eq).toLowerCase();
1103
+ if (SENSITIVE_QUERY_KEY_TOKENS.some((t) => key.includes(t))) return true;
1090
1104
  let v = pair.slice(eq + 1);
1091
1105
  try {
1092
1106
  v = decodeURIComponent(v);
@@ -1100,7 +1114,7 @@ function looksExfilQuery(url) {
1100
1114
  }
1101
1115
  function markdownTricks(text) {
1102
1116
  const findings = [];
1103
- const proto = /\b(?:javascript|data):[^\s)"'<>]+/gi;
1117
+ const proto = /\b(?:javascript|data):[^\s)"'<>*`]+/gi;
1104
1118
  for (let m = proto.exec(text); m; m = proto.exec(text)) {
1105
1119
  findings.push({
1106
1120
  kind: "markdown-trick",
@@ -1109,7 +1123,7 @@ function markdownTricks(text) {
1109
1123
  offset: m.index
1110
1124
  });
1111
1125
  }
1112
- const exfilImg = /!?\[[^\]]*\]\((https?:\/\/[^)\s]*\?[^)\s]*=[^)\s]*)\)/gi;
1126
+ const exfilImg = /!?\[[^\]]{0,200}\]\((https?:\/\/[^)\s?]{0,400}\?[^)\s=]{0,200}=[^)\s]{0,200})\)/gi;
1113
1127
  for (let m = exfilImg.exec(text); m; m = exfilImg.exec(text)) {
1114
1128
  const url = m[1] ?? m[0];
1115
1129
  if (!looksExfilQuery(url)) continue;
@@ -1126,7 +1140,9 @@ var INTERNALS_LEAK = [
1126
1140
  // V8 / Node stack frame: `at fn (/abs/file.js:12:5)` or `at /abs/file.js:12:5`
1127
1141
  // (a leading path/drive/`node:`/`file:` is required, so a "meet at 10:30:45"
1128
1142
  // timestamp can't trip it).
1129
- /^\s*at\s+(?:.*\s)?\(?(?:\/|[A-Za-z]:[\\/]|node:|file:\/\/)[^\s()]*:\d+:\d+\)?\s*$/m,
1143
+ // Bounded quantifiers ({0,300}) keep this linear: overlapping `.*\s` + `[^\s()]*`
1144
+ // + trailing `\s*$` over untrusted output is otherwise polynomial (js/polynomial-redos).
1145
+ /^\s*at\s+(?:[^\n]{0,300}\s)?\(?(?:\/|[A-Za-z]:[\\/]|node:|file:\/\/)[^\s()]{0,300}:\d+:\d+\)?\s*$/m,
1130
1146
  // Node uncaught-rejection / fatal banners.
1131
1147
  /\b(?:UnhandledPromiseRejection(?:Warning)?|unhandledRejection|FATAL ERROR:|Fatal error:)\b/,
1132
1148
  // Python traceback header + frame.
@@ -1138,8 +1154,9 @@ var INTERNALS_LEAK = [
1138
1154
  // Go panic with its goroutine dump (`panic: … goroutine 1 [running]:`).
1139
1155
  /\bpanic:[\s\S]{0,300}?\bgoroutine\s+\d+\s+\[/,
1140
1156
  // Ruby backtrace frame (`from app.rb:10:in 'method'` / older backtick form);
1141
- // requires a `.rb` file + `:line:in` so prose can't trip it.
1142
- /[\w./-]+\.rb:\d+:in\s+['\x60]/,
1157
+ // requires a `.rb` file + `:line:in` so prose can't trip it. The lookbehind +
1158
+ // bounded run keep `[\w./-]+\.rb` linear (the `.`-overlap is otherwise polynomial).
1159
+ /(?<![\w./-])[\w./-]{1,200}\.rb:\d+:in\s+['\x60]/,
1143
1160
  // .NET stack frame (`at NS.Method() in C:\path\File.cs:line 12`).
1144
1161
  /\bat\s+[\w.<>+]+\([^)]*\)\s+in\s+\S+:line\s+\d+/i,
1145
1162
  // Rust panic banner (`thread 'main' panicked at …`).
@@ -2282,7 +2299,7 @@ var SINK = /(?:https?:\/\/\S+|\bto\s+(?:a\s+|an\s+|the\s+|your\s+|our\s+)?(?:rem
2282
2299
  function exfilInstruction(text) {
2283
2300
  const findings = [];
2284
2301
  const stripped = stripExamples(text);
2285
- for (const raw of stripped.split(/(?<=[.!?\n])/)) {
2302
+ for (const raw of stripped.split(/(?<=[.!?])\s+|\n/)) {
2286
2303
  const sentence = raw.trim();
2287
2304
  if (!sentence) continue;
2288
2305
  if (TRANSMIT_VERB.test(sentence) && SECRET_NOUN.test(sentence) && SINK.test(sentence)) {
@@ -2439,7 +2456,9 @@ function runSkillLitmus(dir, opts = {}) {
2439
2456
 
2440
2457
  // ../probes/src/skills/quality-judge.ts
2441
2458
  function openAICompatJudge(cfg) {
2442
- const url = `${cfg.baseUrl.replace(/\/+$/, "")}/chat/completions`;
2459
+ let base = cfg.baseUrl;
2460
+ while (base.endsWith("/")) base = base.slice(0, -1);
2461
+ const url = `${base}/chat/completions`;
2443
2462
  return {
2444
2463
  id: `openai-compat:${cfg.model}`,
2445
2464
  async complete(system, user) {
@@ -1,14 +1,14 @@
1
1
  import {
2
2
  parseAuthFlags,
3
3
  resolveTarget
4
- } from "./chunk-M5HXKZVN.js";
4
+ } from "./chunk-BUKDFSDO.js";
5
5
  import {
6
6
  SKILL_METHODOLOGY_VERSION,
7
7
  runLitmus,
8
8
  runSkillLitmus,
9
9
  runSkillQuality,
10
10
  runSkillQualityJudged
11
- } from "./chunk-DN2OX4RT.js";
11
+ } from "./chunk-RYJXVMCT.js";
12
12
  import {
13
13
  CATEGORY_STATUS_UINT8,
14
14
  METHODOLOGY_VERSION,
package/dist/cli-skill.js CHANGED
@@ -4,7 +4,7 @@ import {
4
4
  runSkillLitmus,
5
5
  runSkillQuality,
6
6
  runSkillQualityJudged
7
- } from "./chunk-DN2OX4RT.js";
7
+ } from "./chunk-RYJXVMCT.js";
8
8
  import "./chunk-44R4ZYOE.js";
9
9
 
10
10
  // src/cli-skill.ts
package/dist/cli.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  runLitmusCli
4
- } from "./chunk-M5HXKZVN.js";
4
+ } from "./chunk-BUKDFSDO.js";
5
5
  import {
6
6
  parseServerRef,
7
7
  serverKey
package/dist/index.js CHANGED
@@ -31,11 +31,11 @@ import {
31
31
  skillAttestationFields,
32
32
  skillSchemaUID,
33
33
  verifySkillInputShape
34
- } from "./chunk-AVF3GYCS.js";
34
+ } from "./chunk-Z66GKAQD.js";
35
35
  import {
36
36
  parseAuthFlags,
37
37
  resolveTarget
38
- } from "./chunk-M5HXKZVN.js";
38
+ } from "./chunk-BUKDFSDO.js";
39
39
  import {
40
40
  SKILL_BUNDLE_SCHEMA_VERSION,
41
41
  SKILL_METHODOLOGY_VERSION,
@@ -68,7 +68,7 @@ import {
68
68
  skillInjectionFails,
69
69
  stateChangingToolNames,
70
70
  stripExamples
71
- } from "./chunk-DN2OX4RT.js";
71
+ } from "./chunk-RYJXVMCT.js";
72
72
  import {
73
73
  BUNDLE_SCHEMA_VERSION,
74
74
  CATEGORY_STATUS_UINT8,
package/dist/mcp.js CHANGED
@@ -20,11 +20,11 @@ import {
20
20
  runSkillLitmusInputShape,
21
21
  verifyInputShape,
22
22
  verifySkillInputShape
23
- } from "./chunk-AVF3GYCS.js";
24
- import "./chunk-M5HXKZVN.js";
23
+ } from "./chunk-Z66GKAQD.js";
24
+ import "./chunk-BUKDFSDO.js";
25
25
  import {
26
26
  judgeFromEnv
27
- } from "./chunk-DN2OX4RT.js";
27
+ } from "./chunk-RYJXVMCT.js";
28
28
  import "./chunk-44R4ZYOE.js";
29
29
 
30
30
  // src/mcp.ts
@@ -30,7 +30,7 @@ import {
30
30
  skillInjectionFails,
31
31
  stateChangingToolNames,
32
32
  stripExamples
33
- } from "./chunk-DN2OX4RT.js";
33
+ } from "./chunk-RYJXVMCT.js";
34
34
  import "./chunk-44R4ZYOE.js";
35
35
  export {
36
36
  SKILL_BUNDLE_SCHEMA_VERSION,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polygraphso/litmus",
3
- "version": "0.9.0",
3
+ "version": "0.9.1",
4
4
  "description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
5
5
  "license": "Apache-2.0",
6
6
  "homepage": "https://polygraph.so",
@@ -63,9 +63,9 @@
63
63
  "typescript": "^5.9.3",
64
64
  "vitest": "^2.1.0",
65
65
  "@polygraph/core": "0.0.0",
66
- "@polygraph/probes": "0.0.0",
67
66
  "@polygraph/onchain": "0.0.0",
68
67
  "@polygraph/agent": "0.0.0",
68
+ "@polygraph/probes": "0.0.0",
69
69
  "@polygraph/cli": "0.0.0",
70
70
  "@polygraph/mcp": "0.0.0"
71
71
  },