@polygraphso/litmus 0.11.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -109,7 +109,11 @@ claude mcp add polygraph-litmus -e POLYGRAPH_API_URL=https://polygraph.so \
109
109
  -- npx -y -p @polygraphso/litmus polygraphso-litmus-mcp
110
110
  ```
111
111
 
112
- **Claude Desktop** (`claude_desktop_config.json`) / **Cursor** (`~/.cursor/mcp.json`):
112
+ **Cursor** one-click install:
113
+
114
+ [![Add polygraph-litmus to Cursor](https://cursor.com/deeplink/mcp-install-dark.svg)](cursor://anysphere.cursor-deeplink/mcp/install?name=polygraph-litmus&config=eyJjb21tYW5kIjoibnB4IiwiYXJncyI6WyIteSIsIi1wIiwiQHBvbHlncmFwaHNvL2xpdG11cyIsInBvbHlncmFwaHNvLWxpdG11cy1tY3AiXSwiZW52Ijp7IlBPTFlHUkFQSF9BUElfVVJMIjoiaHR0cHM6Ly9wb2x5Z3JhcGguc28ifX0=)
115
+
116
+ Or wire it up by hand — **Claude Desktop** (`claude_desktop_config.json`) / **Cursor** (`~/.cursor/mcp.json`):
113
117
 
114
118
  ```json
115
119
  {
@@ -1,6 +1,6 @@
1
1
  // ../core/src/types.ts
2
2
  var METHODOLOGY_VERSION = "litmus-v5";
3
- var BUNDLE_SCHEMA_VERSION = "1.4.0";
3
+ var BUNDLE_SCHEMA_VERSION = "1.5.0";
4
4
  var CATEGORY_META = {
5
5
  "C-01": { label: "tool-output injection", description: "whether it tries to hijack the caller through tool output" },
6
6
  "C-02": { label: "permission / egress overreach", description: "whether it reaches the network beyond what it declares" },
@@ -3,7 +3,7 @@ import {
3
3
  METHODOLOGY_VERSION,
4
4
  parseServerRef,
5
5
  serverKey
6
- } from "./chunk-X3P26XGS.js";
6
+ } from "./chunk-CKQZFK77.js";
7
7
 
8
8
  // ../probes/src/harness.ts
9
9
  import { execFile as execFile3 } from "child_process";
@@ -638,6 +638,9 @@ function makeResult(client, kind, descriptor, serverRef, resolvedVersion, teardo
638
638
  descriptor,
639
639
  serverRef,
640
640
  resolvedVersion,
641
+ // The server's self-reported identity from the initialize handshake. The SDK
642
+ // exposes it post-connect via getServerVersion(); absent → null (a blank string is kept as-is).
643
+ selfReportedVersion: client.getServerVersion()?.version ?? null,
641
644
  teardown: async () => {
642
645
  try {
643
646
  await client.close();
@@ -2018,6 +2021,7 @@ function assembleBundle(input) {
2018
2021
  methodologyVersion: METHODOLOGY_VERSION,
2019
2022
  serverRef: input.serverRef,
2020
2023
  resolvedVersion: input.resolvedVersion,
2024
+ selfReportedVersion: input.selfReportedVersion,
2021
2025
  target: input.target,
2022
2026
  toolDefsFingerprint: input.toolDefsFingerprint,
2023
2027
  toolDefs: input.toolDefs,
@@ -2100,6 +2104,7 @@ async function runLitmus(target, opts = {}) {
2100
2104
  return assembleBundle({
2101
2105
  serverRef: conn.serverRef,
2102
2106
  resolvedVersion: conn.resolvedVersion,
2107
+ selfReportedVersion: conn.selfReportedVersion,
2103
2108
  // Surface the server's declared egress in the bundle (disclosure: a
2104
2109
  // declaration is not exoneration — the consumer/agent-gate can judge).
2105
2110
  target: egress.declaredEgress.length ? { ...conn.descriptor, declaredEgress: egress.declaredEgress } : conn.descriptor,
@@ -3,7 +3,7 @@ import {
3
3
  checkHostExec,
4
4
  parseAuthFlags,
5
5
  resolveTarget
6
- } from "./chunk-EMMCE3LC.js";
6
+ } from "./chunk-TTGWSGPC.js";
7
7
  import {
8
8
  SKILL_CATEGORY_META,
9
9
  SKILL_METHODOLOGY_VERSION,
@@ -11,7 +11,7 @@ import {
11
11
  runSkillLitmus,
12
12
  runSkillQuality,
13
13
  runSkillQualityJudged
14
- } from "./chunk-NPYDTMQ7.js";
14
+ } from "./chunk-OGOFUBLN.js";
15
15
  import {
16
16
  CATEGORY_META,
17
17
  CATEGORY_STATUS_UINT8,
@@ -20,7 +20,7 @@ import {
20
20
  parseSkillRef,
21
21
  serverKey,
22
22
  skillKey
23
- } from "./chunk-X3P26XGS.js";
23
+ } from "./chunk-CKQZFK77.js";
24
24
 
25
25
  // ../onchain/src/networks.ts
26
26
  var NETWORKS = {
@@ -362,6 +362,9 @@ function summarize(b) {
362
362
  summary: b.gradeRationale,
363
363
  serverRef: b.serverRef,
364
364
  resolvedVersion: b.resolvedVersion,
365
+ // The server's self-asserted serverInfo.version — descriptive only, not a
366
+ // re-fetchable pin (cf. resolvedVersion). Null when the server reports none.
367
+ selfReportedVersion: b.selfReportedVersion,
365
368
  fingerprint: b.toolDefsFingerprint,
366
369
  ranAt: b.ranAt,
367
370
  methodologyVersion: b.methodologyVersion,
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  CATEGORY_META,
3
3
  canonicalStringify
4
- } from "./chunk-X3P26XGS.js";
4
+ } from "./chunk-CKQZFK77.js";
5
5
 
6
6
  // ../cli/src/litmus.ts
7
7
  import { existsSync } from "fs";
@@ -13,6 +13,7 @@ function formatBundle(b) {
13
13
  const lines = [];
14
14
  lines.push(`\u2192 ${b.methodologyVersion} \xB7 ${b.serverRef}`);
15
15
  if (b.resolvedVersion) lines.push(`\u2192 version ${b.resolvedVersion}`);
16
+ if (b.selfReportedVersion) lines.push(`\u2192 self-reported ${b.selfReportedVersion} (unverified)`);
16
17
  lines.push("\u2192 checks");
17
18
  const labelWidth = Math.max(0, ...b.categories.map((c) => CATEGORY_META[c.code].label.length));
18
19
  for (const c of b.categories) {
@@ -54,7 +55,7 @@ async function runLitmusCli(args) {
54
55
  const input = resolveTarget(target);
55
56
  const isStdio = typeof input !== "string" || !/^https?:\/\//i.test(input);
56
57
  const interactive = Boolean(process.stdin.isTTY && process.stdout.isTTY);
57
- const probes = await import("./src-4L5VHLRF.js");
58
+ const probes = await import("./src-ZHTFCKNR.js");
58
59
  const dockerAvailable = isStdio && interactive ? await probes.isDockerAvailable() : false;
59
60
  const decision = checkHostExec(input, { optIn: unsafeHostExec, dockerAvailable, interactive });
60
61
  if (decision.action === "refuse") {
package/dist/cli-skill.js CHANGED
@@ -5,8 +5,8 @@ import {
5
5
  runSkillLitmus,
6
6
  runSkillQuality,
7
7
  runSkillQualityJudged
8
- } from "./chunk-NPYDTMQ7.js";
9
- import "./chunk-X3P26XGS.js";
8
+ } from "./chunk-OGOFUBLN.js";
9
+ import "./chunk-CKQZFK77.js";
10
10
 
11
11
  // src/cli-skill.ts
12
12
  import { statSync } from "fs";
package/dist/cli.js CHANGED
@@ -1,11 +1,11 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  runLitmusCli
4
- } from "./chunk-EMMCE3LC.js";
4
+ } from "./chunk-TTGWSGPC.js";
5
5
  import {
6
6
  parseServerRef,
7
7
  serverKey
8
- } from "./chunk-X3P26XGS.js";
8
+ } from "./chunk-CKQZFK77.js";
9
9
 
10
10
  // src/cli.ts
11
11
  import { readFileSync } from "fs";
package/dist/index.d.ts CHANGED
@@ -30,12 +30,14 @@ type Registry = "npm" | "pypi" | "github";
30
30
  * not branch on it. */
31
31
  declare const METHODOLOGY_VERSION: "litmus-v5";
32
32
  /** Evidence-bundle format version (owned by onchain-proof-spec §2).
33
+ * 1.5.0 adds the optional `selfReportedVersion` field (the server's
34
+ * self-asserted `serverInfo.version`, descriptive metadata only);
33
35
  * 1.4.0 adds the C-01 probe id `1.3` (second-order injection, litmus-v5);
34
36
  * 1.3.0 adds the optional C-04 category and the `internals-leak`/`crash` finding
35
37
  * kinds (litmus-v4); 1.2.0 adds the optional `target.declaredEgress` field and
36
38
  * the `egress-allowed` finding kind (litmus-v3); 1.1.0 adds
37
39
  * `harness.stdioIsolation`; older remain valid. */
38
- declare const BUNDLE_SCHEMA_VERSION: "1.4.0";
40
+ declare const BUNDLE_SCHEMA_VERSION: "1.5.0";
39
41
  type CategoryCode = "C-01" | "C-02" | "C-03" | "C-04";
40
42
  /**
41
43
  * Plain-English label + one-line description for each probe category, so CLI and
@@ -117,8 +119,15 @@ interface EvidenceBundle {
117
119
  methodologyVersion: string;
118
120
  /** Canonical, versionless identity (serverKey). */
119
121
  serverRef: string;
120
- /** The exact version actually run. */
122
+ /** The exact version actually run — a re-fetchable pin (npm/pypi version,
123
+ * skill commit). Null when the target has no such identity (remote URL,
124
+ * unpinned ref). This is the reproducibility anchor. */
121
125
  resolvedVersion: string | null;
126
+ /** The version the server reports about *itself* in the MCP `initialize`
127
+ * handshake (`serverInfo.version`). Self-asserted and operator-controlled —
128
+ * descriptive metadata only, never a reproducibility anchor (cf.
129
+ * resolvedVersion). Null when the server reports none. */
130
+ selfReportedVersion: string | null;
122
131
  target: TargetDescriptor;
123
132
  /** sha256 of the canonical tool surface → `0x` + 64 hex (bytes32). */
124
133
  toolDefsFingerprint: string;
@@ -255,6 +264,9 @@ interface ConnectedTarget {
255
264
  /** Canonical versionless identity (serverKey), the URL, or the command line. */
256
265
  serverRef: string;
257
266
  resolvedVersion: string | null;
267
+ /** The server's self-asserted `serverInfo.version` from the MCP handshake.
268
+ * Descriptive metadata only (see EvidenceBundle.selfReportedVersion). */
269
+ selfReportedVersion: string | null;
258
270
  teardown: () => Promise<void>;
259
271
  }
260
272
  interface ConnectOptions {
@@ -421,6 +433,7 @@ declare function gradeFromCategories(categories: readonly CategoryResult[]): Gra
421
433
  interface BundleInput {
422
434
  serverRef: string;
423
435
  resolvedVersion: string | null;
436
+ selfReportedVersion: string | null;
424
437
  target: TargetDescriptor;
425
438
  toolDefsFingerprint: string;
426
439
  toolDefs: ToolDef[];
package/dist/index.js CHANGED
@@ -31,11 +31,11 @@ import {
31
31
  skillAttestationFields,
32
32
  skillSchemaUID,
33
33
  verifySkillInputShape
34
- } from "./chunk-TK4EI66E.js";
34
+ } from "./chunk-PTWDLGI5.js";
35
35
  import {
36
36
  parseAuthFlags,
37
37
  resolveTarget
38
- } from "./chunk-EMMCE3LC.js";
38
+ } from "./chunk-TTGWSGPC.js";
39
39
  import {
40
40
  SKILL_BUNDLE_SCHEMA_VERSION,
41
41
  SKILL_CATEGORY_META,
@@ -71,7 +71,7 @@ import {
71
71
  skillInjectionFails,
72
72
  stateChangingToolNames,
73
73
  stripExamples
74
- } from "./chunk-NPYDTMQ7.js";
74
+ } from "./chunk-OGOFUBLN.js";
75
75
  import {
76
76
  BUNDLE_SCHEMA_VERSION,
77
77
  CATEGORY_META,
@@ -86,7 +86,7 @@ import {
86
86
  parseSkillRef,
87
87
  serverKey,
88
88
  skillKey
89
- } from "./chunk-X3P26XGS.js";
89
+ } from "./chunk-CKQZFK77.js";
90
90
 
91
91
  // ../agent/src/gate.ts
92
92
  function sameServer(a, b) {
package/dist/mcp.js CHANGED
@@ -20,12 +20,12 @@ import {
20
20
  runSkillLitmusInputShape,
21
21
  verifyInputShape,
22
22
  verifySkillInputShape
23
- } from "./chunk-TK4EI66E.js";
24
- import "./chunk-EMMCE3LC.js";
23
+ } from "./chunk-PTWDLGI5.js";
24
+ import "./chunk-TTGWSGPC.js";
25
25
  import {
26
26
  judgeFromEnv
27
- } from "./chunk-NPYDTMQ7.js";
28
- import "./chunk-X3P26XGS.js";
27
+ } from "./chunk-OGOFUBLN.js";
28
+ import "./chunk-CKQZFK77.js";
29
29
 
30
30
  // src/mcp.ts
31
31
  import { realpathSync } from "fs";
@@ -33,8 +33,8 @@ import {
33
33
  skillInjectionFails,
34
34
  stateChangingToolNames,
35
35
  stripExamples
36
- } from "./chunk-NPYDTMQ7.js";
37
- import "./chunk-X3P26XGS.js";
36
+ } from "./chunk-OGOFUBLN.js";
37
+ import "./chunk-CKQZFK77.js";
38
38
  export {
39
39
  SKILL_BUNDLE_SCHEMA_VERSION,
40
40
  SKILL_CATEGORY_META,
package/package.json CHANGED
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "name": "@polygraphso/litmus",
3
- "version": "0.11.0",
3
+ "version": "0.12.1",
4
+ "mcpName": "io.github.polygraphso/litmus",
4
5
  "description": "Behavioral litmus harness for MCP servers — grade a server A–F (tool-output injection, egress, sensitive-data, adversarial-input) with reproducible, content-addressed evidence. Ships a CLI and an MCP server with a run_litmus tool for AI agents.",
5
6
  "license": "Apache-2.0",
6
7
  "homepage": "https://polygraph.so",
@@ -63,11 +64,11 @@
63
64
  "typescript": "^5.9.3",
64
65
  "vitest": "^2.1.0",
65
66
  "@polygraph/core": "0.0.0",
66
- "@polygraph/onchain": "0.0.0",
67
+ "@polygraph/probes": "0.0.0",
67
68
  "@polygraph/agent": "0.0.0",
69
+ "@polygraph/onchain": "0.0.0",
68
70
  "@polygraph/mcp": "0.0.0",
69
- "@polygraph/cli": "0.0.0",
70
- "@polygraph/probes": "0.0.0"
71
+ "@polygraph/cli": "0.0.0"
71
72
  },
72
73
  "publishConfig": {
73
74
  "access": "public"