@polygraphso/litmus 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -46,7 +46,7 @@ type LitmusGrade = "A" | "B" | "C" | "D" | "F";
46
46
  type Severity = "low" | "medium" | "high";
47
47
  /** uint8 encoding for per-category verdicts on the attestation (onchain-proof-spec §5). */
48
48
  declare const CATEGORY_STATUS_UINT8: Record<CategoryStatus, number>;
49
- type FindingKind = "invisible-unicode" | "instruction-mimicry" | "markdown-trick" | "canary" | "egress" | "egress-allowed" | "permission-mislabel" | "internals-leak" | "crash";
49
+ type FindingKind = "invisible-unicode" | "instruction-mimicry" | "markdown-trick" | "canary" | "egress" | "egress-allowed" | "permission-mislabel" | "internals-leak" | "crash" | "exfil-instruction" | "dangerous-command" | "over-broad-trigger";
50
50
  interface Finding {
51
51
  kind: FindingKind;
52
52
  severity: Severity;
@@ -56,6 +56,8 @@ interface Finding {
56
56
  offset?: number;
57
57
  /** Offending tool name, when the finding is tied to one. */
58
58
  tool?: string;
59
+ /** Offending bundled file (relative path), for skill findings tied to a file. */
60
+ file?: string;
59
61
  host?: string;
60
62
  port?: number;
61
63
  firstBytes?: string;
@@ -162,6 +164,43 @@ declare function formatServerRef(parts: ParsedServerRef): string;
162
164
  /** Identity of a server without a version pin. */
163
165
  declare function serverKey(parts: Pick<ParsedServerRef, "registry" | "owner" | "name">): string;
164
166
 
167
+ /**
168
+ * Skill-identity helpers for refs of the form `{source}/{owner}/{name}[#{path}][@{ref}]`.
169
+ *
170
+ * Deliberately a SEPARATE namespace from the server `Registry` (identity.ts): a
171
+ * skill grade must never be readable as a server grade, so `SkillSource` and
172
+ * `Registry` do not overlap and the skill attestation uses its own EAS schema UID.
173
+ *
174
+ * A skill is static content, so the trust anchor is a CONTENT HASH of the whole
175
+ * directory (load-skill.ts), and the `@{ref}` pin should be IMMUTABLE (a git commit
176
+ * sha or the contentHash itself), never a mutable tag — there is no live re-
177
+ * fingerprint to catch drift, so the pin is all that binds a grade to the bytes.
178
+ *
179
+ * Examples:
180
+ * github/anthropic/skills#document-skills/pdf@a1b2c3d (repo + subdir + commit)
181
+ * marketplace/acme/format-markdown (a marketplace coordinate)
182
+ * npm/@acme/skills#skills/tidy@1.4.0 (a skill shipped in a pkg)
183
+ */
184
+ type SkillSource = "github" | "marketplace" | "npm";
185
+ interface ParsedSkillRef {
186
+ source: SkillSource;
187
+ /** Null for sources that don't namespace by owner (rare); usually present. */
188
+ owner: string | null;
189
+ name: string;
190
+ /** Subdirectory of the skill within the source (the SKILL.md folder), or null. */
191
+ path: string | null;
192
+ /** Immutable content pin (commit sha / contentHash). Mutable tags are discouraged. */
193
+ ref: string | null;
194
+ }
195
+ declare class SkillRefParseError extends Error {
196
+ constructor(ref: string, reason: string);
197
+ }
198
+ declare function parseSkillRef(ref: string): ParsedSkillRef;
199
+ declare function formatSkillRef(p: ParsedSkillRef): string;
200
+ /** Versionless identity of a skill (drops the `@ref` pin, keeps the `#path` — a
201
+ * repo can hold many skills, so the path is part of the identity). */
202
+ declare function skillKey(p: Pick<ParsedSkillRef, "source" | "owner" | "name" | "path">): string;
203
+
165
204
  /**
166
205
  * Deterministic JSON for content-addressing the evidence bundle
167
206
  * (onchain-proof-spec §2). Object keys are sorted lexicographically (recursively)
@@ -382,6 +421,261 @@ declare function canaryMatch(text: string, canaries: readonly string[]): Finding
382
421
  /** True if any finding is high-severity (the C-01 fail bar). */
383
422
  declare function hasHighSeverity(findings: readonly Finding[]): boolean;
384
423
 
424
+ /**
425
+ * Skill grading rubric — a strict structural mirror of `grade.ts gradeFromCategories`,
426
+ * for skill categories (S-*). Fail-first, always with a rationale.
427
+ *
428
+ * F — any S-01 (injection) or S-03 (exfil instruction) failure; disqualifying.
429
+ * D — any S-04 (dangerous bundled command) or S-05 (tool/permission overreach)
430
+ * failure, with no S-01/S-03 failure; capped.
431
+ * A — all present categories pass.
432
+ * B — S-01 & S-03 pass but a category was skipped (e.g. no bundle ⇒ S-04 skipped);
433
+ * a skipped category never grants A.
434
+ * F — fallthrough when S-01 did not complete (e.g. an unparseable SKILL.md):
435
+ * ungraded == unsafe.
436
+ *
437
+ * STRICT ALPHABET: skills emit A/B/D/F only — never "C". The agent gate's
438
+ * `DEFAULT_PASSING` is {A,B,C} and the hosted store rejects "C" (publish-check.ts),
439
+ * so a stray "C" would silently become a transacting grade. A "works but smells"
440
+ * signal belongs in the separate, non-letter quality channel.
441
+ */
442
+
443
+ type SkillCategoryCode = "S-01" | "S-03" | "S-04" | "S-05";
444
+ interface SkillCategoryResult {
445
+ code: SkillCategoryCode;
446
+ status: CategoryStatus;
447
+ reason?: string | null;
448
+ findings: Finding[];
449
+ }
450
+ interface SkillGrade {
451
+ grade: LitmusGrade;
452
+ rationale: string;
453
+ }
454
+ declare function gradeSkillCategories(categories: readonly SkillCategoryResult[]): SkillGrade;
455
+
456
+ /**
457
+ * Skill litmus harness — runs the deterministic static safety scan over a loaded
458
+ * skill and produces a content-addressed evidence bundle with an A/B/D/F letter.
459
+ *
460
+ * v1 (litmus-skill-v1) is STATIC ONLY: it scans the SKILL.md body + frontmatter
461
+ * (S-01 injection, S-03 exfil instructions) and the bundled executable files (S-04
462
+ * dangerous commands). It does NOT execute anything — bundled-script sandboxing and
463
+ * the agent-in-the-loop quality signal are out of scope here, by design. The
464
+ * disclaimer states the residual plainly: a static A is not behavioral proof.
465
+ */
466
+
467
+ declare const SKILL_METHODOLOGY_VERSION: "litmus-skill-v1";
468
+ declare const SKILL_BUNDLE_SCHEMA_VERSION: "0.1.0";
469
+ interface SkillEvidenceBundle {
470
+ schemaVersion: string;
471
+ methodologyVersion: string;
472
+ /** Caller-supplied identity (defaults to the directory). */
473
+ skillRef: string;
474
+ /** `0x` + 64 hex sha256 over the skill's file tree (the rug-pull anchor). */
475
+ contentHash: string;
476
+ ranAt: string;
477
+ harness: {
478
+ package: string;
479
+ version: string;
480
+ node: string;
481
+ };
482
+ categories: SkillCategoryResult[];
483
+ /** Non-letter signals (over-broad trigger, MED-only dangerous commands): recorded,
484
+ * never floor the grade. The semantic honesty/overreach checks (S-02/S-05) and the
485
+ * quality signal also land here / in a separate artifact, never in `categories`. */
486
+ advisories: Finding[];
487
+ grade: LitmusGrade;
488
+ gradeRationale: string;
489
+ disclaimer: string;
490
+ }
491
+ interface RunSkillLitmusOptions {
492
+ skillRef?: string;
493
+ /** Injectable for deterministic bundles/tests; defaults to now. */
494
+ ranAt?: string;
495
+ harnessVersion?: string;
496
+ }
497
+ declare function runSkillLitmus(dir: string, opts?: RunSkillLitmusOptions): SkillEvidenceBundle;
498
+
499
+ declare class SkillLoadError extends Error {
500
+ }
501
+ interface SkillFile {
502
+ /** NFC, forward-slash path relative to the skill dir. */
503
+ relPath: string;
504
+ bytes: Buffer;
505
+ /** A bundled executable (shebang or known interpreter extension) — the S-04 surface. */
506
+ isExecutable: boolean;
507
+ }
508
+ interface LoadedSkill {
509
+ dir: string;
510
+ /** Raw frontmatter text (between the leading `---` fences), "" if none. */
511
+ frontmatter: string;
512
+ /** Extracted `description` value, "" if absent. */
513
+ description: string;
514
+ /** Markdown body after the frontmatter. */
515
+ body: string;
516
+ /** Every file in the tree (including SKILL.md), sorted by relPath. */
517
+ files: SkillFile[];
518
+ /** `0x` + 64 hex sha256 over the canonical file-tree manifest. */
519
+ contentHash: string;
520
+ }
521
+ /** Load a skill from a directory. Throws SkillLoadError if there is no SKILL.md. */
522
+ declare function loadSkill(dir: string): LoadedSkill;
523
+
524
+ /**
525
+ * Skill safety scanners (S-01) — the deterministic, reuse-first core of the skill
526
+ * litmus. A skill's graded input is STATIC bytes (frontmatter + markdown body +
527
+ * bundled files), so these are pure functions over text, exactly like
528
+ * `probes/scanners.ts`. They are calibrated against a real skill corpus
529
+ * (`scripts/skill-fp-benchmark.ts`), not against tool docs.
530
+ *
531
+ * Recalibration (Phase 0 gate result, 110 real skills):
532
+ * - Scan the EXAMPLE-STRIPPED body: fenced/inline code and blockquoted transcript
533
+ * lines are where role tags / `system:` / tool-call JSON legitimately appear.
534
+ * - The bare `system:` colon pattern from `instructionMimicry` is dropped for
535
+ * skills: "design system:", "billing system:", "operating system:" are pervasive
536
+ * in honest skill prose and were the ONLY false-fail in the corpus. The
537
+ * angle-bracket role-tag pattern still covers the `<system>` injection shape, so
538
+ * no real injection signal is lost. After this, the corpus false-fail rate is 0.
539
+ */
540
+
541
+ /**
542
+ * The reference prose-segmentation for skills. Pinned as part of the skill
543
+ * methodology: "same bytes → same letter UNDER THIS PARSER". Strips fenced code
544
+ * (``` and ~~~), inline code spans, and blockquoted lines — the example/transcript
545
+ * surface — leaving the directive prose that an injection would have to live in to
546
+ * actually steer the agent.
547
+ */
548
+ declare function stripExamples(md: string): string;
549
+ /**
550
+ * S-01 — instruction-body injection / context-poisoning. Reuses the existing
551
+ * text scanners verbatim, over the example-stripped body, minus the over-broad
552
+ * bare-`system:` pattern. HIGH findings floor the letter to F (see grade-skill).
553
+ */
554
+ declare function skillInjection(body: string): Finding[];
555
+ /** True if S-01 should fail (any HIGH finding), the C-01-parity fail bar. */
556
+ declare function skillInjectionFails(findings: readonly Finding[]): boolean;
557
+ /**
558
+ * S-03 — an imperative instruction to exfiltrate secrets to a sink. Deliberately
559
+ * HIGH-PRECISION: a transmit verb, a credential/secret noun, AND a network sink
560
+ * must co-occur in ONE sentence. Prose that merely *mentions* credentials, or a
561
+ * security skill that *documents* an exfil attack across paragraphs, will not trip.
562
+ * The residual (an exfil instruction split across sentences, or constructed at
563
+ * agent runtime) is a disclosed limit — static prose scanning cannot resolve it.
564
+ */
565
+ declare function exfilInstruction(text: string): Finding[];
566
+ /**
567
+ * S-04 — dangerous commands in a bundled EXECUTABLE FILE. Scanning files (not body
568
+ * prose) collapses the "taught vs executed" ambiguity: a file with a shebang IS the
569
+ * executable. Obfuscated payloads (base64/hex blobs) are decoded and re-scanned so
570
+ * an encoded `curl … | sh` is still caught. HIGH findings fail the category, capping the letter at D.
571
+ */
572
+ declare function dangerousCommand(text: string, file?: string): Finding[];
573
+ /** Advisory: a frontmatter description/trigger that claims to fire on everything.
574
+ * Pure-lexical, the only deterministic slice of honesty checking; recorded as an
575
+ * advisory finding, NOT a failing category (see the plan: S-02/S-05 are advisory). */
576
+ declare function overBroadTrigger(description: string): Finding[];
577
+
578
+ /**
579
+ * Optional LLM-judged quality axes — the "is it honest / coherent" signal that
580
+ * static scanning provably cannot decide (this is the semantic S-02 we kept OUT of
581
+ * the deterministic letter). It is ADVISORY, NON-DETERMINISTIC, and provider-
582
+ * AGNOSTIC: it runs against any `Judge`, never floors the safety letter, and is
583
+ * never minted.
584
+ *
585
+ * Provider-agnostic by design:
586
+ * - inside an agent, the host model judges via MCP sampling (no key — the adapter
587
+ * lives in the litmus package, where the server connection is);
588
+ * - standalone, the user brings their OWN key for any OpenAI-compatible endpoint
589
+ * (OpenAI, OpenRouter, Groq, Google's compat layer, a local model, …);
590
+ * - with neither, the judged axes are simply skipped — the litmus core needs no key.
591
+ *
592
+ * Repeatability is majority-over-k + a recorded judge id, not seeding (modern models
593
+ * don't expose a usable temperature). The agreement ratio is reported honestly.
594
+ */
595
+
596
+ /** Provider-agnostic completion. Implementations: MCP sampling, OpenAI-compatible. */
597
+ interface Judge {
598
+ /** Stable label recorded in the bundle (e.g. "mcp-sampling", "openai-compat:gpt-4o"). */
599
+ readonly id: string;
600
+ complete(system: string, user: string): Promise<string>;
601
+ }
602
+ interface OpenAICompatConfig {
603
+ baseUrl: string;
604
+ apiKey: string;
605
+ model: string;
606
+ }
607
+ /**
608
+ * A Judge over any OpenAI-compatible Chat Completions endpoint. Uses global fetch
609
+ * (Node ≥18) — no SDK dependency. Sends only model/messages/max_tokens for the
610
+ * widest provider compatibility (temperature is omitted; many models reject it and
611
+ * repeatability comes from majority-over-k anyway).
612
+ */
613
+ declare function openAICompatJudge(cfg: OpenAICompatConfig): Judge;
614
+ /** Build an OpenAI-compatible judge from env, or null if no key is configured.
615
+ * LITMUS_LLM_API_KEY (+ LITMUS_LLM_MODEL; LITMUS_LLM_BASE_URL defaults to OpenAI). */
616
+ declare function judgeFromEnv(env?: NodeJS.ProcessEnv): Judge | null;
617
+ interface JudgeOptions {
618
+ /** Samples per run; majority-voted. Default 1 (host-agent sampling is not free). */
619
+ samples?: number;
620
+ }
621
+ /**
622
+ * Run the judged axes against a skill. Draws `samples` verdicts, majority-votes per
623
+ * axis, and reports the agreement ratio. Throws only if EVERY sample failed (no
624
+ * usable verdict) — callers treat that, and "no judge", as "judged axes not run".
625
+ */
626
+ declare function judgeSkillQuality(loaded: LoadedSkill, judge: Judge, opts?: JudgeOptions): Promise<JudgedQuality>;
627
+
628
+ declare const SKILL_QUALITY_VERSION: "skill-quality-v1";
629
+ type QualityCheckStatus = "pass" | "warn" | "fail";
630
+ /** Deliberately not A–F: a quality verdict must never read as a safety letter. */
631
+ type QualityVerdict = "well-formed" | "issues" | "malformed";
632
+ interface QualityCheck {
633
+ id: string;
634
+ status: QualityCheckStatus;
635
+ detail: string;
636
+ }
637
+ /** Optional, NON-DETERMINISTIC, opt-in LLM-judged axes (see quality-judge.ts).
638
+ * Present only when a judge was available (host-agent sampling, or a user key). */
639
+ interface JudgedQuality {
640
+ /** Judge identity: "mcp-sampling" (host agent), or "openai-compat:<model>". */
641
+ judge: string;
642
+ /** Number of samples drawn per axis (repeatability is majority-over-k, not seeding). */
643
+ samples: number;
644
+ /** Fraction of samples that agreed with the reported per-axis majority (0..1). */
645
+ agreement: number;
646
+ axes: {
647
+ axis: "honesty" | "coherence";
648
+ rating: "good" | "concern" | "bad";
649
+ rationale: string;
650
+ }[];
651
+ note: string;
652
+ }
653
+ interface QualityBundle {
654
+ qualityVersion: string;
655
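+ /** Binds to the exact skill it evaluated: `skillRef` and `contentHash` are the SAME identity as the safety bundle. */
656
+ skillRef: string;
657
+ /** Content hash of the evaluated skill. Same identity as the safety bundle, but carried in a SEPARATE artifact — never inside the safety EvidenceBundle. */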
658
+ contentHash: string;
659
+ ranAt: string;
660
+ verdict: QualityVerdict;
661
+ checks: QualityCheck[];
662
+ /** Non-deterministic LLM-judged axes, if a judge was available; else omitted. */
663
+ judged?: JudgedQuality;
664
+ disclaimer: string;
665
+ }
666
+ interface RunSkillQualityOptions {
667
+ skillRef?: string;
668
+ ranAt?: string;
669
+ }
670
+ declare function runSkillQuality(dir: string, opts?: RunSkillQualityOptions): QualityBundle;
671
+ /**
672
+ * The deterministic quality bundle PLUS the optional LLM-judged axes. The judged
673
+ * axes are best-effort: if the judge is unavailable or every sample fails, they are
674
+ * omitted and the deterministic verdict is returned unchanged. The judged result
675
+ * NEVER changes `verdict` and never touches the safety letter.
676
+ */
677
+ declare function runSkillQualityJudged(dir: string, judge: Judge, opts?: RunSkillQualityOptions & JudgeOptions): Promise<QualityBundle>;
678
+
385
679
  /**
386
680
  * Tool-safety classification (litmus-test-v1 §C-01/§C-03 safety note).
387
681
  *
@@ -489,12 +783,64 @@ declare function litmusFields(bundle: EvidenceBundle, reportCID: string): Litmus
489
783
  declare function encodeLitmusAttestation(bundle: EvidenceBundle, reportCID: string): string;
490
784
  declare function decodeLitmusAttestation(encoded: string): Record<string, unknown>;
491
785
 
786
+ /**
787
+ * EAS attestation encoding for SKILL grades (litmus-skill-v1).
788
+ *
789
+ * A SEPARATE, flat schema with its OWN UID — not an extension of LITMUS_SCHEMA.
790
+ * read-skill.ts fail-closes any attestation not under this exact UID, so a skill
791
+ * grade can never be read as a server grade (and vice versa). Fields mirror the
792
+ * server schema but key on a static-content artifact: `skillRef` + `contentHash`
793
+ * (the whole-directory hash) replace serverRef + toolDefsFingerprint, and
794
+ * `resolvedRef` is the immutable content pin (commit sha / contentHash) the grade
795
+ * was run against.
796
+ *
797
+ * Like eas.ts, this is a FLAT schema (no tuples/arrays/bytes), so the EAS
798
+ * SchemaEncoder reduces to `AbiCoder.defaultAbiCoder().encode(types, values)`; we
799
+ * encode directly with ethers and pin the bytes in eas-skill.test.ts.
800
+ */
801
+
802
+ declare const LITMUS_SKILL_SCHEMA = "string skillRef,bytes32 contentHash,uint8 gradeS01,uint8 gradeS03,uint8 gradeS04,string overallGrade,string reportCID,string methodologyVersion,uint64 ranAt,string resolvedRef";
803
+ interface SkillAttestationFields {
804
+ skillRef: string;
805
+ contentHash: string;
806
+ gradeS01: number;
807
+ gradeS03: number;
808
+ gradeS04: number;
809
+ overallGrade: string;
810
+ reportCID: string;
811
+ methodologyVersion: string;
812
+ ranAt: bigint;
813
+ resolvedRef: string;
814
+ }
815
+ /** Minimal structural view of a skill evidence bundle — satisfied by the probes
816
+ * SkillEvidenceBundle, so onchain needs no dependency on probes. */
817
+ interface SkillGradeForAttestation {
818
+ skillRef: string;
819
+ contentHash: string;
820
+ categories: readonly {
821
+ code: string;
822
+ status: CategoryStatus;
823
+ }[];
824
+ grade: string;
825
+ methodologyVersion: string;
826
+ ranAt: string;
827
+ }
828
+ /** Build the attestation fields. `resolvedRef` is the immutable pin (commit sha /
829
+ * contentHash) the grade was run against; "" when none is known. */
830
+ declare function skillAttestationFields(g: SkillGradeForAttestation, reportCID: string, resolvedRef: string | null): SkillAttestationFields;
831
+ declare function encodeSkillAttestationFields(f: SkillAttestationFields): string;
832
+ declare function encodeSkillAttestation(g: SkillGradeForAttestation, reportCID: string, resolvedRef: string | null): string;
833
+ declare function decodeSkillAttestation(encoded: string): Record<string, unknown>;
834
+
492
835
  /**
493
836
  * Read a litmus attestation from chain (the trust-critical read — onchain-proof
494
837
  * §7). Needs an RPC + a registered schema; the agent-gate calls this, then
495
838
  * re-checks the live fingerprint before paying.
496
839
  *
497
- * [verify] eas-sdk EAS.getAttestation return shape (uid / data / revocationTime).
840
+ * The read is a single EAS `getAttestation` view call. We hit the contract
841
+ * directly through a minimal ethers ABI fragment (below) rather than the
842
+ * eas-sdk `EAS` class — same on-chain struct, one fewer dependency (eas-sdk
843
+ * dragged hardhat into the production tree).
498
844
  */
499
845
  /** The registered litmus schema UID for the selected network (from env). */
500
846
  declare function litmusSchemaUID(): string;
@@ -515,6 +861,36 @@ interface OnchainLitmusAttestation {
515
861
  }
516
862
  declare function readAttestation(uid: string): Promise<OnchainLitmusAttestation | null>;
517
863
 
864
+ /**
865
+ * Read a SKILL attestation from chain. Mirrors read.ts, but fail-closes on the
866
+ * SKILL schema UID (NEXT_PUBLIC_EAS_SKILL_SCHEMA_UID) — a SEPARATE UID from the
867
+ * server schema. EAS schemas are permissionless, so without this bind a server
868
+ * attestation (or a look-alike) could be decoded and trusted as a skill grade.
869
+ *
870
+ * The trust anchor a consumer must check is `contentHash`: recompute sha256 of the
871
+ * skill directory (every file the SKILL.md can load) and require equality before
872
+ * installing. There is no live re-fingerprint, so the (immutable) `resolvedRef`
873
+ * pin and the contentHash are all that bind a grade to the bytes that run.
874
+ */
875
+ /** The registered SKILL schema UID for the selected network (from env). Distinct
876
+ * from the server schema UID so the two can never be confused. */
877
+ declare function skillSchemaUID(): string;
878
+ interface OnchainSkillAttestation {
879
+ uid: string;
880
+ skillRef: string;
881
+ /** Whole-directory sha256 (`0x` + 64 hex) — the consumer's re-hash trust anchor. */
882
+ contentHash: string;
883
+ overallGrade: string;
884
+ reportCID: string;
885
+ /** Immutable content pin (commit sha / contentHash) the grade was run against;
886
+ * null when none (the on-chain empty-string sentinel is normalized here). */
887
+ resolvedRef: string | null;
888
+ revoked: boolean;
889
+ attester: string;
890
+ expirationTime: bigint;
891
+ }
892
+ declare function readSkillAttestation(uid: string): Promise<OnchainSkillAttestation | null>;
893
+
518
894
  /**
519
895
  * The agent payment-gate (technical-design §6, onchain-proof-spec §7).
520
896
  *
@@ -604,6 +980,68 @@ declare function handleRunLitmus({ server_ref, bearer, header }: {
604
980
  }[];
605
981
  }>;
606
982
 
983
+ /**
984
+ * `run_skill_litmus` — run the deterministic static safety litmus over a Claude
985
+ * Code / Agent Skill (a SKILL.md + bundle) and return the grade + evidence.
986
+ *
987
+ * Unlike `run_litmus` (which LAUNCHES an MCP server's code), this is a pure STATIC
988
+ * read of the skill's text and bundled files — no execution, no network. That is
989
+ * also its disclosed limit: a static A is not behavioral proof. v1 grades a LOCAL
990
+ * skill directory; remote refs (github/marketplace) come with the onchain phase.
991
+ */
992
+
993
+ declare const RUN_SKILL_LITMUS_TOOL_NAME = "run_skill_litmus";
994
+ declare const RUN_SKILL_LITMUS_TOOL_TITLE = "Run a safety litmus on a Claude Code skill";
995
+ declare const RUN_SKILL_LITMUS_TOOL_DESCRIPTION: string;
996
+ declare const runSkillLitmusInputShape: {
997
+ skill_ref: z.ZodString;
998
+ };
999
+ /** Optional judge for the advisory quality axes. Resolved per-call by mcp.ts
1000
+ * (host-agent sampling if available, else an env key) — null ⇒ deterministic
1001
+ * quality only. The litmus core never requires a key. */
1002
+ interface RunSkillLitmusContext {
1003
+ judge?: Judge | null;
1004
+ }
1005
+ declare function handleRunSkillLitmus({ skill_ref }: {
1006
+ skill_ref: string;
1007
+ }, ctx?: RunSkillLitmusContext): Promise<{
1008
+ isError: true;
1009
+ content: {
1010
+ type: "text";
1011
+ text: string;
1012
+ }[];
1013
+ } | {
1014
+ content: {
1015
+ type: "text";
1016
+ text: string;
1017
+ }[];
1018
+ }>;
1019
+
1020
+ /**
1021
+ * `verify_skill_attestation` — read a skill's already-published polygraph grade
1022
+ * (no run) before an agent installs or trusts it. The skill analogue of
1023
+ * `verify_attestation`: instead of recomputing a LIVE tool-surface fingerprint,
1024
+ * the consumer must recompute the skill's CONTENT HASH (sha256 of every file the
1025
+ * SKILL.md can load) and require it to equal the attested `contentHash` before
1026
+ * installing — there is no live re-fingerprint, so the hash is the only thing
1027
+ * binding the grade to the bytes that run.
1028
+ */
1029
+
1030
+ declare const VERIFY_SKILL_TOOL_NAME = "verify_skill_attestation";
1031
+ declare const VERIFY_SKILL_TOOL_TITLE = "Verify a skill's polygraph attestation";
1032
+ declare const VERIFY_SKILL_TOOL_DESCRIPTION: string;
1033
+ declare const verifySkillInputShape: {
1034
+ skill_ref: z.ZodString;
1035
+ };
1036
+ declare function handleVerifySkill({ skill_ref }: {
1037
+ skill_ref: string;
1038
+ }): Promise<{
1039
+ content: {
1040
+ type: "text";
1041
+ text: string;
1042
+ }[];
1043
+ }>;
1044
+
607
1045
  /**
608
1046
  * `polygraphso litmus <ref | https-url | path-to-mcp>` — run the behavioral
609
1047
  * harness locally and print the grade. The heavy harness (`@polygraph/probes`)
@@ -634,4 +1072,4 @@ declare function parseAuthFlags(args: readonly string[], env?: NodeJS.ProcessEnv
634
1072
  /** A target is an https URL, a local MCP entry file, or a registry ref. */
635
1073
  declare function resolveTarget(target: string): string | StdioCommand;
636
1074
 
637
- export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, LITMUS_SCHEMA, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type ParsedLitmusFlags, type ParsedServerRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, ServerRefParseError, type Severity, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, decodeLitmusAttestation, encodeLitmusAttestation, fingerprintToolDefs, formatServerRef, gateDecision, gradeFromCategories, handleRunLitmus, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, litmusFields, litmusSchemaUID, liveFingerprint, markdownTricks, networkConfig, parseAuthFlags, parseServerRef, readAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, selectedNetwork, serverKey, stateChangingToolNames };
1075
+ export { type AttestationView, BUNDLE_SCHEMA_VERSION, type BundleInput, CATEGORY_STATUS_UINT8, type CategoryCode, type CategoryResult, type CategoryStatus, type ConnectOptions, type ConnectedTarget, DEFAULT_PASSING, type EvidenceBundle, type Finding, type FindingKind, type FingerprintResult, type GateAction, type GateDecision, type Grade, type HarnessInfo, type Judge, type JudgeOptions, type JudgedQuality, LITMUS_SCHEMA, LITMUS_SKILL_SCHEMA, type LitmusAttestationFields, type LitmusGrade, type RunLitmusOptions as LitmusOptions, type LoadedSkill, METHODOLOGY_VERSION, NETWORKS, type Network, type NetworkConfig, type OnchainLitmusAttestation, type OnchainSkillAttestation, type OpenAICompatConfig, type ParsedLitmusFlags, type ParsedServerRef, type ParsedSkillRef, type ProbeContext, type ProbeId, type ProbeResult, type ProbeStatus, type QualityBundle, type QualityCheck, type QualityCheckStatus, type QualityVerdict, RUN_LITMUS_TOOL_DESCRIPTION, RUN_LITMUS_TOOL_NAME, RUN_LITMUS_TOOL_TITLE, RUN_SKILL_LITMUS_TOOL_DESCRIPTION, RUN_SKILL_LITMUS_TOOL_NAME, RUN_SKILL_LITMUS_TOOL_TITLE, type Registry, type RunLitmusOptions, type RunSkillLitmusOptions, type RunSkillQualityOptions, SKILL_BUNDLE_SCHEMA_VERSION, SKILL_METHODOLOGY_VERSION, SKILL_QUALITY_VERSION, ServerRefParseError, type Severity, type SkillAttestationFields, type SkillCategoryCode, type SkillCategoryResult, type SkillEvidenceBundle, type SkillFile, type SkillGrade, type SkillGradeForAttestation, SkillLoadError, SkillRefParseError, type SkillSource, type StdioCommand, type TargetDescriptor, type TargetInput, type TargetKind, type ToolAnnotations, type ToolDef, type ToolSafety, VERIFY_SKILL_TOOL_DESCRIPTION, VERIFY_SKILL_TOOL_NAME, VERIFY_SKILL_TOOL_TITLE, assembleBundle, canaryMatch, canonicalStringify, classifyTool, connectTarget, dangerousCommand, decodeLitmusAttestation, decodeSkillAttestation, encodeLitmusAttestation, encodeSkillAttestation, encodeSkillAttestationFields, exfilInstruction, fingerprintToolDefs, formatServerRef, formatSkillRef, gateDecision, gradeFromCategories, gradeSkillCategories, handleRunLitmus, handleRunSkillLitmus, handleVerifySkill, hasHighSeverity, instructionMimicry, internalsLeak, invisibleUnicode, judgeFromEnv, judgeSkillQuality, litmusFields, litmusSchemaUID, liveFingerprint, loadSkill, markdownTricks, networkConfig, openAICompatJudge, overBroadTrigger, parseAuthFlags, parseServerRef, parseSkillRef, readAttestation, readSkillAttestation, resolveTarget, rpcUrl, runLitmus, runLitmusInputShape, runSkillLitmus, runSkillLitmusInputShape, runSkillQuality, runSkillQualityJudged, selectedNetwork, serverKey, skillAttestationFields, skillInjection, skillInjectionFails, skillKey, skillSchemaUID, stateChangingToolNames, stripExamples, verifySkillInputShape };
package/dist/index.js CHANGED
@@ -1,49 +1,88 @@
1
1
  import {
2
2
  LITMUS_SCHEMA,
3
+ LITMUS_SKILL_SCHEMA,
3
4
  NETWORKS,
4
5
  RUN_LITMUS_TOOL_DESCRIPTION,
5
6
  RUN_LITMUS_TOOL_NAME,
6
7
  RUN_LITMUS_TOOL_TITLE,
8
+ RUN_SKILL_LITMUS_TOOL_DESCRIPTION,
9
+ RUN_SKILL_LITMUS_TOOL_NAME,
10
+ RUN_SKILL_LITMUS_TOOL_TITLE,
11
+ VERIFY_SKILL_TOOL_DESCRIPTION,
12
+ VERIFY_SKILL_TOOL_NAME,
13
+ VERIFY_SKILL_TOOL_TITLE,
7
14
  decodeLitmusAttestation,
15
+ decodeSkillAttestation,
8
16
  encodeLitmusAttestation,
17
+ encodeSkillAttestation,
18
+ encodeSkillAttestationFields,
9
19
  handleRunLitmus,
20
+ handleRunSkillLitmus,
21
+ handleVerifySkill,
10
22
  litmusFields,
11
23
  litmusSchemaUID,
12
24
  networkConfig,
13
25
  readAttestation,
26
+ readSkillAttestation,
14
27
  rpcUrl,
15
28
  runLitmusInputShape,
16
- selectedNetwork
17
- } from "./chunk-LBXHFQN3.js";
29
+ runSkillLitmusInputShape,
30
+ selectedNetwork,
31
+ skillAttestationFields,
32
+ skillSchemaUID,
33
+ verifySkillInputShape
34
+ } from "./chunk-AVF3GYCS.js";
18
35
  import {
19
36
  parseAuthFlags,
20
37
  resolveTarget
21
- } from "./chunk-VOPISHBU.js";
38
+ } from "./chunk-M5HXKZVN.js";
22
39
  import {
40
+ SKILL_BUNDLE_SCHEMA_VERSION,
41
+ SKILL_METHODOLOGY_VERSION,
42
+ SKILL_QUALITY_VERSION,
43
+ SkillLoadError,
23
44
  assembleBundle,
24
45
  canaryMatch,
25
46
  classifyTool,
26
47
  connectTarget,
48
+ dangerousCommand,
49
+ exfilInstruction,
27
50
  fingerprintToolDefs,
28
51
  gradeFromCategories,
52
+ gradeSkillCategories,
29
53
  hasHighSeverity,
30
54
  instructionMimicry,
31
55
  internalsLeak,
32
56
  invisibleUnicode,
57
+ judgeFromEnv,
58
+ judgeSkillQuality,
59
+ loadSkill,
33
60
  markdownTricks,
61
+ openAICompatJudge,
62
+ overBroadTrigger,
34
63
  runLitmus,
35
- stateChangingToolNames
36
- } from "./chunk-35UOPCBW.js";
64
+ runSkillLitmus,
65
+ runSkillQuality,
66
+ runSkillQualityJudged,
67
+ skillInjection,
68
+ skillInjectionFails,
69
+ stateChangingToolNames,
70
+ stripExamples
71
+ } from "./chunk-DN2OX4RT.js";
37
72
  import {
38
73
  BUNDLE_SCHEMA_VERSION,
39
74
  CATEGORY_STATUS_UINT8,
40
75
  METHODOLOGY_VERSION,
41
76
  ServerRefParseError,
77
+ SkillRefParseError,
42
78
  canonicalStringify,
43
79
  formatServerRef,
80
+ formatSkillRef,
44
81
  parseServerRef,
45
- serverKey
46
- } from "./chunk-ZR6XRGMQ.js";
82
+ parseSkillRef,
83
+ serverKey,
84
+ skillKey
85
+ } from "./chunk-44R4ZYOE.js";
47
86
 
48
87
  // ../agent/src/gate.ts
49
88
  function sameServer(a, b) {
@@ -92,41 +131,80 @@ export {
92
131
  CATEGORY_STATUS_UINT8,
93
132
  DEFAULT_PASSING,
94
133
  LITMUS_SCHEMA,
134
+ LITMUS_SKILL_SCHEMA,
95
135
  METHODOLOGY_VERSION,
96
136
  NETWORKS,
97
137
  RUN_LITMUS_TOOL_DESCRIPTION,
98
138
  RUN_LITMUS_TOOL_NAME,
99
139
  RUN_LITMUS_TOOL_TITLE,
140
+ RUN_SKILL_LITMUS_TOOL_DESCRIPTION,
141
+ RUN_SKILL_LITMUS_TOOL_NAME,
142
+ RUN_SKILL_LITMUS_TOOL_TITLE,
143
+ SKILL_BUNDLE_SCHEMA_VERSION,
144
+ SKILL_METHODOLOGY_VERSION,
145
+ SKILL_QUALITY_VERSION,
100
146
  ServerRefParseError,
147
+ SkillLoadError,
148
+ SkillRefParseError,
149
+ VERIFY_SKILL_TOOL_DESCRIPTION,
150
+ VERIFY_SKILL_TOOL_NAME,
151
+ VERIFY_SKILL_TOOL_TITLE,
101
152
  assembleBundle,
102
153
  canaryMatch,
103
154
  canonicalStringify,
104
155
  classifyTool,
105
156
  connectTarget,
157
+ dangerousCommand,
106
158
  decodeLitmusAttestation,
159
+ decodeSkillAttestation,
107
160
  encodeLitmusAttestation,
161
+ encodeSkillAttestation,
162
+ encodeSkillAttestationFields,
163
+ exfilInstruction,
108
164
  fingerprintToolDefs,
109
165
  formatServerRef,
166
+ formatSkillRef,
110
167
  gateDecision,
111
168
  gradeFromCategories,
169
+ gradeSkillCategories,
112
170
  handleRunLitmus,
171
+ handleRunSkillLitmus,
172
+ handleVerifySkill,
113
173
  hasHighSeverity,
114
174
  instructionMimicry,
115
175
  internalsLeak,
116
176
  invisibleUnicode,
177
+ judgeFromEnv,
178
+ judgeSkillQuality,
117
179
  litmusFields,
118
180
  litmusSchemaUID,
119
181
  liveFingerprint,
182
+ loadSkill,
120
183
  markdownTricks,
121
184
  networkConfig,
185
+ openAICompatJudge,
186
+ overBroadTrigger,
122
187
  parseAuthFlags,
123
188
  parseServerRef,
189
+ parseSkillRef,
124
190
  readAttestation,
191
+ readSkillAttestation,
125
192
  resolveTarget,
126
193
  rpcUrl,
127
194
  runLitmus,
128
195
  runLitmusInputShape,
196
+ runSkillLitmus,
197
+ runSkillLitmusInputShape,
198
+ runSkillQuality,
199
+ runSkillQualityJudged,
129
200
  selectedNetwork,
130
201
  serverKey,
131
- stateChangingToolNames
202
+ skillAttestationFields,
203
+ skillInjection,
204
+ skillInjectionFails,
205
+ skillKey,
206
+ skillSchemaUID,
207
+ stateChangingToolNames,
208
+ stripExamples,
209
+ verifySkillInputShape
132
210
  };