pluribus-context 0.3.36 → 0.3.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +3 -2
- package/bin/pluribus.js +3 -1
- package/docs/community-review-packet.md +19 -0
- package/docs/context-budget-receipts.md +1 -1
- package/docs/mcp-runtime-config-receipts.md +91 -0
- package/docs/skill-policy-receipts.md +1 -1
- package/examples/agent-skills/README.md +10 -0
- package/examples/mcp-audit-receipts/README.md +24 -0
- package/examples/mcp-audit-receipts/mcp-audit-receipt.json +70 -0
- package/examples/mcp-runtime-config-receipts/README.md +15 -0
- package/examples/mcp-runtime-config-receipts/check-mcp-runtime-config-receipt.mjs +127 -0
- package/examples/mcp-runtime-config-receipts/mcp-runtime-config-receipt.json +82 -0
- package/package.json +2 -1
- package/{examples/agent-skills → skills}/context-receipts/README.md +4 -4
- package/skills/context-receipts/SKILL.md +206 -0
- package/{examples/agent-skills → skills}/skill-policy-receipts/README.md +1 -1
- package/skills/skill-policy-receipts/SKILL.md +77 -0
- package/src/commands/demo.js +166 -12
- package/src/utils/version.js +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,14 @@
|
|
|
4
4
|
|
|
5
5
|
All notable changes to Pluribus are documented here.
|
|
6
6
|
|
|
7
|
+
## 0.3.38 - 2026-06-06
|
|
8
|
+
|
|
9
|
+
- Added `pluribus demo mcp-audit-receipt`, a tiny npm-runnable demo that validates privacy-safe MCP tool-call audit events and low-cardinality usage metrics without logging raw prompts, args, results, tokens, or row data.
|
|
10
|
+
|
|
11
|
+
## 0.3.37 - 2026-06-06
|
|
12
|
+
|
|
13
|
+
- Published the canonical top-level Agent Skills layout (`skills/*/SKILL.md`) and backwards-compatible legacy mirrors so external skill registries can keep source links verifiable while package users get the current recipes from npm.
|
|
14
|
+
|
|
7
15
|
## 0.3.36 - 2026-06-05
|
|
8
16
|
|
|
9
17
|
- Added `pluribus demo skill-use-rate`, a tiny npm-runnable demo that validates the packaged Skill use-rate receipt and warns when installed/attached Skills have no observed invocations.
|
package/README.md
CHANGED
|
@@ -14,7 +14,7 @@ The original sync workflow is still useful: Pluribus can keep project instructio
|
|
|
14
14
|
|
|
15
15
|
It is **not** a persistent memory layer, retrieval system, agent orchestrator, enterprise ContextOps platform, or agent-merging framework. Think evidence for context boundaries: `CLAUDE.md`, `.cursorrules`, `copilot-instructions.md`, `AGENTS.md`, MCP Tool Search, Agent Skills, RAG/code-search, pruning, and compaction — with privacy-safe receipts instead of raw content dumps.
|
|
16
16
|
|
|
17
|
-
**Reviewer shortcut:** evaluating Pluribus for a list, newsletter, package roundup, or tool directory? Use the [Community Review Packet](docs/community-review-packet.md) for copy-paste directory submission fields, safety/removability notes, feedback links, and disposable 60-second smoke tests. If you only run one command for the cross-tool audit, try `npx --yes pluribus-context@latest audit --json --fidelity-report` to see native discovery surfaces, generic fallbacks, load evidence, duplicate-load selection evidence, manual activation requirements, effective context scope, and semantic differences. For the agent-observability wedge, start with [context-budget receipts](docs/context-budget-receipts.md): privacy-safe evidence for what MCP schemas, skills, memory, subagents, CLI help, retrieval chunks, pruning runs, or compaction summaries crossed an agent boundary. If you want the same idea as a copyable skill, use the [context-receipts Agent Skill recipe](
|
|
17
|
+
**Reviewer shortcut:** evaluating Pluribus for a list, newsletter, package roundup, or tool directory? Use the [Community Review Packet](docs/community-review-packet.md) for copy-paste directory submission fields, safety/removability notes, feedback links, and disposable 60-second smoke tests. If you only run one command for the cross-tool audit, try `npx --yes pluribus-context@latest audit --json --fidelity-report` to see native discovery surfaces, generic fallbacks, load evidence, duplicate-load selection evidence, manual activation requirements, effective context scope, and semantic differences. For the agent-observability wedge, start with [context-budget receipts](docs/context-budget-receipts.md): privacy-safe evidence for what MCP schemas, skills, memory, subagents, CLI help, retrieval chunks, pruning runs, or compaction summaries crossed an agent boundary. If you want the same idea as a copyable skill, use the [context-receipts Agent Skill recipe](skills/context-receipts/). npm `latest` is currently aligned with the GitHub release; the review packet also documents a GitHub-release smoke fallback for future release-lag windows.
|
|
18
18
|
|
|
19
19
|
---
|
|
20
20
|
|
|
@@ -161,7 +161,7 @@ npx --yes pluribus-context@latest sync --dry-run
|
|
|
161
161
|
|
|
162
162
|
If the preview looks right, run `npx --yes pluribus-context@latest sync` to write the tool-specific files.
|
|
163
163
|
|
|
164
|
-
For a fuller walkthrough, see the [Quickstart](docs/quickstart.md). To enforce generated context files in pull requests, use the [CI audit example](docs/ci-audit-example.md); to catch drift before commits leave your machine, use the [Pre-commit Audit Hook](docs/pre-commit-audit.md). If your repo already has `CLAUDE.md`, `.cursorrules`, Copilot instructions, or `AGENTS.md`, run a [Context Drift Audit](docs/context-drift-audit.md) first, try the intentionally drifted [audit example](examples/context-drift-audit/), then follow [Migrate Existing AI Context Files](docs/migrate-existing-context.md). If you switch between Cursor, Claude Code, Copilot, and terminal agents, try the [Cursor ↔ Claude Code context handoff guide](docs/cursor-claude-context-handoff.md) and its [example source file](examples/context-handoff/pluribus.md). If you run multiple AI sessions on the same project, try the [Coordination Contract guide](docs/coordination-contract.md) and its [example source file](examples/coordination-contract/pluribus.md) to keep event-log/scratchpad protocol rules aligned without turning Pluribus into an orchestrator. If you evaluate code-search, MCP retrieval, RAG-over-notes, or agent memory tools, use the [Orchestration-layer Search Receipts](docs/orchestration-search-receipts.md) sketch to measure retrieved context from the harness layer without asking retrieval tools to inspect whole transcripts. If you are adding agent observability, traces, or OpenTelemetry-style events, start with [Context Receipts for Agent Observability](docs/context-receipts-for-agent-observability.md), then use the [Context Input Evidence](docs/context-input-evidence.md) sketch and its [executable demos](examples/context-input-evidence/) to separate source bytes, canonical text, delivered hashes, post-hoc session-log receipts, skill/plugin invocation receipts, shared-memory retrieval receipts, self-remediating brain/doctor receipts, and OpenTelemetry-style SpanEvents. 
If you publish AI rules, skills, or instruction bundles as "portable", use the [Portability Fidelity Report](docs/portability-fidelity-report.md) and its [example source file](examples/portability-fidelity/pluribus.md) to make compatibility claims evidence-based instead of self-attested. Before committing shared or generated AI instructions, use the [Context File Review Checklist](docs/context-file-review.md). If you're deciding between Pluribus and a one-way rules converter, see [When to use Pluribus](docs/when-to-use-pluribus.md). If you are debugging "context drift" after compaction or long sessions, start with the [Context Drift Taxonomy](docs/context-drift-taxonomy.md) to separate file drift from runtime precedence drift. If you use MCP memory or knowledge-graph tools, try the [MCP memory handoff demo](docs/memory-mcp-handoff.md) to keep recall/store protocols aligned across AI coding tools without turning Pluribus into a memory server. If your shared-memory or knowledge-graph setup lets agents write durable facts, use [Memory write policy receipts](docs/memory-write-policy-receipts.md) and the [copyable gate](examples/memory-write-policy/) to require proposed diffs, scope, lifecycle, visibility, approval, and privacy checks before one run can teach every harness. If hooks, local gateways, or agent firewalls block risky tool calls, use [Agent firewall denial/audit receipts](docs/agent-firewall-denial-audit.md) and the [copyable checker](examples/agent-firewall-denial-audit/) to split model-visible denial from private operator audit evidence. If you are turning Claude Code/OpenClaw/Cursor into role-based “AI employee” agents with Skills and memory folders, use the [Controlled learning queue](docs/controlled-learning-queue.md) and [copyable example](examples/controlled-learning-queue/) to let agents propose durable memory changes without silently rewriting shared ICP, pricing, compliance, or process assumptions. 
If `PreCompact` / `PostCompact` or `SessionStart(compact)` workflows decide whether an agent may continue after summarization, use [Compaction resume receipts](docs/compaction-resume-receipts.md) and the [copyable gate](examples/compaction-resume-receipts/) to prove what was summarized, which instruction sources reloaded, what state was lost/kept, and whether `safe_to_resume` is actually true. If an MCP server is healthy but tools are missing in Claude Code/Cursor/Codex, use the [MCP tool visibility receipts](docs/mcp-tool-visibility-receipts.md) checklist to separate launch, handshake, `tools/list`, client catalog, and first invocation failures. If a Claude Code/OpenClaw-style Skill states a hard rule but the run still violates it, use the [Skill policy receipts](docs/skill-policy-receipts.md) guide and [copyable Skill recipe](
|
|
164
|
+
For a fuller walkthrough, see the [Quickstart](docs/quickstart.md). To enforce generated context files in pull requests, use the [CI audit example](docs/ci-audit-example.md); to catch drift before commits leave your machine, use the [Pre-commit Audit Hook](docs/pre-commit-audit.md). If your repo already has `CLAUDE.md`, `.cursorrules`, Copilot instructions, or `AGENTS.md`, run a [Context Drift Audit](docs/context-drift-audit.md) first, try the intentionally drifted [audit example](examples/context-drift-audit/), then follow [Migrate Existing AI Context Files](docs/migrate-existing-context.md). If you switch between Cursor, Claude Code, Copilot, and terminal agents, try the [Cursor ↔ Claude Code context handoff guide](docs/cursor-claude-context-handoff.md) and its [example source file](examples/context-handoff/pluribus.md). If you run multiple AI sessions on the same project, try the [Coordination Contract guide](docs/coordination-contract.md) and its [example source file](examples/coordination-contract/pluribus.md) to keep event-log/scratchpad protocol rules aligned without turning Pluribus into an orchestrator. If you evaluate code-search, MCP retrieval, RAG-over-notes, or agent memory tools, use the [Orchestration-layer Search Receipts](docs/orchestration-search-receipts.md) sketch to measure retrieved context from the harness layer without asking retrieval tools to inspect whole transcripts. If you are adding agent observability, traces, or OpenTelemetry-style events, start with [Context Receipts for Agent Observability](docs/context-receipts-for-agent-observability.md), then use the [Context Input Evidence](docs/context-input-evidence.md) sketch and its [executable demos](examples/context-input-evidence/) to separate source bytes, canonical text, delivered hashes, post-hoc session-log receipts, skill/plugin invocation receipts, shared-memory retrieval receipts, self-remediating brain/doctor receipts, and OpenTelemetry-style SpanEvents. 
If you publish AI rules, skills, or instruction bundles as "portable", use the [Portability Fidelity Report](docs/portability-fidelity-report.md) and its [example source file](examples/portability-fidelity/pluribus.md) to make compatibility claims evidence-based instead of self-attested. Before committing shared or generated AI instructions, use the [Context File Review Checklist](docs/context-file-review.md). If you're deciding between Pluribus and a one-way rules converter, see [When to use Pluribus](docs/when-to-use-pluribus.md). If you are debugging "context drift" after compaction or long sessions, start with the [Context Drift Taxonomy](docs/context-drift-taxonomy.md) to separate file drift from runtime precedence drift. If you use MCP memory or knowledge-graph tools, try the [MCP memory handoff demo](docs/memory-mcp-handoff.md) to keep recall/store protocols aligned across AI coding tools without turning Pluribus into a memory server. If your shared-memory or knowledge-graph setup lets agents write durable facts, use [Memory write policy receipts](docs/memory-write-policy-receipts.md) and the [copyable gate](examples/memory-write-policy/) to require proposed diffs, scope, lifecycle, visibility, approval, and privacy checks before one run can teach every harness. If hooks, local gateways, or agent firewalls block risky tool calls, use [Agent firewall denial/audit receipts](docs/agent-firewall-denial-audit.md) and the [copyable checker](examples/agent-firewall-denial-audit/) to split model-visible denial from private operator audit evidence. If you are turning Claude Code/OpenClaw/Cursor into role-based “AI employee” agents with Skills and memory folders, use the [Controlled learning queue](docs/controlled-learning-queue.md) and [copyable example](examples/controlled-learning-queue/) to let agents propose durable memory changes without silently rewriting shared ICP, pricing, compliance, or process assumptions. 
If `PreCompact` / `PostCompact` or `SessionStart(compact)` workflows decide whether an agent may continue after summarization, use [Compaction resume receipts](docs/compaction-resume-receipts.md) and the [copyable gate](examples/compaction-resume-receipts/) to prove what was summarized, which instruction sources reloaded, what state was lost/kept, and whether `safe_to_resume` is actually true. If an MCP server is healthy but tools are missing in Claude Code/Cursor/Codex, use the [MCP tool visibility receipts](docs/mcp-tool-visibility-receipts.md) checklist to separate launch, handshake, `tools/list`, client catalog, and first invocation failures. If a Claude Code/OpenClaw-style Skill states a hard rule but the run still violates it, use the [Skill policy receipts](docs/skill-policy-receipts.md) guide and [copyable Skill recipe](skills/skill-policy-receipts/) to turn target decisions, refusals, and post-write guards into privacy-safe evidence. If a Skill, plugin resource, MCP instruction, or custom-agent file exists but disappears in ACP/Zed/CLI/chat parity tests, use [Loaded-resource boundary receipts](docs/loaded-resource-boundary.md) and the [copyable checker](examples/loaded-resource-boundary/) to prove discovered, attached, injected, readable, and skipped-resource stages. If long-lived projects keep old specs/TODOs that still match grep but are no longer authoritative, use [Temporal context receipts](docs/temporal-context-receipts.md) and the [copyable current-state example](examples/temporal-context-receipts/) to separate current authority from historical citations before an agent writes code. 
If AI-generated pull requests are hard to review because diff size hides operational risk, use [AI PR review receipts](docs/ai-pr-review-receipts.md), the [copyable PR template](examples/ai-pr-review-receipts/), and the [GitHub Actions receipt gate](examples/ai-pr-review-receipts/.github/workflows/ai-pr-review-receipt.yml) to review by blast radius: schema/data contracts, async paths, rollout gates, side effects, and ambiguous boundaries. If you delegate work to Codex/Claude Code/Cursor/OpenClaw-style specialist subagents, use [Subagent role receipts](docs/subagent-role-receipts.md) and the [example role definitions](examples/subagent-role-receipts/) to prove the requested role, effective role, loaded instruction source, allowed/refused capabilities, stop point, and next safe action. If you run Claude Code-style dynamic workflows, ultracode, or local LLM gateway orchestration that spawns many agents, use [Dynamic workflow run receipts](docs/dynamic-workflow-run-receipts.md) and the [copyable workflow example](examples/dynamic-workflow-run-receipts/) to prove phases, per-agent roles/models, context loaded/skipped, tool grants, token spend buckets, per-agent fuses, heartbeat, stop reasons, and known gaps. If your workflow routes Explore/Propose/Spec/Design/Tasks/Apply/Verify across OpenCode, Claude Code, Cursor, Codex, or different models, use [Phase-boundary contracts](docs/phase-boundary-contracts.md) and the [copyable Apply→Verify gate](examples/phase-boundary-contract/) to prove allowed input context, output artifact, evidence required before the next phase, dropped context, and stop conditions. 
If you need CI/reviewers to decide whether an agent handoff can continue, must be reviewed, or should be rejected, use the [Review primitive gate](docs/review-primitive-gate.md), its [copyable gate example](examples/review-primitive-gate/), and the [Claude Code review hook bridge](examples/claude-code-review-hook/) to validate assignment boundaries, approved scope/access changes, required checks, privacy flags, and `complete / partial / unsafe-to-resume` state from CI or Claude Code `TaskCompleted` / `PostCompact` hooks. If Claude Projects, long chats, or compaction make the last clean artifact hard to recover, use [Canonical output receipts](docs/canonical-output-receipts.md) and the [copyable index example](examples/canonical-output-receipts/) to track stable IDs, paths, versions, exact grep phrases, decisions, rejected options, and next actions. If a setup script installs MCP servers, Skills, instruction files, hooks, or plugins across multiple agents, use [Install-plan receipts](docs/install-plan-receipts.md) and the [copyable example](examples/install-plan-receipts/) to prove planned writes, backups, network behavior, and `writes_started=false` before mutation. After a Skill installer runs, use [Skill install/load receipts](docs/skill-install-receipts.md) and the [copyable checker](examples/skill-install-receipts/) to prove source ref, target agents/scopes, discovery/load status, context-cost bucket, and `safe_to_start_session` without logging raw Skill bodies. If you are pruning Skill sprawl after real sessions, use [Skill use-rate receipts](docs/skill-use-rate-receipts.md) and the [copyable checker](examples/skill-use-rate-receipts/) to separate discovered/installed/attached from invoked/acted-on and catch "installed but unused" resources. 
If you supervise multiple Claude Code/Cursor/Codex/OpenClaw sessions in parallel, use the [Parallel session review ledger](docs/parallel-session-review-ledger.md) and [copyable checker](examples/parallel-session-review-ledger/) to decide which sessions are complete, partial, blocked, or unsafe to resume without trusting an agent summary. If you are reviewing Pluribus for a list, newsletter, or tool directory, use the [Community Review Packet](docs/community-review-packet.md) for directory submission fields, a one-line description, safety notes, and a disposable 60-second smoke test. Maintainers can track package/repo discovery with the [Discovery Smoke Checks](docs/discovery-smoke.md).
|
|
165
165
|
|
|
166
166
|
### Usage
|
|
167
167
|
|
|
@@ -408,6 +408,7 @@ If you've felt this pain, tell me about your setup. What tools do you use? How d
|
|
|
408
408
|
- [Composable Contexts](docs/composable-contexts.md) — local/remote imports, merge behavior, and safety rules
|
|
409
409
|
- [MCP Memory Handoff](docs/memory-mcp-handoff.md) — demo for keeping memory recall/store protocols aligned across tool-specific instruction files
|
|
410
410
|
- [MCP Tool Visibility Receipts](docs/mcp-tool-visibility-receipts.md) — checklist for debugging healthy MCP servers whose tools do not appear in the agent client catalog
|
|
411
|
+
- [MCP Runtime Config Receipts](docs/mcp-runtime-config-receipts.md) — live-vs-template evidence for MCP permission/config drift review
|
|
411
412
|
- [Remote Composable Context Imports](docs/remote-composable-context-imports.md) — design notes for lockfile/cache/auth hardening
|
|
412
413
|
- [Context Format Spec](spec/context-format.md) — the `pluribus.md` format reference
|
|
413
414
|
- [Skills Format Spec](spec/skills-format.md) — how adapters work and how to write custom skills
|
package/bin/pluribus.js
CHANGED
|
@@ -67,7 +67,7 @@ OPTIONS (watch)
|
|
|
67
67
|
--debounce Debounce delay in ms (minimum 300, default 400)
|
|
68
68
|
|
|
69
69
|
OPTIONS (demo)
|
|
70
|
-
--receipt Validate a custom
|
|
70
|
+
--receipt Validate a custom demo receipt JSON file
|
|
71
71
|
--json Print machine-readable demo results
|
|
72
72
|
|
|
73
73
|
EXAMPLES
|
|
@@ -89,6 +89,8 @@ EXAMPLES
|
|
|
89
89
|
pluribus watch --tools claude,cursor
|
|
90
90
|
pluribus demo skill-use-rate
|
|
91
91
|
pluribus demo skill-use-rate --json
|
|
92
|
+
pluribus demo mcp-audit-receipt
|
|
93
|
+
pluribus demo mcp-audit-receipt --json
|
|
92
94
|
|
|
93
95
|
DOCS
|
|
94
96
|
https://github.com/caioribeiroclw-pixel/pluribus
|
|
@@ -30,6 +30,8 @@ Use these fields for directories, awesome lists, or review forms that ask for a
|
|
|
30
30
|
| One sentence | Emit privacy-safe receipts for what context crossed agent boundaries, and audit or sync the generated instruction files used by Claude Code, Cursor, Copilot, OpenClaw, Windsurf, Continue, Zed, and Bob. |
|
|
31
31
|
| 280-char blurb | Pluribus is an open-source CLI for agent context evidence. It emits privacy-safe receipts for MCP/tools, skills, memory/RAG, pruning and compaction boundaries, then audits or syncs AI instruction files like `CLAUDE.md`, Cursor rules, Copilot instructions, and `AGENTS.md`. |
|
|
32
32
|
| Safe first command | `npx --yes pluribus-context@latest audit` |
|
|
33
|
+
| Agent Skill install smoke | `npx --yes skills add https://github.com/caioribeiroclw-pixel/pluribus --list` |
|
|
34
|
+
| Agent Skill one-shot smoke | `npx --yes skills use https://github.com/caioribeiroclw-pixel/pluribus --skill context-receipts --full-depth` |
|
|
33
35
|
|
|
34
36
|
### Awesome-list Markdown entry
|
|
35
37
|
|
|
@@ -98,6 +100,23 @@ Expected result:
|
|
|
98
100
|
- `sync --dry-run` previews generated context files without writing them.
|
|
99
101
|
- `audit --ci` may exit `1` before generated files are synced; that is expected when outputs are missing or drifted.
|
|
100
102
|
|
|
103
|
+
## 60-second Agent Skill smoke
|
|
104
|
+
|
|
105
|
+
Use this when reviewing Pluribus for Skill directories such as Skills CLI, MCP Market, SkillFish, or Agent Skill Exchange. It proves the repo exposes copyable Skill recipes without requiring the reviewer to install them globally:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
npx --yes skills add https://github.com/caioribeiroclw-pixel/pluribus --list
|
|
109
|
+
npx --yes skills use https://github.com/caioribeiroclw-pixel/pluribus --skill context-receipts --full-depth | sed -n '1,40p'
|
|
110
|
+
npx --yes skills use https://github.com/caioribeiroclw-pixel/pluribus --skill skill-policy-receipts --full-depth | sed -n '1,40p'
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Expected result:
|
|
114
|
+
|
|
115
|
+
- `skills add ... --list` finds `context-receipts` and `skill-policy-receipts`.
|
|
116
|
+
- `skills use ... --skill context-receipts` prints a one-shot Skill prompt for privacy-safe context-boundary receipts.
|
|
117
|
+
- `skills use ... --skill skill-policy-receipts` prints a one-shot Skill prompt for hard-policy receipts before/after writes.
|
|
118
|
+
- No global Skill install is required for this review smoke; it only clones the public repo into the Skills CLI cache and prints the selected Skill body.
|
|
119
|
+
|
|
101
120
|
## 60-second native-vs-fallback smoke
|
|
102
121
|
|
|
103
122
|
Use this when reviewing the fidelity-audit positioning. It demonstrates the difference between a native tool discovery surface and a generic fallback in a clean directory:
|
|
@@ -6,7 +6,7 @@ Privacy-safe receipts for answering a narrow operational question:
|
|
|
6
6
|
|
|
7
7
|
This is different from generic token accounting. A context-budget receipt should prove which context surfaces were available, which ones crossed the boundary, which ones stayed deferred or suppressed, and how much budget remained — without exporting raw prompts, tool schemas, tool outputs, memory bodies, file paths, ticket text, secrets, or customer data.
|
|
8
8
|
|
|
9
|
-
If you want a copyable Agent Skill recipe instead of a spec-style guide, see [`
|
|
9
|
+
If you want a copyable Agent Skill recipe instead of a spec-style guide, see [`skills/context-receipts/`](../skills/context-receipts/). It turns the receipt pattern into a 60-second smoke checklist for Tool Search, skills, and subagent boundaries.
|
|
10
10
|
|
|
11
11
|
## When to use this receipt
|
|
12
12
|
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# MCP runtime config receipts
|
|
2
|
+
|
|
3
|
+
MCP config review gets noisy when every file that looks like an MCP config is treated as an active permission change. A live `.mcp.json` can change what Claude Code, Cursor, Codex, Windsurf, Zed, or another client can load. A `.mcp.json.template`, `.sample`, or `.example` file, a catalog entry, or a disabled profile usually cannot.
|
|
4
|
+
|
|
5
|
+
An MCP runtime config receipt records that boundary without dumping secrets or full config bodies. The question is not "does this repository contain MCP-shaped JSON?" The useful question is:
|
|
6
|
+
|
|
7
|
+
> Can this changed file be loaded by an agent runtime now, and did it change the active tool/command/env permission surface?
|
|
8
|
+
|
|
9
|
+
## Minimal receipt shape
|
|
10
|
+
|
|
11
|
+
```json
|
|
12
|
+
{
|
|
13
|
+
"schema": "pluribus.mcp_runtime_config_receipt.v1",
|
|
14
|
+
"run_id": "mcp-config-review-2026-06-05T23:00Z",
|
|
15
|
+
"generated_at": "2026-06-05T23:00:00Z",
|
|
16
|
+
"repository_ref": "github:example/app@pull/123",
|
|
17
|
+
"configs": [
|
|
18
|
+
{
|
|
19
|
+
"path": ".mcp.json",
|
|
20
|
+
"client": "claude-code",
|
|
21
|
+
"source_kind": "runtime_config",
|
|
22
|
+
"runtime_active": true,
|
|
23
|
+
"loaded_by": ["claude-code"],
|
|
24
|
+
"change_kind": "server_added",
|
|
25
|
+
"permission_surface_changed": true,
|
|
26
|
+
"sample_config_review": false,
|
|
27
|
+
"should_alert": true,
|
|
28
|
+
"evidence": [
|
|
29
|
+
{ "kind": "config_digest", "ref": "sha256:9a1c..." },
|
|
30
|
+
{ "kind": "client_discovery_rule", "ref": "claude-code:.mcp.json" }
|
|
31
|
+
],
|
|
32
|
+
"redacted_env_keys": {
|
|
33
|
+
"required": ["GITHUB_TOKEN"],
|
|
34
|
+
"present": [],
|
|
35
|
+
"missing": ["GITHUB_TOKEN"]
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Review rule
|
|
43
|
+
|
|
44
|
+
Use the receipt to keep these cases separate:
|
|
45
|
+
|
|
46
|
+
| File/change | Runtime-active? | Default review result |
|
|
47
|
+
| --- | --- | --- |
|
|
48
|
+
| `.mcp.json`, Cursor/Windsurf/Zed/Codex/Claude settings that a client loads | yes | alert when server, command, env, or tool surface changes |
|
|
49
|
+
| `.mcp.json.template`, `.sample`, `.example` | no | quiet by default |
|
|
50
|
+
| disabled profile or catalog example | no | quiet by default |
|
|
51
|
+
| sample/template review explicitly enabled | no | label as `sample_config_review`, not `runtime_permission_drift` |
|
|
52
|
+
|
|
53
|
+
This avoids false positives that teach reviewers to ignore MCP permission checks.
|
|
54
|
+
|
|
55
|
+
## Privacy boundary
|
|
56
|
+
|
|
57
|
+
Do record:
|
|
58
|
+
|
|
59
|
+
- path or reviewed alias;
|
|
60
|
+
- target client/runtime;
|
|
61
|
+
- whether the path is runtime-active;
|
|
62
|
+
- source kind (`runtime_config`, `sample_config`, `disabled_config`, `catalog_example`);
|
|
63
|
+
- change kind (`server_added`, `server_removed`, `command_changed`, `env_changed`, `tools_changed`, `unchanged`);
|
|
64
|
+
- before/after digests or reviewed evidence refs;
|
|
65
|
+
- required/present/missing environment **key names**.
|
|
66
|
+
|
|
67
|
+
Do **not** record:
|
|
68
|
+
|
|
69
|
+
- env values, tokens, API keys, cookies, credentials, or private server URLs;
|
|
70
|
+
- raw full config bodies when a digest is enough;
|
|
71
|
+
- prompts, transcripts, tool outputs, or customer data;
|
|
72
|
+
- local absolute paths unless already safe to reveal in review.
|
|
73
|
+
|
|
74
|
+
## Copyable checker
|
|
75
|
+
|
|
76
|
+
The [MCP runtime config receipt example](../examples/mcp-runtime-config-receipts/) includes a tiny checker that validates the active-vs-template boundary and warns on review noise.
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
node examples/mcp-runtime-config-receipts/check-mcp-runtime-config-receipt.mjs \
|
|
80
|
+
examples/mcp-runtime-config-receipts/mcp-runtime-config-receipt.json
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Expected output:
|
|
84
|
+
|
|
85
|
+
```text
|
|
86
|
+
mcp runtime config receipt ok: 3 configs checked, 1 runtime alert, 0 review-noise warnings
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Where this fits
|
|
90
|
+
|
|
91
|
+
This is adjacent to [MCP tool visibility receipts](mcp-tool-visibility-receipts.md), but it answers an earlier review question. Tool visibility receipts ask why a healthy MCP server did not appear in a client catalog. Runtime config receipts ask whether a changed config file should count as an active permission/config drift event at all.
|
|
@@ -84,4 +84,4 @@ The useful question is: **where did the boundary proof stop?**
|
|
|
84
84
|
|
|
85
85
|
## Try the copyable Skill recipe
|
|
86
86
|
|
|
87
|
-
See [`
|
|
87
|
+
See [`skills/skill-policy-receipts/`](../skills/skill-policy-receipts/) for a small `SKILL.md` recipe you can copy into Claude Code/OpenClaw-style Skill workflows.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Legacy Agent Skill mirrors
|
|
2
|
+
|
|
3
|
+
The canonical Pluribus Agent Skills live in [`/skills`](../../skills/):
|
|
4
|
+
|
|
5
|
+
- [`skills/context-receipts/SKILL.md`](../../skills/context-receipts/SKILL.md)
|
|
6
|
+
- [`skills/skill-policy-receipts/SKILL.md`](../../skills/skill-policy-receipts/SKILL.md)
|
|
7
|
+
|
|
8
|
+
These `examples/agent-skills/*/SKILL.md` files are kept as backwards-compatible mirrors for external skill registries and older links that indexed the original example path before Pluribus adopted the canonical top-level `skills/*/SKILL.md` layout.
|
|
9
|
+
|
|
10
|
+
New directories and package managers should index `/skills` directly.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# MCP audit receipt demo
|
|
2
|
+
|
|
3
|
+
This example validates a privacy-safe audit receipt for MCP `tools/call` activity.
|
|
4
|
+
|
|
5
|
+
Run from any directory once the npm `latest` tag of `pluribus-context` points to 0.3.38 or later (the first release that includes this demo):
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npx --yes pluribus-context@latest demo mcp-audit-receipt
|
|
9
|
+
npx --yes pluribus-context@latest demo mcp-audit-receipt --json
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
Or validate your own receipt shape:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
npx --yes pluribus-context@latest demo mcp-audit-receipt --receipt ./mcp-audit-receipt.json
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
The point is to split:
|
|
19
|
+
|
|
20
|
+
- **audit events**: correlation IDs, hashed user/token subject, token scopes, tool name, redacted argument shape, status, duration, result shape, and error class;
|
|
21
|
+
- **usage metrics**: low-cardinality counters/histograms such as tool name, status, and token scope;
|
|
22
|
+
- **privacy boundary**: no raw prompts, raw SQL, row data, tokens, tool outputs, or private connection strings in the receipt.
|
|
23
|
+
|
|
24
|
+
This is for MCP server/gateway operators who need to answer: “who invoked which tool, under what scope, and did it succeed?” without dumping sensitive content into logs.
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema": "pluribus.mcp_tool_call_audit_receipt.v1",
|
|
3
|
+
"run_id": "mcp-audit-2026-06-06T22:00Z",
|
|
4
|
+
"generated_at": "2026-06-06T22:00:00Z",
|
|
5
|
+
"server": {
|
|
6
|
+
"name": "analytics-mcp",
|
|
7
|
+
"transport": "http",
|
|
8
|
+
"version": "1.4.0"
|
|
9
|
+
},
|
|
10
|
+
"client": {
|
|
11
|
+
"name": "claude-desktop",
|
|
12
|
+
"workspace": "hashed:7f3f0f5f"
|
|
13
|
+
},
|
|
14
|
+
"audit_policy": {
|
|
15
|
+
"raw_arguments": "redacted_shape_only",
|
|
16
|
+
"raw_results": "redacted_shape_only",
|
|
17
|
+
"privacy_boundary": "no prompts, raw SQL, row data, tokens, or private connection strings in receipt"
|
|
18
|
+
},
|
|
19
|
+
"tool_calls": [
|
|
20
|
+
{
|
|
21
|
+
"event": "mcp.tool_call",
|
|
22
|
+
"request_id": "req_01JY6GATEWAY",
|
|
23
|
+
"session_id": "sess_01JY6RUN",
|
|
24
|
+
"user_id_hash": "sha256:0d9b4a1a3f0a6f2fd0dc8aa9d0c6f2b7a35f4f5d0b2a4d3e1e04a8b4b6b2e5d8",
|
|
25
|
+
"token_subject_hash": "sha256:6aaf4a3b4d10c45c8fd2cb4f3c73b8cde42bb62779b7e1c6a2c5e0dd8d78f4a1",
|
|
26
|
+
"token_scopes": ["database:read", "query:run"],
|
|
27
|
+
"tool_name": "query_database",
|
|
28
|
+
"args_shape": {
|
|
29
|
+
"database_id": "number",
|
|
30
|
+
"query": "redacted_sql",
|
|
31
|
+
"limit": "number"
|
|
32
|
+
},
|
|
33
|
+
"status": "ok",
|
|
34
|
+
"duration_ms": 184,
|
|
35
|
+
"result_shape": "rows:12 columns:4",
|
|
36
|
+
"error_class": null
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"event": "mcp.tool_call",
|
|
40
|
+
"request_id": "req_01JY6DENIED",
|
|
41
|
+
"session_id": "sess_01JY6RUN",
|
|
42
|
+
"user_id_hash": "sha256:0d9b4a1a3f0a6f2fd0dc8aa9d0c6f2b7a35f4f5d0b2a4d3e1e04a8b4b6b2e5d8",
|
|
43
|
+
"token_subject_hash": "sha256:6aaf4a3b4d10c45c8fd2cb4f3c73b8cde42bb62779b7e1c6a2c5e0dd8d78f4a1",
|
|
44
|
+
"token_scopes": ["database:read"],
|
|
45
|
+
"tool_name": "update_dashboard",
|
|
46
|
+
"args_shape": {
|
|
47
|
+
"dashboard_id": "number",
|
|
48
|
+
"mutation": "redacted_object"
|
|
49
|
+
},
|
|
50
|
+
"status": "denied",
|
|
51
|
+
"duration_ms": 12,
|
|
52
|
+
"result_shape": "policy_denial",
|
|
53
|
+
"error_class": "insufficient_scope"
|
|
54
|
+
}
|
|
55
|
+
],
|
|
56
|
+
"usage_metrics": [
|
|
57
|
+
{
|
|
58
|
+
"name": "mcp_tool_calls_total",
|
|
59
|
+
"type": "counter",
|
|
60
|
+
"value": "1",
|
|
61
|
+
"labels": ["tool_name", "status", "token_scope"]
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
"name": "mcp_tool_call_duration_ms",
|
|
65
|
+
"type": "histogram",
|
|
66
|
+
"value": "184",
|
|
67
|
+
"labels": ["tool_name", "status"]
|
|
68
|
+
}
|
|
69
|
+
]
|
|
70
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# MCP runtime config receipts
|
|
2
|
+
|
|
3
|
+
This example validates the live-vs-template boundary for MCP config review.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
node check-mcp-runtime-config-receipt.mjs mcp-runtime-config-receipt.json
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
Expected output:
|
|
10
|
+
|
|
11
|
+
```text
|
|
12
|
+
mcp runtime config receipt ok: 3 configs checked, 1 runtime alert, 0 review-noise warnings
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
The alert is intentional: the live `.mcp.json` changes what Claude Code can load. The template and disabled config are quiet by default because they are not runtime-active.
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import fs from 'node:fs'
|
|
3
|
+
import path from 'node:path'
|
|
4
|
+
|
|
5
|
+
// Receipt to validate: the first CLI argument, or the sample receipt that sits next to this script.
// NOTE: import.meta.dirname is only read when no argument is given and needs Node 20.11+ / 21.2+.
const receiptPath = process.argv[2] || path.join(import.meta.dirname, 'mcp-runtime-config-receipt.json')

// Read and parse up front; a missing or malformed file is reported as a single readable line
// (exit code 1) instead of an uncaught exception and stack trace.
let receipt
try {
  receipt = JSON.parse(fs.readFileSync(receiptPath, 'utf8'))
} catch (err) {
  console.error(`mcp runtime config receipt unreadable at ${receiptPath}: ${err.message}`)
  process.exit(1)
}

// Hard failures: any entry makes the run exit non-zero.
const errors = []
// Soft findings: printed after the summary line, never fail the run.
const warnings = []
// Count of runtime-active configs whose permission surface changed.
let runtimeAlerts = 0
|
|
10
|
+
|
|
11
|
+
/**
 * Builds the dotted field path used in validation messages.
 * @param {string} prefix - Parent path; when falsy (for example '') the field stands alone.
 * @param {string} field - Leaf field name.
 * @returns {string} `prefix.field`, or just `field` when there is no prefix.
 */
function fieldName(prefix, field) {
  if (!prefix) return field
  return `${prefix}.${field}`
}
|
|
14
|
+
|
|
15
|
+
/**
 * Records an error unless `value` is a string with at least one non-whitespace character.
 * @param {unknown} value - Value read from the receipt.
 * @param {string} field - Field path used in the error message.
 */
function requireString(value, field) {
  const isNonEmptyString = typeof value === 'string' && value.trim() !== ''
  if (isNonEmptyString) return
  errors.push(`${field} must be a non-empty string`)
}
|
|
20
|
+
|
|
21
|
+
/**
 * Records an error unless `value` is a primitive boolean.
 * @param {unknown} value - Value read from the receipt.
 * @param {string} field - Field path used in the error message.
 */
function requireBoolean(value, field) {
  const isBoolean = value === true || value === false
  if (!isBoolean) errors.push(`${field} must be boolean`)
}
|
|
26
|
+
|
|
27
|
+
/**
 * Records an error unless `value` is an array (empty arrays are accepted).
 * @param {unknown} value - Value read from the receipt.
 * @param {string} field - Field path used in the error message.
 */
function requireArray(value, field) {
  if (Array.isArray(value)) return
  errors.push(`${field} must be an array`)
}
|
|
32
|
+
|
|
33
|
+
/**
 * Records an error unless `value` is an array whose items are all non-empty strings.
 *
 * A non-array value yields the single "must be an array" error and nothing else; items are
 * only inspected when the container really is an array, so malformed input (a string, an
 * object, a number) is reported rather than crashing the validator.
 *
 * @param {unknown} value - Value read from the receipt.
 * @param {string} field - Field path used in the error messages.
 */
function requireStringArray(value, field) {
  requireArray(value, field)
  // Strings, plain objects and numbers have no .entries(); stop once the container check failed.
  if (!Array.isArray(value)) return

  for (const [index, item] of value.entries()) {
    if (typeof item !== 'string' || item.trim() === '') {
      errors.push(`${field}[${index}] must be a non-empty string`)
    }
  }
}
|
|
41
|
+
|
|
42
|
+
// source_kind values that must be runtime_active=true versus those that must be
// runtime_active=false (both rules are enforced per config below).
const runtimeKinds = new Set(['runtime_config'])
const inactiveKinds = new Set(['sample_config', 'disabled_config', 'catalog_example'])
const allowedKinds = new Set([...runtimeKinds, ...inactiveKinds])
const allowedChanges = new Set(['server_added', 'server_removed', 'command_changed', 'env_changed', 'tools_changed', 'unchanged'])

// True only for plain JSON objects; null, arrays and primitives cannot carry named fields.
const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value)

if (receipt.schema !== 'pluribus.mcp_runtime_config_receipt.v1') {
  errors.push('schema must be pluribus.mcp_runtime_config_receipt.v1')
}

requireString(receipt.run_id, 'run_id')
requireString(receipt.generated_at, 'generated_at')
requireString(receipt.repository_ref, 'repository_ref')

// Normalise once so a malformed `configs` (object, string, number) is reported as an error
// instead of throwing when the loop asks for `.entries()`.
const configs = Array.isArray(receipt.configs) ? receipt.configs : []
if (configs.length === 0) {
  errors.push('configs must be a non-empty array')
}

for (const [index, config] of configs.entries()) {
  const prefix = `configs[${index}]`

  // A null or primitive entry would throw on the first property read; report it and move on.
  if (!isRecord(config)) {
    errors.push(`${prefix} must be an object`)
    continue
  }

  requireString(config.path, fieldName(prefix, 'path'))
  requireString(config.client, fieldName(prefix, 'client'))
  requireString(config.source_kind, fieldName(prefix, 'source_kind'))
  requireString(config.change_kind, fieldName(prefix, 'change_kind'))
  requireBoolean(config.runtime_active, fieldName(prefix, 'runtime_active'))
  requireBoolean(config.permission_surface_changed, fieldName(prefix, 'permission_surface_changed'))
  requireBoolean(config.sample_config_review, fieldName(prefix, 'sample_config_review'))
  requireBoolean(config.should_alert, fieldName(prefix, 'should_alert'))
  requireStringArray(config.loaded_by, fieldName(prefix, 'loaded_by'))

  if (!allowedKinds.has(config.source_kind)) {
    errors.push(`${prefix}.source_kind must be one of ${[...allowedKinds].join(', ')}`)
  }

  if (!allowedChanges.has(config.change_kind)) {
    errors.push(`${prefix}.change_kind must be one of ${[...allowedChanges].join(', ')}`)
  }

  // source_kind and runtime_active must agree with each other.
  if (runtimeKinds.has(config.source_kind) && config.runtime_active !== true) {
    errors.push(`${prefix}.runtime_active must be true for runtime_config`)
  }

  if (inactiveKinds.has(config.source_kind) && config.runtime_active !== false) {
    errors.push(`${prefix}.runtime_active must be false for ${config.source_kind}`)
  }

  if (config.runtime_active && (!Array.isArray(config.loaded_by) || config.loaded_by.length === 0)) {
    errors.push(`${prefix}.loaded_by must name at least one client when runtime_active is true`)
  }

  // An active config whose permission surface really changed counts as a runtime alert,
  // and the receipt must have flagged it.
  if (config.runtime_active && config.permission_surface_changed && config.change_kind !== 'unchanged') {
    runtimeAlerts += 1
    if (config.should_alert !== true) {
      errors.push(`${prefix}.should_alert must be true when an active runtime permission surface changed`)
    }
  }

  // Alerting on an inactive config is only a warning (review noise), unless it was
  // explicitly marked for sample-config review.
  if (!config.runtime_active && config.should_alert && !config.sample_config_review) {
    warnings.push(`${config.path || prefix} alerts even though it is not runtime-active; use sample_config_review or suppress as template noise`)
  }

  const evidenceList = Array.isArray(config.evidence) ? config.evidence : []
  if (evidenceList.length === 0) {
    errors.push(`${prefix}.evidence must include at least one privacy-safe evidence ref`)
  }

  for (const [evidenceIndex, evidence] of evidenceList.entries()) {
    const evidencePrefix = `${prefix}.evidence[${evidenceIndex}]`
    if (!isRecord(evidence)) {
      errors.push(`${evidencePrefix} must be an object`)
      continue
    }
    requireString(evidence.kind, fieldName(evidencePrefix, 'kind'))
    requireString(evidence.ref, fieldName(evidencePrefix, 'ref'))
  }

  // A missing or malformed redacted_env_keys falls back to {} so each list reports its own error.
  const envKeys = isRecord(config.redacted_env_keys) ? config.redacted_env_keys : {}
  requireStringArray(envKeys.required, fieldName(prefix, 'redacted_env_keys.required'))
  requireStringArray(envKeys.present, fieldName(prefix, 'redacted_env_keys.present'))
  requireStringArray(envKeys.missing, fieldName(prefix, 'redacted_env_keys.missing'))
}

if (errors.length > 0) {
  console.error('mcp runtime config receipt invalid:')
  for (const error of errors) console.error(`- ${error}`)
  process.exit(1)
}

// Summary line, pluralised to match the counts, followed by any warnings.
const warningLabel = warnings.length === 1 ? 'warning' : 'warnings'
const alertLabel = runtimeAlerts === 1 ? 'alert' : 'alerts'
console.log(`mcp runtime config receipt ok: ${configs.length} configs checked, ${runtimeAlerts} runtime ${alertLabel}, ${warnings.length} review-noise ${warningLabel}`)
for (const warning of warnings) console.log(`- ${warning}`)
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema": "pluribus.mcp_runtime_config_receipt.v1",
|
|
3
|
+
"run_id": "mcp-config-review-2026-06-05T23:00Z",
|
|
4
|
+
"generated_at": "2026-06-05T23:00:00Z",
|
|
5
|
+
"repository_ref": "github:example/app@pull/123",
|
|
6
|
+
"configs": [
|
|
7
|
+
{
|
|
8
|
+
"path": ".mcp.json",
|
|
9
|
+
"client": "claude-code",
|
|
10
|
+
"source_kind": "runtime_config",
|
|
11
|
+
"runtime_active": true,
|
|
12
|
+
"loaded_by": ["claude-code"],
|
|
13
|
+
"change_kind": "server_added",
|
|
14
|
+
"permission_surface_changed": true,
|
|
15
|
+
"sample_config_review": false,
|
|
16
|
+
"should_alert": true,
|
|
17
|
+
"evidence": [
|
|
18
|
+
{
|
|
19
|
+
"kind": "config_digest",
|
|
20
|
+
"ref": "sha256:9a1c4b7d4f1a"
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"kind": "client_discovery_rule",
|
|
24
|
+
"ref": "claude-code:.mcp.json"
|
|
25
|
+
}
|
|
26
|
+
],
|
|
27
|
+
"redacted_env_keys": {
|
|
28
|
+
"required": ["GITHUB_TOKEN"],
|
|
29
|
+
"present": [],
|
|
30
|
+
"missing": ["GITHUB_TOKEN"]
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"path": ".mcp.json.template",
|
|
35
|
+
"client": "claude-code",
|
|
36
|
+
"source_kind": "sample_config",
|
|
37
|
+
"runtime_active": false,
|
|
38
|
+
"loaded_by": [],
|
|
39
|
+
"change_kind": "server_added",
|
|
40
|
+
"permission_surface_changed": true,
|
|
41
|
+
"sample_config_review": false,
|
|
42
|
+
"should_alert": false,
|
|
43
|
+
"evidence": [
|
|
44
|
+
{
|
|
45
|
+
"kind": "config_digest",
|
|
46
|
+
"ref": "sha256:2e76a9c103ab"
|
|
47
|
+
}
|
|
48
|
+
],
|
|
49
|
+
"redacted_env_keys": {
|
|
50
|
+
"required": ["EXAMPLE_API_KEY"],
|
|
51
|
+
"present": [],
|
|
52
|
+
"missing": ["EXAMPLE_API_KEY"]
|
|
53
|
+
}
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
"path": ".cursor/mcp.disabled.json",
|
|
57
|
+
"client": "cursor",
|
|
58
|
+
"source_kind": "disabled_config",
|
|
59
|
+
"runtime_active": false,
|
|
60
|
+
"loaded_by": [],
|
|
61
|
+
"change_kind": "command_changed",
|
|
62
|
+
"permission_surface_changed": true,
|
|
63
|
+
"sample_config_review": false,
|
|
64
|
+
"should_alert": false,
|
|
65
|
+
"evidence": [
|
|
66
|
+
{
|
|
67
|
+
"kind": "config_digest",
|
|
68
|
+
"ref": "sha256:66f3ec00a12b"
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
"kind": "disabled_profile_marker",
|
|
72
|
+
"ref": "filename:mcp.disabled.json"
|
|
73
|
+
}
|
|
74
|
+
],
|
|
75
|
+
"redacted_env_keys": {
|
|
76
|
+
"required": [],
|
|
77
|
+
"present": [],
|
|
78
|
+
"missing": []
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
]
|
|
82
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pluribus-context",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.38",
|
|
4
4
|
"description": "AI context and rules sync CLI for Claude.md, Claude Code, Cursor, and Copilot instructions, with privacy-safe context receipts that prove what memory, tools, skills, compactions, and security findings crossed agent boundaries without logging raw content.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"homepage": "https://github.com/caioribeiroclw-pixel/pluribus#readme",
|
|
@@ -22,6 +22,7 @@
|
|
|
22
22
|
"spec/",
|
|
23
23
|
"schemas/",
|
|
24
24
|
"examples/",
|
|
25
|
+
"skills/",
|
|
25
26
|
"CHANGELOG.md"
|
|
26
27
|
],
|
|
27
28
|
"scripts": {
|
|
@@ -19,10 +19,10 @@ grep -E 'raw_(schema|query|args|result|output|transcript|text)_copied":false|raw
|
|
|
19
19
|
|
|
20
20
|
Then manually check that the receipt contains counts, hashes, ids, buckets, and `audit_gap`, but does **not** contain private prompts, raw schemas, tool args/results, skill bodies, memory bodies, customer names, secrets, or transcript text.
|
|
21
21
|
|
|
22
|
-
For executable fixture examples, see [`../../context-input-evidence/`](../../context-input-evidence/), including the ToolSearch propagation, pruning, and compaction transaction smokes:
|
|
22
|
+
For executable fixture examples, see [`../../examples/context-input-evidence/`](../../examples/context-input-evidence/), including the ToolSearch propagation, pruning, and compaction transaction smokes:
|
|
23
23
|
|
|
24
24
|
```bash
|
|
25
|
-
node ../../context-input-evidence/convert-subagent-toolsearch-propagation-log.mjs
|
|
26
|
-
node ../../context-input-evidence/convert-pruning-log.mjs
|
|
27
|
-
node ../../context-input-evidence/convert-compaction-transaction-log.mjs
|
|
25
|
+
node ../../examples/context-input-evidence/convert-subagent-toolsearch-propagation-log.mjs
|
|
26
|
+
node ../../examples/context-input-evidence/convert-pruning-log.mjs
|
|
27
|
+
node ../../examples/context-input-evidence/convert-compaction-transaction-log.mjs
|
|
28
28
|
```
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: context-receipts
|
|
3
|
+
description: Emit privacy-safe receipts for context selection, deferral, hydration, compaction, pruning, delegation, usage attribution, and boundary handoffs.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Context Receipts
|
|
7
|
+
|
|
8
|
+
Use this skill when an agent workflow claims to save context by selecting, deferring, hydrating, summarizing, compacting, pruning, delegating, attributing usage, or isolating context.
|
|
9
|
+
|
|
10
|
+
The job is not to log the private content. The job is to emit a small receipt that lets a reviewer answer:
|
|
11
|
+
|
|
12
|
+
> what crossed the context boundary, what stayed out, and what audit gap remains?
|
|
13
|
+
|
|
14
|
+
## Privacy defaults
|
|
15
|
+
|
|
16
|
+
Never include raw prompts, raw tool schemas, raw tool arguments, raw tool results, raw skill bodies, memory bodies, secrets, customer names, or full transcripts in the receipt.
|
|
17
|
+
|
|
18
|
+
Prefer:
|
|
19
|
+
|
|
20
|
+
- stable ids or hashed ids;
|
|
21
|
+
- counts and token/line buckets;
|
|
22
|
+
- categorical reasons;
|
|
23
|
+
- explicit booleans for raw content copied/not copied;
|
|
24
|
+
- before/after context budget buckets;
|
|
25
|
+
- an `audit_gap` field when the receipt proves routing but not semantic correctness.
|
|
26
|
+
|
|
27
|
+
## 60-second Tool Search smoke
|
|
28
|
+
|
|
29
|
+
For MCP Tool Search, lazy tool loading, or progressive disclosure, emit enough evidence to answer these seven checks:
|
|
30
|
+
|
|
31
|
+
1. **Index-only startup:** did the session load a compact tool/server index instead of all full schemas?
|
|
32
|
+
2. **Search/routing:** what hashed query/category or routing reason selected candidate tools?
|
|
33
|
+
3. **Hydration:** which full tool definition was loaded, why, and how many definitions stayed suppressed?
|
|
34
|
+
4. **Call:** which server/tool id was invoked, with argument/result redaction status and success/error status?
|
|
35
|
+
5. **Boundary:** if a manager subagent or child agent was used, did raw child output return to the parent?
|
|
36
|
+
6. **Budget:** what were the startup and post-hydration context-token buckets?
|
|
37
|
+
7. **Audit gap:** what is not proven, such as whether the selected tool was semantically optimal?
|
|
38
|
+
|
|
39
|
+
Minimal JSONL event names:
|
|
40
|
+
|
|
41
|
+
```jsonl
|
|
42
|
+
{"event":"mcp.tool_index.loaded","loaded_server_count":12,"loaded_tool_index_count":84,"full_schema_count":0,"suppressed_tool_count":84,"raw_schema_copied":false,"startup_token_bucket":"lt_1k"}
|
|
43
|
+
{"event":"mcp.tool_search.performed","query_hash":"sha256:...","query_category":"repo_search","candidate_tool_count":5,"selected_tool_id":"github.search_code","raw_query_copied":false}
|
|
44
|
+
{"event":"mcp.tool_definition.loaded","tool_id":"github.search_code","hydrate_reason":"selected_after_tool_search","suppressed_tool_count":83,"definition_token_bucket":"1k_2k","raw_schema_copied":false}
|
|
45
|
+
{"event":"mcp.tool_call.completed","tool_id":"github.search_code","args_hash":"sha256:...","result_token_bucket":"2k_4k","raw_args_copied":false,"raw_result_copied":false,"status":"ok"}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Skill / prompt context smoke
|
|
49
|
+
|
|
50
|
+
For skills, rules, AGENTS.md overlays, or instruction files, answer:
|
|
51
|
+
|
|
52
|
+
- which index/listing entered the session;
|
|
53
|
+
- which full skill/rule/instruction body was selected;
|
|
54
|
+
- which candidates were suppressed and why;
|
|
55
|
+
- whether the body was loaded at session start, after a search, or after an explicit command;
|
|
56
|
+
- source hash, delivered hash, and canonical form when available;
|
|
57
|
+
- whether the skill/instruction text was copied into the receipt.
|
|
58
|
+
|
|
59
|
+
Minimal event names:
|
|
60
|
+
|
|
61
|
+
- `context.skill.registry.index.loaded`
|
|
62
|
+
- `context.skill.registry.skill.read`
|
|
63
|
+
- `context.skill.registry.skill.injected`
|
|
64
|
+
- `context.input.loaded`
|
|
65
|
+
- `context.input.candidate_suppressed`
|
|
66
|
+
|
|
67
|
+
## Per-agent MCP injection smoke
|
|
68
|
+
|
|
69
|
+
For role-specific subagents or per-agent MCP configs, prove the policy boundary before debugging model quality:
|
|
70
|
+
|
|
71
|
+
- which subagent role/session requested tools;
|
|
72
|
+
- which MCP servers were available to that role;
|
|
73
|
+
- which servers were explicitly excluded before boot;
|
|
74
|
+
- whether startup loaded full schemas or only a compact index;
|
|
75
|
+
- how many tool definitions stayed deferred/suppressed; and
|
|
76
|
+
- the startup token bucket after policy was applied.
|
|
77
|
+
|
|
78
|
+
Minimal JSONL event names:
|
|
79
|
+
|
|
80
|
+
```jsonl
|
|
81
|
+
{"event":"subagent.mcp_policy.applied","subagent_role":"testing","available_server_count":2,"available_servers_hash":"sha256:...","excluded_server_count":5,"excluded_servers_hash":"sha256:...","policy_source":"role_config","raw_server_names_copied":false}
|
|
82
|
+
{"event":"subagent.context_boot.evaluated","subagent_role":"testing","loaded_tool_definition_count":0,"deferred_tool_definition_count":48,"startup_token_bucket":"50k_75k","raw_schema_copied":false,"audit_gap":"proves injection boundary, not tool relevance"}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## ToolSearch propagation smoke
|
|
86
|
+
|
|
87
|
+
For subagents that should inherit MCP through `ToolSearch`, distinguish policy, declaration, and runtime filtering:
|
|
88
|
+
|
|
89
|
+
- did the parent/orchestrator intend to expose MCP or exclude it for this subagent?
|
|
90
|
+
- was the subagent spawned immediately or after parent tool calls/orchestration work?
|
|
91
|
+
- was the `tools:` declaration wildcard, explicit include, or exclusion style?
|
|
92
|
+
- was `ToolSearch` declared and was it actually exposed in the subagent tool surface?
|
|
93
|
+
- did MCP servers/tool definitions stay deferred, or did the channel collapse to zero?
|
|
94
|
+
- was the agent registry loaded at session boot, making newly added agent files invisible until restart?
|
|
95
|
+
|
|
96
|
+
Minimal JSONL event names:
|
|
97
|
+
|
|
98
|
+
```jsonl
|
|
99
|
+
{"event":"subagent.toolsearch.propagation.evaluated","spawn_path":"Task","tools_declaration_shape":"enumerated_include","toolsearch_declared":false,"toolsearch_exposed":false,"mcp_servers_available_bucket":"0","deferred_tool_definitions_bucket":"0","filtered_by":"frontmatter_tools_policy_or_runtime_filter","raw_tool_schemas_copied":false}
|
|
100
|
+
{"event":"subagent.toolsearch.matrix.completed","tested_axis":"tools_frontmatter_shape","audit_gap":"proves ToolSearch exposure, not semantic tool relevance or runtime call success"}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Retrieval / code-search smoke
|
|
104
|
+
|
|
105
|
+
For semantic code search, repo RAG, or MCP tools such as Claude Context, separate "search returned" from "agent context loaded":
|
|
106
|
+
|
|
107
|
+
- which index snapshot/version was used, without raw local codebase paths;
|
|
108
|
+
- what query/category/filter identity selected the candidates, without raw query text;
|
|
109
|
+
- which result ids/chunk hashes were returned, with rank, score bucket, stale flag, duplicate marker, path hash/extension, and range bucket;
|
|
110
|
+
- which returned chunks were actually loaded into the agent context;
|
|
111
|
+
- which chunks were suppressed as duplicate, stale, clipped, policy-blocked, or over budget;
|
|
112
|
+
- whether raw code, raw prompts, raw paths, customer names, URLs, secrets, and ticket text stayed out of the receipt;
|
|
113
|
+
- the audit gap: this proves retrieval/loading boundaries, not semantic answer quality.
|
|
114
|
+
|
|
115
|
+
Minimal JSONL event names:
|
|
116
|
+
|
|
117
|
+
```jsonl
|
|
118
|
+
{"event":"code.index.snapshot.used","snapshot_id_hash":"sha256:...","codebase_path_hash":"sha256:...","indexed_chunk_count_bucket":"over_1k","raw_codebase_path_copied":false}
|
|
119
|
+
{"event":"code.search.performed","query_hash":"sha256:...","query_category":"auth_debug","candidate_count_bucket":"over_1k","raw_query_copied":false}
|
|
120
|
+
{"event":"code.search.result.returned","rank":1,"chunk_id_hash":"sha256:...","chunk_text_hash":"sha256:...","path_hash":"sha256:...","score_bucket":"high","stale":false,"raw_code_copied":false}
|
|
121
|
+
{"event":"context.input.loaded","kind":"retrieved_code_chunks","loaded_chunk_count":3,"suppressed_chunk_count":2,"suppression_reasons":["duplicate","stale_snapshot_chunk"],"raw_code_copied":false}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Usage attribution smoke
|
|
125
|
+
|
|
126
|
+
For `/usage`, `/context`, `/doctor`, or other context-budget breakdowns, map each displayed category to evidence that can be reviewed without exposing private content:
|
|
127
|
+
|
|
128
|
+
- what measurement window was used;
|
|
129
|
+
- which categories were attributed, such as skills, subagents, plugins, MCP servers, rules, memory, or project files;
|
|
130
|
+
- which components were loaded, deferred, hydrated, suppressed, pruned, or rolled back;
|
|
131
|
+
- before/after or current token/cost buckets by category;
|
|
132
|
+
- whether raw skill bodies, prompts, MCP schemas, tool outputs, and file paths were excluded;
|
|
133
|
+
- the remaining audit gap, such as not proving semantic usefulness of a high-cost component.
|
|
134
|
+
|
|
135
|
+
Minimal JSONL event names:
|
|
136
|
+
|
|
137
|
+
```jsonl
|
|
138
|
+
{"event":"context.usage.window.measured","window":"current_session","total_token_bucket":"100k_150k","raw_prompts_copied":false}
|
|
139
|
+
{"event":"context.usage.category.attributed","category":"mcp_server","component_hash":"sha256:...","loaded_token_bucket":"10k_25k","deferred_definition_count":42,"hydrated_definition_count":3,"raw_schema_copied":false}
|
|
140
|
+
{"event":"context.usage.breakdown.completed","categories":["skills","subagents","plugins","mcp_server"],"audit_gap":"proves attribution buckets, not whether each component was necessary"}
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Pruning / compaction smoke
|
|
144
|
+
|
|
145
|
+
For context-cleaning, pruning, compaction, or doctor/guard tools, answer:
|
|
146
|
+
|
|
147
|
+
- what prescription/trigger started the run;
|
|
148
|
+
- which strategies changed context and which candidates were protected;
|
|
149
|
+
- before/after token and byte buckets;
|
|
150
|
+
- whether summaries, behavioral digests, team messages, and backups were preserved;
|
|
151
|
+
- whether private transcript text, raw tool output, file paths, secrets, and customer text were excluded from the receipt;
|
|
152
|
+
- the remaining audit gap, such as not proving semantic quality of the pruned text.
|
|
153
|
+
|
|
154
|
+
Minimal JSONL event names:
|
|
155
|
+
|
|
156
|
+
```jsonl
|
|
157
|
+
{"event":"context.prune.started","prescription":"balanced","trigger":"manual_dry_run","before_token_bucket":"150k_200k","raw_transcript_copied":false}
|
|
158
|
+
{"event":"context.prune.strategy.evaluated","strategy":"tool-output-trim","candidate_bucket":"10_25","changed_bucket":"5_10","protected_bucket":"1_5","raw_tool_output_copied":false}
|
|
159
|
+
{"event":"context.prune.completed","after_token_bucket":"75k_100k","backup_verified":true,"protected_summary_count":2,"raw_text_copied":false,"audit_gap":"proves pruning/protection counts, not semantic disposability"}
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
For failed compaction, also prove transaction safety:
|
|
163
|
+
|
|
164
|
+
- did the summary call succeed, fail, or time out;
|
|
165
|
+
- was a candidate summary validated before any swap;
|
|
166
|
+
- did the harness commit a context swap or preserve the original context;
|
|
167
|
+
- were deferred-tool registries and system-reminder queues restored on rollback;
|
|
168
|
+
- did stale system reminders/tool results replay as fresh state;
|
|
169
|
+
- was post-token metadata recorded as success even though summary failed.
|
|
170
|
+
|
|
171
|
+
Minimal JSONL event names:
|
|
172
|
+
|
|
173
|
+
```jsonl
|
|
174
|
+
{"event":"context.compaction.summary.attempted","summary_call_status":"failed_rate_limited","candidate_summary_available":false,"raw_error_copied":false}
|
|
175
|
+
{"event":"context.compaction.rollback.completed","swap_committed":false,"original_context_preserved":true,"deferred_tool_registry_restored":true,"system_reminder_queue_restored":true,"replayed_system_reminder_count":0}
|
|
176
|
+
{"event":"context.compaction.transaction.completed","status":"rolled_back","authoritative_state":"pre_compaction_context","post_tokens_recorded_as_success":false,"raw_context_copied":false}
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## Subagent / manager boundary smoke
|
|
180
|
+
|
|
181
|
+
For subagents, manager agents, or child workers, answer:
|
|
182
|
+
|
|
183
|
+
- what task was delegated, by category and hashed objective;
|
|
184
|
+
- what large output was captured by the child, as line/token buckets;
|
|
185
|
+
- what bounded summary returned to the parent;
|
|
186
|
+
- whether raw child output, tool results, or MCP schemas entered the parent context;
|
|
187
|
+
- the remaining audit gap.
|
|
188
|
+
|
|
189
|
+
Minimal event names:
|
|
190
|
+
|
|
191
|
+
- `subagent.delegation.requested`
|
|
192
|
+
- `subagent.tool_output.captured`
|
|
193
|
+
- `subagent.summary.returned`
|
|
194
|
+
- `parent.context_budget.evaluated`
|
|
195
|
+
|
|
196
|
+
## Good receipt test
|
|
197
|
+
|
|
198
|
+
A receipt is useful if a maintainer can debug one of these failures without seeing private content:
|
|
199
|
+
|
|
200
|
+
- the agent never found the right tool/skill;
|
|
201
|
+
- the full definition loaded too early;
|
|
202
|
+
- too many definitions stayed in context;
|
|
203
|
+
- a child/subagent saved no budget because raw output returned to the parent;
|
|
204
|
+
- compaction/pruning happened but no one can prove what was changed, protected, backed up, summarized, or dropped.
|
|
205
|
+
|
|
206
|
+
A receipt is not enough if it only says “Tool Search enabled” or “used subagent”. It must prove the boundary behavior.
|
|
@@ -19,4 +19,4 @@ The receipt should prove:
|
|
|
19
19
|
- post-write guard passed or failed;
|
|
20
20
|
- no raw prompt, code, secret, customer data, stack trace, or full transcript was logged.
|
|
21
21
|
|
|
22
|
-
Related guide: [`docs/skill-policy-receipts.md`](
|
|
22
|
+
Related guide: [`docs/skill-policy-receipts.md`](../../docs/skill-policy-receipts.md).
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: skill-policy-receipts
|
|
3
|
+
description: Use when a task must obey a hard project policy, such as "do not generate tests for internal services", "do not call production APIs", or "do not edit generated files". Emits a privacy-safe receipt before writes and after guard checks.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Skill Policy Receipts
|
|
7
|
+
|
|
8
|
+
This Skill turns natural-language guardrails into an inspectable policy receipt.
|
|
9
|
+
|
|
10
|
+
## Preflight: decide before writing
|
|
11
|
+
|
|
12
|
+
Before creating or editing files:
|
|
13
|
+
|
|
14
|
+
1. List intended targets using coarse paths or globs.
|
|
15
|
+
2. For each target, decide `allowed` or `refused`.
|
|
16
|
+
3. Give a short reason.
|
|
17
|
+
4. If any target is refused, stop before writing.
|
|
18
|
+
5. Emit a receipt with `write_started=false` and `stopped_at="policy_refused"`.
|
|
19
|
+
|
|
20
|
+
Receipt shape:
|
|
21
|
+
|
|
22
|
+
```json
|
|
23
|
+
{
|
|
24
|
+
"receipt_type": "skill.policy.v1",
|
|
25
|
+
"skill": "skill-policy-receipts",
|
|
26
|
+
"policy_scope": "<short policy name>",
|
|
27
|
+
"targets": [
|
|
28
|
+
{
|
|
29
|
+
"target": "<coarse path or glob>",
|
|
30
|
+
"decision": "allowed|refused",
|
|
31
|
+
"reason": "<short reason>"
|
|
32
|
+
}
|
|
33
|
+
],
|
|
34
|
+
"write_started": false,
|
|
35
|
+
"post_write_guard": "not_run",
|
|
36
|
+
"stopped_at": "policy_refused|all_targets_allowed"
|
|
37
|
+
}
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Do not include raw prompts, code, secrets, customer data, stack traces, or full tool output.
|
|
41
|
+
|
|
42
|
+
## Write only after all targets are allowed
|
|
43
|
+
|
|
44
|
+
If every target is allowed:
|
|
45
|
+
|
|
46
|
+
1. Emit or state `stopped_at="all_targets_allowed"`.
|
|
47
|
+
2. Perform the write.
|
|
48
|
+
3. Run the configured post-write guard.
|
|
49
|
+
4. Emit whether the guard passed or failed.
|
|
50
|
+
|
|
51
|
+
Post-write receipt shape:
|
|
52
|
+
|
|
53
|
+
```json
|
|
54
|
+
{
|
|
55
|
+
"receipt_type": "skill.policy.v1",
|
|
56
|
+
"skill": "skill-policy-receipts",
|
|
57
|
+
"policy_scope": "<short policy name>",
|
|
58
|
+
"write_started": true,
|
|
59
|
+
"post_write_guard": "passed|failed|not_configured",
|
|
60
|
+
"stopped_at": "guard_passed|guard_failed"
|
|
61
|
+
}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Example policy: no internal-service unit tests
|
|
65
|
+
|
|
66
|
+
Policy:
|
|
67
|
+
|
|
68
|
+
> Do not generate unit tests for internal services. If the requested test imports `internal/`, `@/internal`, or a known private service module, refuse before writing and explain the safer target.
|
|
69
|
+
|
|
70
|
+
Example guard:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
# --include limits the recursive search to test/spec files under the current directory.
# Quoted globs passed as file operands after `--` are taken as literal file names, so the
# search would fail with "No such file or directory" instead of scanning the generated tests.
grep -R --include='*test.*' --include='*spec.*' \
  "from ['\"]\.\./\.\./internal\|from ['\"]@/internal\|require(['\"]@/internal" .
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
If the grep finds a match in generated tests, stop and report `post_write_guard="failed"`.
|
package/src/commands/demo.js
CHANGED
|
@@ -7,7 +7,11 @@ import * as path from 'path'
|
|
|
7
7
|
import { fileURLToPath } from 'url'
|
|
8
8
|
|
|
9
9
|
const DEFAULT_DEMO = 'skill-use-rate'
|
|
10
|
+
const SKILL_USE_RATE_DEMO = 'skill-use-rate'
|
|
11
|
+
const MCP_AUDIT_RECEIPT_DEMO = 'mcp-audit-receipt'
|
|
12
|
+
const AVAILABLE_DEMOS = [SKILL_USE_RATE_DEMO, MCP_AUDIT_RECEIPT_DEMO]
|
|
10
13
|
const SKILL_USE_RATE_SCHEMA = 'pluribus.skill_use_rate_receipt.v1'
|
|
14
|
+
const MCP_AUDIT_RECEIPT_SCHEMA = 'pluribus.mcp_tool_call_audit_receipt.v1'
|
|
11
15
|
|
|
12
16
|
/**
|
|
13
17
|
* @param {Record<string, string | boolean>} args
|
|
@@ -16,29 +20,42 @@ const SKILL_USE_RATE_SCHEMA = 'pluribus.skill_use_rate_receipt.v1'
|
|
|
16
20
|
export async function runDemo(args, positional = []) {
  // The first positional argument names the demo; fall back to the default demo.
  const requested = positional[0] || DEFAULT_DEMO

  if (requested === SKILL_USE_RATE_DEMO) return runSkillUseRateDemo(args)
  if (requested === MCP_AUDIT_RECEIPT_DEMO) return runMcpAuditReceiptDemo(args)

  // Anything else is unknown: list what is available and exit non-zero.
  console.error(`❌ Unknown demo: ${requested}`)
  console.error(` Available demos: ${AVAILABLE_DEMOS.join(', ')}`)
  process.exit(1)
}
|
|
24
34
|
|
|
25
|
-
|
|
26
|
-
? path.resolve(process.cwd(), args.receipt)
|
|
27
|
-
: bundledSkillUseRateReceiptPath()
|
|
28
|
-
|
|
29
|
-
let receipt
|
|
35
|
+
/**
 * Read a receipt file from disk and parse it as JSON.
 *
 * On any read or parse failure, prints a message naming the receipt kind and
 * path, then exits the process with status 1.
 *
 * @param {string} receiptPath path of the receipt file to read
 * @param {string} label human-readable receipt kind used in the error message
 * @returns {any} the parsed JSON value
 */
function readReceipt(receiptPath, label) {
  let parsed
  try {
    const rawText = fs.readFileSync(receiptPath, 'utf8')
    parsed = JSON.parse(rawText)
  } catch (failure) {
    console.error(`❌ Could not read ${label} receipt at ${receiptPath}: ${failure.message}`)
    process.exit(1)
  }
  return parsed
}
|
|
36
43
|
|
|
44
|
+
/**
 * Pick the receipt path to validate.
 *
 * @param {Record<string, string | boolean>} args parsed CLI arguments
 * @param {string} defaultPath path used when no usable `receipt` argument is given
 * @returns {string} `args.receipt` resolved against the current working
 *   directory when it is a non-blank string, otherwise `defaultPath`
 */
function selectedReceiptPath(args, defaultPath) {
  const candidate = args.receipt

  // A bare flag (boolean) or a missing value means "use the default".
  if (typeof candidate !== 'string') return defaultPath
  if (!candidate.trim()) return defaultPath

  return path.resolve(process.cwd(), candidate)
}
|
|
49
|
+
|
|
50
|
+
function runSkillUseRateDemo(args) {
|
|
51
|
+
const receiptPath = selectedReceiptPath(args, bundledSkillUseRateReceiptPath())
|
|
52
|
+
const receipt = readReceipt(receiptPath, 'skill use-rate')
|
|
37
53
|
const result = validateSkillUseRateReceipt(receipt)
|
|
54
|
+
|
|
38
55
|
if (Boolean(args.json)) {
|
|
39
56
|
console.log(JSON.stringify({
|
|
40
57
|
ok: result.errors.length === 0,
|
|
41
|
-
demo:
|
|
58
|
+
demo: SKILL_USE_RATE_DEMO,
|
|
42
59
|
receipt: path.relative(process.cwd(), receiptPath) || receiptPath,
|
|
43
60
|
summary: result.summary,
|
|
44
61
|
warnings: result.warnings,
|
|
@@ -65,10 +82,48 @@ export async function runDemo(args, positional = []) {
|
|
|
65
82
|
if (result.errors.length > 0) process.exit(1)
|
|
66
83
|
}
|
|
67
84
|
|
|
85
|
+
/**
 * Run the MCP audit receipt demo: load the selected receipt, validate it, and
 * report the outcome as JSON (when `args.json` is set) or as readable text.
 * Exits the process with status 1 when the receipt has validation errors.
 *
 * @param {Record<string, string | boolean>} args parsed CLI arguments
 */
function runMcpAuditReceiptDemo(args) {
  const receiptPath = selectedReceiptPath(args, bundledMcpAuditReceiptPath())
  const receipt = readReceipt(receiptPath, 'MCP audit')
  const { errors, warnings, summary } = validateMcpAuditReceipt(receipt)

  // Show a path relative to the working directory, falling back to the
  // selected path when the relative form is empty.
  const displayPath = path.relative(process.cwd(), receiptPath) || receiptPath
  const isValid = errors.length === 0

  if (Boolean(args.json)) {
    const report = {
      ok: isValid,
      demo: MCP_AUDIT_RECEIPT_DEMO,
      receipt: displayPath,
      summary,
      warnings,
      errors,
    }
    console.log(JSON.stringify(report, null, 2))
  } else {
    console.log('🧪 Pluribus demo: MCP audit receipt')
    console.log(` Receipt: ${displayPath}`)
    console.log('')

    if (isValid) {
      console.log(`✅ MCP audit receipt ok: ${summary.toolCallCount} tool calls, ${summary.auditEventCount} audit events, ${summary.metricCount} metrics`)
      warnings.forEach((warning) => {
        console.log(` • ${warning}`)
      })
      console.log('')
      console.log('Why this matters: production MCP needs audit events and low-cardinality metrics, not raw prompt/tool dumps. Prove who invoked which tool, under which scope, with redacted argument/result shape.')
      console.log('Try your own receipt: pluribus demo mcp-audit-receipt --receipt path/to/mcp-audit-receipt.json')
    } else {
      console.error('❌ MCP audit receipt invalid:')
      errors.forEach((error) => {
        console.error(` • ${error}`)
      })
    }
  }

  if (!isValid) process.exit(1)
}
|
|
118
|
+
|
|
68
119
|
/**
 * Filesystem path of the example skill use-rate receipt, resolved relative to
 * this module's URL.
 *
 * @returns {string}
 */
function bundledSkillUseRateReceiptPath() {
  const receiptUrl = new URL('../../examples/skill-use-rate-receipts/skill-use-rate-receipt.json', import.meta.url)
  return fileURLToPath(receiptUrl)
}
|
|
71
122
|
|
|
123
|
+
/**
 * Filesystem path of the example MCP audit receipt, resolved relative to this
 * module's URL.
 *
 * @returns {string}
 */
function bundledMcpAuditReceiptPath() {
  const receiptUrl = new URL('../../examples/mcp-audit-receipts/mcp-audit-receipt.json', import.meta.url)
  return fileURLToPath(receiptUrl)
}
|
|
126
|
+
|
|
72
127
|
export function validateSkillUseRateReceipt(receipt) {
|
|
73
128
|
const errors = []
|
|
74
129
|
const warnings = []
|
|
@@ -153,3 +208,102 @@ export function validateSkillUseRateReceipt(receipt) {
|
|
|
153
208
|
},
|
|
154
209
|
}
|
|
155
210
|
}
|
|
211
|
+
|
|
212
|
+
/**
 * Validate an MCP tool-call audit receipt.
 *
 * Checks the schema id, required top-level fields, the redaction policy, each
 * `tool_calls` entry, and each `usage_metrics` entry. Problems are collected
 * rather than thrown, so malformed input (a `null` receipt, non-array
 * collections, non-object entries) yields validation errors instead of a
 * TypeError.
 *
 * @param {unknown} receipt parsed receipt JSON
 * @returns {{
 *   errors: string[],
 *   warnings: string[],
 *   summary: { toolCallCount: number, auditEventCount: number, metricCount: number }
 * }} collected errors and warnings plus entry counts
 */
export function validateMcpAuditReceipt(receipt) {
  const errors = []
  const warnings = []

  const isPlainObject = (value) => value !== null && typeof value === 'object' && !Array.isArray(value)

  function requireString(value, field) {
    if (typeof value !== 'string' || value.trim() === '') {
      errors.push(`${field} must be a non-empty string`)
    }
  }

  function requireArray(value, field) {
    if (!Array.isArray(value) || value.length === 0) {
      errors.push(`${field} must be a non-empty array`)
    }
  }

  function requireNonNegativeNumber(value, field) {
    // Number.isFinite rejects NaN and ±Infinity in one check.
    if (typeof value !== 'number' || !Number.isFinite(value) || value < 0) {
      errors.push(`${field} must be a non-negative number`)
    }
  }

  // A receipt file containing `null`, a primitive, or an array cannot be
  // inspected field by field; report it once instead of dereferencing it.
  if (!isPlainObject(receipt)) {
    errors.push('receipt must be a JSON object')
    return {
      errors,
      warnings,
      summary: { toolCallCount: 0, auditEventCount: 0, metricCount: 0 },
    }
  }

  if (receipt.schema !== MCP_AUDIT_RECEIPT_SCHEMA) {
    errors.push(`schema must be ${MCP_AUDIT_RECEIPT_SCHEMA}`)
  }

  requireString(receipt.run_id, 'run_id')
  requireString(receipt.generated_at, 'generated_at')
  requireString(receipt.server?.name, 'server.name')
  requireString(receipt.server?.transport, 'server.transport')
  requireString(receipt.client?.name, 'client.name')
  requireString(receipt.audit_policy?.raw_arguments, 'audit_policy.raw_arguments')
  requireString(receipt.audit_policy?.raw_results, 'audit_policy.raw_results')
  requireString(receipt.audit_policy?.privacy_boundary, 'audit_policy.privacy_boundary')
  requireArray(receipt.tool_calls, 'tool_calls')
  requireArray(receipt.usage_metrics, 'usage_metrics')

  if (receipt.audit_policy?.raw_arguments !== 'redacted_shape_only') {
    errors.push('audit_policy.raw_arguments must be redacted_shape_only')
  }
  if (receipt.audit_policy?.raw_results !== 'redacted_shape_only') {
    errors.push('audit_policy.raw_results must be redacted_shape_only')
  }

  const lowCardinalityMetricLabels = new Set(['tool_name', 'status', 'token_scope', 'user_type'])

  // Only iterate real arrays; a wrong type was already reported by requireArray.
  const toolCalls = Array.isArray(receipt.tool_calls) ? receipt.tool_calls : []
  const usageMetrics = Array.isArray(receipt.usage_metrics) ? receipt.usage_metrics : []

  for (const [index, call] of toolCalls.entries()) {
    const prefix = `tool_calls[${index}]`
    if (!isPlainObject(call)) {
      errors.push(`${prefix} must be an object`)
      continue
    }

    requireString(call.event, `${prefix}.event`)
    requireString(call.request_id, `${prefix}.request_id`)
    requireString(call.session_id, `${prefix}.session_id`)
    requireString(call.user_id_hash, `${prefix}.user_id_hash`)
    requireString(call.token_subject_hash, `${prefix}.token_subject_hash`)
    requireArray(call.token_scopes, `${prefix}.token_scopes`)
    requireString(call.tool_name, `${prefix}.tool_name`)
    requireString(call.status, `${prefix}.status`)
    requireNonNegativeNumber(call.duration_ms, `${prefix}.duration_ms`)
    requireString(call.result_shape, `${prefix}.result_shape`)

    if (call.event !== 'mcp.tool_call') errors.push(`${prefix}.event must be mcp.tool_call`)
    if (!['ok', 'empty', 'error', 'timeout', 'denied'].includes(call.status)) {
      errors.push(`${prefix}.status must be one of ok|empty|error|timeout|denied`)
    }
    if (!isPlainObject(call.args_shape)) {
      errors.push(`${prefix}.args_shape must be an object with redacted argument types/shapes`)
    }
    // String previews would carry raw argument/result content into the receipt.
    if (typeof call.args_preview === 'string' || typeof call.result_preview === 'string') {
      errors.push(`${prefix} must not include raw args/results previews; use args_shape/result_shape instead`)
    }
    if (call.error_class != null && typeof call.error_class !== 'string') {
      errors.push(`${prefix}.error_class must be string or null`)
    }
  }

  for (const [index, metric] of usageMetrics.entries()) {
    const prefix = `usage_metrics[${index}]`
    if (!isPlainObject(metric)) {
      errors.push(`${prefix} must be an object`)
      continue
    }

    requireString(metric.name, `${prefix}.name`)
    requireString(metric.type, `${prefix}.type`)
    // NOTE(review): value is validated as a string here; confirm this matches
    // how receipt emitters encode metric values.
    requireString(metric.value, `${prefix}.value`)
    requireArray(metric.labels, `${prefix}.labels`)

    // A non-array labels value is already an error above; do not iterate it
    // (a string would otherwise be walked character by character).
    const labels = Array.isArray(metric.labels) ? metric.labels : []
    for (const label of labels) {
      if (!lowCardinalityMetricLabels.has(label)) {
        warnings.push(`${prefix}.labels includes high-cardinality label ${label}; prefer ${[...lowCardinalityMetricLabels].join(', ')}`)
      }
    }
  }

  return {
    errors,
    warnings,
    summary: {
      toolCallCount: toolCalls.length,
      // Each tool call entry is itself one audit event, so the counts match.
      auditEventCount: toolCalls.length,
      metricCount: usageMetrics.length,
    },
  }
}
|
package/src/utils/version.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const VERSION = '0.3.
|
|
1
|
+
export const VERSION = '0.3.38'
|